Coverage for mddb_workflow/tools/generate_membrane_mapping.py: 53%

121 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1import os 

2import numpy as np 

3from biobb_mem.fatslim.fatslim_membranes import fatslim_membranes, parse_index 

4from mddb_workflow.utils.auxiliar import save_json, load_json 

5from mddb_workflow.utils.constants import LIPIDS_RESIDUE_NAMES 

6from mddb_workflow.utils.type_hints import * 

7from contextlib import redirect_stdout 

8 

9 

10def generate_membrane_mapping(lipid_map : list[dict], 

11 structure_file : 'File', 

12 universe: 'Universe', 

13 output_filepath: str, 

14 ) -> list[dict]: 

15 """ 

16 Generates a list of residue numbers of membrane components from a given structure and topology file. 

17 Args: 

18 structure (Structure): The molecular structure to analyze. 

19 standard_topology_file (File): The topology file in JSON format. 

20 structure_file (File): The structure file. 

21 debug (bool, optional): If True, additional debug information is returned. Defaults to False. 

22  

23 Returns: 

24 list[dict]: A list containing the membrane mapping. If debug is True, additional information is returned. Ex: 

25 

26 { 

27 "n_mems": 1, 

28 "mems": { 

29 "0": { 

30 "leaflets": { 

31 "bot": [ 17096, 17097, ...], 

32 "top": [ 14730, 14804, ...] 

33 } 

34 } 

35 }, 

36 "no_mem_lipid": [] 

37 } 

38  

39 Raises: 

40 AssertionError: If the topology file is not in JSON format. 

41  

42 Notes: 

43 - The function identifies lipid and non-lipid residues based on InChI keys. 

44 - It classifies residues and checks for potential misclassifications. 

45 - Lipid residues are selected and neighboring lipids are found. 

46 - Clusters of lipids are identified, and clusters with more than 30 lipids are considered as membranes. 

47 - If debug is enabled, the function returns additional information including lipid residues, neighbors, counts, and clusters. 

48 """ 

49 

50 if not universe: raise RuntimeError('Missing universe') 

51 

52 # Prepare the membrane mapping OBJ/JSON 

53 is_cg = 'Cg' in universe.atoms.types 

54 

55 if is_cg: 

56 membrane_map = coarse_grain_membranes(structure_file, universe, output_filepath) 

57 else: 

58 membrane_map = all_atom_membranes(lipid_map, structure_file, universe, output_filepath) 

59 save_json(membrane_map, output_filepath) 

60 return membrane_map 

61 

62 

63def all_atom_membranes(lipid_map : list[dict], 

64 structure_file : 'File', 

65 universe: 'Universe', 

66 output_filepath: str, 

67 ) -> list[dict]: 

68 membrane_map = {'n_mems': 0, 'mems': {}, 'no_mem_lipid': {}} 

69 

70 # if no lipids are found, we save the empty mapping and return 

71 if len(lipid_map) == 0: 

72 # no lipids found in the structure. 

73 save_json(membrane_map, output_filepath) 

74 return membrane_map 

75 

76 # Select only the lipids and potential membrane members 

77 lipid_ridx = [] 

78 glclipid_ridx = [] 

79 for ref in lipid_map: 

80 lipid_ridx.extend(ref['residue_indices']) 

81 if ref['fragments']: 

82 glclipid_ridx.extend(ref['fragments']) 

83 # if no lipids are found, we save the empty mapping and return 

84 if len(lipid_ridx) == 0: 

85 # no lipids found in the structure. 

86 save_json(membrane_map, output_filepath) 

87 return membrane_map 

88 mem_candidates = universe.select_atoms(f'(resindex {" ".join(map(str,(lipid_ridx)))})') 

89 

90 # for better leaflet assignation we only use polar atoms 

91 charges = abs(np.array([atom.charge for atom in universe.atoms])) 

92 polar_atoms = [] 

93 for ridx in mem_candidates.residues.resindices: 

94 res = universe.residues[ridx] 

95 res_ch = charges[res.atoms.ix] 

96 max_ch_idx = np.argmax(res_ch) 

97 polar_atoms.append(res.atoms[max_ch_idx].index) 

98 polar_atoms = np.array(polar_atoms) 

99 headgroup_sel = f'(index {" ".join(map(str,(polar_atoms)))})' 

100 # Run FATSLiM to find the membranes 

101 prop = { 

102 'selection': headgroup_sel, 

103 'cutoff': 2.2, 

104 'ignore_no_box': True, 

105 'disable_logs': True, 

106 'return_hydrogen': True, 

107 # Do not copy the input file to the sandbox 

108 'disable_sandbox': True, 

109 } 

110 output_ndx_path = "tmp_mem_map.ndx" 

111 print(' Running BioBB FATSLiM Membranes') 

112 with redirect_stdout(None): 

113 fatslim_membranes(input_top_path=structure_file.absolute_path, 

114 output_ndx_path=output_ndx_path, 

115 properties=prop) 

116 # Parse the output to get the membrane mapping 

117 mem_map = parse_index(output_ndx_path) 

118 os.remove(output_ndx_path) 

119 # Save the membrane mapping as a JSON file 

120 n_mems = len(mem_map)//2 

121 membrane_map['n_mems'] = n_mems 

122 no_mem_lipids = set(mem_candidates.atoms.indices) 

123 for i in range(n_mems): 

124 # and they are not assigned to any membrane. FATSLiM indexes start at 1 

125 bot = (np.array(mem_map[f'membrane_{i+1}_leaflet_1'])-1).tolist() # JSON does not support numpy arrays 

126 top = (np.array(mem_map[f'membrane_{i+1}_leaflet_2'])-1).tolist() 

127 top_ridx = universe.atoms[top].residues.resindices 

128 bot_rdix = universe.atoms[bot].residues.resindices 

129 # Some polar atoms from the glucids are to far from the polar atoms of the lipids 

130 top = set(top) 

131 bot = set(bot) 

132 remove_ridx = [] 

133 for grp in glclipid_ridx: 

134 if np.in1d(grp, top_ridx).any(): 

135 top.update(universe.residues[grp].atoms.indices) 

136 remove_ridx.append(grp) 

137 continue 

138 if np.in1d(grp,bot_rdix).any(): 

139 bot.update(universe.residues[grp].atoms.indices) 

140 remove_ridx.append(grp) 

141 for grp in remove_ridx: 

142 glclipid_ridx.remove(grp) 

143 # Remove lipids not assigned to any membrane 

144 no_mem_lipids -= top 

145 no_mem_lipids -= bot 

146 # Check in which leaflets each of the polar atoms is and save them 

147 membrane_map['mems'][(str(i))] = { 

148 'leaflets': { 

149 'bot': list(map(int, bot)), 

150 'top': list(map(int, top)), 

151 }, 

152 'polar_atoms': { 

153 'bot': polar_atoms[np.in1d(polar_atoms, list(bot))].tolist(), 

154 'top': polar_atoms[np.in1d(polar_atoms, list(top))].tolist(), 

155 } 

156 } 

157 

158 membrane_map['no_mem_lipid'] = list(map(int, no_mem_lipids)) 

159 # Print the results and save the membrane mapping 

160 no_mem_lipids_str = f'{len(glclipid_ridx)} lipid/s not assigned to any membrane.' if len(glclipid_ridx)>0 else '' 

161 print(f'{n_mems} membrane/s found. ' + no_mem_lipids_str) 

162 return membrane_map 

163 

164 

165def coarse_grain_membranes(structure_file : 'File', 

166 universe: 'Universe', 

167 output_filepath: str, 

168 ) -> list[dict]: 

169 """ 

170 Generates membrane mapping for coarse-grained systems using residue names and P atoms as headgroups. 

171 """ 

172 membrane_map = {'n_mems': 0, 'mems': {}, 'no_mem_lipid': []} 

173 

174 # Find all lipid residues in the system 

175 lipid_residues = [] 

176 for resname in LIPIDS_RESIDUE_NAMES: 

177 lipids = universe.select_atoms(f'resname {resname}') 

178 if len(lipids) > 0: 

179 lipid_residues.append(resname) 

180 

181 if len(lipid_residues) == 0: 

182 print("No coarse-grained lipid residues found in the structure.") 

183 return membrane_map 

184 

185 # Select P atoms (headgroups) from lipid residues 

186 headgroup_atoms = f'resname {" ".join(lipid_residues)} and name P*' 

187 

188 # Run FATSLiM to find the membranes 

189 prop = { 

190 'selection': headgroup_atoms, 

191 'cutoff': 2.5, # Slightly larger cutoff for CG systems 

192 'ignore_no_box': True, 

193 'disable_logs': True, 

194 'disable_sandbox': True, 

195 } 

196 

197 output_ndx_path = "tmp_cg_mem_map.ndx" 

198 print(' Running BioBB FATSLiM Membranes for coarse-grained system') 

199 

200 #with redirect_stdout(None): 

201 fatslim_membranes(input_top_path=structure_file.absolute_path, 

202 output_ndx_path=output_ndx_path, 

203 properties=prop) 

204 

205 # Parse the output to get the membrane mapping 

206 mem_map = parse_index(output_ndx_path) 

207 os.remove(output_ndx_path) 

208 

209 # Process the membrane mapping 

210 n_mems = len(mem_map) // 2 

211 membrane_map['n_mems'] = n_mems 

212 

213 all_lipid_atoms = universe.select_atoms(f'resname {" ".join(lipid_residues)}') 

214 no_mem_lipids = set(all_lipid_atoms.atoms.indices) 

215 

216 for i in range(n_mems): 

217 # FATSLiM indexes start at 1, convert to 0-based 

218 bot_atoms = (np.array(mem_map[f'membrane_{i+1}_leaflet_1']) - 1).tolist() 

219 top_atoms = (np.array(mem_map[f'membrane_{i+1}_leaflet_2']) - 1).tolist() 

220 

221 # Remove assigned atoms from no_mem_lipids 

222 no_mem_lipids -= set(bot_atoms) 

223 no_mem_lipids -= set(top_atoms) 

224 

225 membrane_map['mems'][str(i)] = { 

226 'leaflets': { 

227 'bot': bot_atoms, 

228 'top': top_atoms, 

229 }, 

230 'polar_atoms': { 

231 'bot': universe.atoms[bot_atoms].select_atoms('name P*').indices.tolist(), 

232 'top': universe.atoms[top_atoms].select_atoms('name P*').indices.tolist(), 

233 } 

234 } 

235 

236 membrane_map['no_mem_lipid'] = list(map(int, no_mem_lipids)) 

237 

238 # Print results 

239 no_mem_lipids_str = f'{len(no_mem_lipids)} lipid atoms not assigned to any membrane.' if len(no_mem_lipids) > 0 else '' 

240 print(f'{n_mems} membrane/s found in coarse-grained system. ' + no_mem_lipids_str) 

241 

242 return membrane_map 

243 

244 

245def display_membrane_mapping(mem_map: str, pdb: str): 

246 try: 

247 import nglview as nv # type: ignore 

248 except ImportError: 

249 raise ImportError("nglview is required for displaying membrane mapping. Please install it using 'pip install nglview'.") 

250 

251 mem_map = load_json(mem_map) 

252 top = f"@{','.join(map(str,mem_map['mems']['0']['leaflets']['top']))}" 

253 bot = f"@{','.join(map(str,mem_map['mems']['0']['leaflets']['bot']))}" 

254 no_mem = f"@{','.join(map(str,mem_map['no_mem_lipid']))}" 

255 

256 view = nv.show_file(pdb) 

257 view.clear(0) 

258 #view.clear(1) 

259 view.add_point(selection='not protein', color='green', scale=1.5) 

260 view.add_point(selection=f'{top}', color='blue') 

261 view.add_point(selection=f'{bot}', color='red') 

262 view.add_point(selection=no_mem, color='yellow', scale=0.5) 

263 return view