Coverage for mddb_workflow / tools / membrane_mapping.py: 79%

120 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1import os 

2import numpy as np 

3from biobb_mem.fatslim.fatslim_membranes import fatslim_membranes, parse_index 

4from mddb_workflow.utils.auxiliar import save_json, load_json 

5from mddb_workflow.utils.constants import LIPIDS_RESIDUE_NAMES 

6from mddb_workflow.utils.type_hints import * 

7from contextlib import redirect_stdout 

8 

9 

10def generate_membrane_mapping( 

11 inchikeys: dict[str, 'InChIKeyData'], 

12 lipid_references: dict[str, dict], 

13 structure_file: 'File', 

14 universe: 'Universe', 

15 output_file: 'File', 

16) -> dict: 

17 """Generate a list of residue numbers of membrane components from a given structure and topology file. 

18 

19 Args: 

20 inchikeys: Dictionary mapping InChI keys to InChIKeyData objects. 

21 lipid_references: Dictionary mapping InChI keys to lipid database references. 

22 structure_file: File object representing the structure file (e.g., PDB, GRO). 

23 universe: MDAnalysis Universe object containing the structure and topology. 

24 output_file: Output JSON file to save membrane mapping. 

25 

26 Returns: 

27 dict: A dictionary containing the membrane mapping. 

28 - n_mems (int): Number of membranes detected. 

29 - mems (dict): Dictionary where each key is a membrane index (str) and value contains: 

30 - leaflets (dict): Contains 'top' and 'bot' keys, each with a list of atom indices. 

31 - polar_atoms (dict): Contains 'top' and 'bot' keys, each with a list of polar atom indices. 

32 - no_mem_lipid (list): List of atom indices for lipids not assigned to any membrane. 

33 

34 Notes: 

35 - The function identifies lipid and non-lipid residues based on InChI keys. 

36 - It classifies residues and checks for potential misclassifications. 

37 - Lipid residues are selected and neighboring lipids are found. 

38 - Clusters of lipids are identified, and clusters with more than 30 lipids are considered as membranes. 

39 - If debug is enabled, the function returns additional information including lipid residues, neighbors, counts, and clusters. 

40 

41 """ 

42 if not universe: raise RuntimeError('Missing universe') 

43 # Select only the lipids from the InChIKeyData 

44 lipid_map = {key: inchikeys[key] for key in lipid_references.keys()} 

45 if 'Cg' in universe.atoms.types: 

46 membrane_map = coarse_grain_membranes(structure_file, universe) 

47 else: 

48 membrane_map = all_atom_membranes(lipid_map, structure_file, universe) 

49 if membrane_map['n_mems'] > 0: 

50 save_json(membrane_map, output_file.path) 

51 return membrane_map 

52 

53 

54def all_atom_membranes( 

55 lipid_map: dict[str, 'InChIKeyData'], 

56 structure_file: 'File', 

57 universe: 'Universe', 

58) -> dict: 

59 """Generate membrane mapping for all-atom systems using FATSLiM.""" 

60 membrane_map = {'n_mems': 0, 'mems': {}, 'no_mem_lipid': []} 

61 

62 if len(lipid_map) == 0: 

63 # no lipids found in the structure. 

64 return membrane_map 

65 

66 # Select only the lipids and potential membrane members 

67 lipid_ridx = [] 

68 glclipid_ridx = [] 

69 for ref in lipid_map.values(): 

70 lipid_ridx.extend(ref.resindices) 

71 if ref.moltype == 'fragment': 

72 glclipid_ridx.extend(ref.fragments) 

73 # if no lipids are found, we save the empty mapping and return 

74 if len(lipid_ridx) == 0: 

75 # no lipids found in the structure. 

76 return membrane_map 

77 mem_candidates = universe.select_atoms(f'(resindex {" ".join(map(str, (lipid_ridx)))})') 

78 

79 # for better leaflet assignation we only use polar atoms 

80 charges = abs(np.array([atom.charge for atom in universe.atoms])) 

81 polar_atoms = [] 

82 for ridx in mem_candidates.residues.resindices: 

83 res = universe.residues[ridx] 

84 res_ch = charges[res.atoms.ix] 

85 max_ch_idx = np.argmax(res_ch) 

86 polar_atoms.append(res.atoms[max_ch_idx].index) 

87 polar_atoms = np.array(polar_atoms) 

88 headgroup_sel = f'(index {" ".join(map(str, (polar_atoms)))})' 

89 # Run FATSLiM to find the membranes 

90 prop = { 

91 'selection': headgroup_sel, 

92 'cutoff': 2.2, 

93 'ignore_no_box': True, 

94 'disable_logs': True, 

95 'return_hydrogen': True, 

96 # Do not copy the input file to the sandbox 

97 'disable_sandbox': True, 

98 } 

99 output_ndx_path = "tmp_mem_map.ndx" 

100 print(' Running BioBB FATSLiM Membranes') 

101 with redirect_stdout(None): 

102 fatslim_membranes(input_top_path=structure_file.absolute_path, 

103 output_ndx_path=output_ndx_path, 

104 properties=prop) 

105 # Parse the output to get the membrane mapping 

106 mem_map = parse_index(output_ndx_path) 

107 os.remove(output_ndx_path) 

108 # Save the membrane mapping as a JSON file 

109 n_mems = len(mem_map) // 2 

110 membrane_map['n_mems'] = n_mems 

111 no_mem_lipids = set(mem_candidates.atoms.indices) 

112 for i in range(n_mems): 

113 # and they are not assigned to any membrane. FATSLiM indexes start at 1 

114 bot = (np.array(mem_map[f'membrane_{i + 1}_leaflet_1']) - 1).tolist() # JSON does not support numpy arrays 

115 top = (np.array(mem_map[f'membrane_{i + 1}_leaflet_2']) - 1).tolist() 

116 top_ridx = universe.atoms[top].residues.resindices 

117 bot_ridx = universe.atoms[bot].residues.resindices 

118 # Some polar atoms from the glucids are to far from the polar atoms of the lipids 

119 top = set(top) 

120 bot = set(bot) 

121 remove_ridx = [] 

122 for grp in glclipid_ridx: 

123 if np.in1d(grp, top_ridx).any(): 

124 top.update(universe.residues[grp].atoms.indices) 

125 remove_ridx.append(grp) 

126 continue 

127 if np.in1d(grp, bot_ridx).any(): 

128 bot.update(universe.residues[grp].atoms.indices) 

129 remove_ridx.append(grp) 

130 for grp in remove_ridx: 

131 glclipid_ridx.remove(grp) 

132 # Remove lipids not assigned to any membrane 

133 no_mem_lipids -= top 

134 no_mem_lipids -= bot 

135 # Check in which leaflets each of the polar atoms is and save them 

136 membrane_map['mems'][str(i)] = { 

137 'leaflets': { 

138 'bot': list(map(int, bot)), 

139 'top': list(map(int, top)) 

140 }, 

141 'polar_atoms': { 

142 'bot': polar_atoms[np.in1d(polar_atoms, list(bot))].tolist(), 

143 'top': polar_atoms[np.in1d(polar_atoms, list(top))].tolist() 

144 } 

145 } 

146 

147 membrane_map['no_mem_lipid'] = list(map(int, no_mem_lipids)) 

148 # Print the results and save the membrane mapping 

149 no_mem_lipids_str = f'{len(glclipid_ridx)} lipid/s not assigned to any membrane.' if len(glclipid_ridx) > 0 else '' 

150 print(f'{n_mems} membrane/s found. ' + no_mem_lipids_str) 

151 return membrane_map 

152 

153 

154def coarse_grain_membranes(structure_file: 'File', universe: 'Universe') -> dict: 

155 """Generate membrane mapping for coarse-grained systems using residue names and P atoms as headgroups.""" 

156 membrane_map = {'n_mems': 0, 'mems': {}, 'no_mem_lipid': []} 

157 

158 # Find all lipid residues in the system 

159 lipid_residues = [] 

160 for resname in LIPIDS_RESIDUE_NAMES: 

161 lipids = universe.select_atoms(f'resname {resname}') 

162 if len(lipids) > 0: 

163 lipid_residues.append(resname) 

164 

165 if len(lipid_residues) == 0: 

166 print("No coarse-grained lipid residues found in the structure.") 

167 return membrane_map 

168 

169 # Select P atoms (headgroups) from lipid residues 

170 headgroup_atoms = f'resname {" ".join(lipid_residues)} and name P*' 

171 

172 # Run FATSLiM to find the membranes 

173 prop = { 

174 'selection': headgroup_atoms, 

175 'cutoff': 2.5, # Slightly larger cutoff for CG systems 

176 'ignore_no_box': True, 

177 'disable_logs': True, 

178 'disable_sandbox': True, 

179 } 

180 

181 output_ndx_path = "tmp_cg_mem_map.ndx" 

182 print(' Running BioBB FATSLiM Membranes for coarse-grained system') 

183 

184 # with redirect_stdout(None): 

185 fatslim_membranes(input_top_path=structure_file.absolute_path, 

186 output_ndx_path=output_ndx_path, 

187 properties=prop) 

188 

189 # Parse the output to get the membrane mapping 

190 mem_map = parse_index(output_ndx_path) 

191 os.remove(output_ndx_path) 

192 

193 # Process the membrane mapping 

194 n_mems = len(mem_map) // 2 

195 membrane_map['n_mems'] = n_mems 

196 

197 all_lipid_atoms = universe.select_atoms(f'resname {" ".join(lipid_residues)}') 

198 no_mem_lipids = set(all_lipid_atoms.atoms.indices) 

199 

200 for i in range(n_mems): 

201 # FATSLiM indexes start at 1, convert to 0-based 

202 bot_atoms = (np.array(mem_map[f'membrane_{i + 1}_leaflet_1']) - 1).tolist() 

203 top_atoms = (np.array(mem_map[f'membrane_{i + 1}_leaflet_2']) - 1).tolist() 

204 

205 # Remove assigned atoms from no_mem_lipids 

206 no_mem_lipids -= set(bot_atoms) 

207 no_mem_lipids -= set(top_atoms) 

208 

209 membrane_map['mems'][str(i)] = { 

210 'leaflets': { 

211 'bot': bot_atoms, 

212 'top': top_atoms 

213 }, 

214 'polar_atoms': { 

215 'bot': universe.atoms[bot_atoms].select_atoms('name P*').indices.tolist(), 

216 'top': universe.atoms[top_atoms].select_atoms('name P*').indices.tolist() 

217 } 

218 } 

219 

220 membrane_map['no_mem_lipid'] = list(map(int, no_mem_lipids)) 

221 

222 # Print results 

223 no_mem_lipids_str = f'{len(no_mem_lipids)} lipid atoms not assigned to any membrane.' if len(no_mem_lipids) > 0 else '' 

224 print(f'{n_mems} membrane/s found in coarse-grained system. ' + no_mem_lipids_str) 

225 

226 return membrane_map 

227 

228 

229def display_membrane_mapping(mem_map: str, pdb: str): 

230 """Display the membrane mapping using nglview.""" 

231 try: 

232 import nglview as nv # type: ignore 

233 except ImportError: 

234 raise ImportError("nglview is required for displaying membrane mapping. Please install it using 'pip install nglview'.") 

235 

236 mem_map = load_json(mem_map) 

237 top = f"@{','.join(map(str, mem_map['mems']['0']['leaflets']['top']))}" 

238 bot = f"@{','.join(map(str, mem_map['mems']['0']['leaflets']['bot']))}" 

239 no_mem = f"@{','.join(map(str, mem_map['no_mem_lipid']))}" 

240 

241 view = nv.show_file(pdb) 

242 view.clear(0) 

243 # view.clear(1) 

244 view.add_point(selection='not protein', color='green', scale=1.5) 

245 view.add_point(selection=f'{top}', color='blue') 

246 view.add_point(selection=f'{bot}', color='red') 

247 view.add_point(selection=no_mem, color='yellow', scale=0.5) 

248 return view