Coverage for mddb_workflow/tools/generate_membrane_mapping.py: 53%
121 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
1import os
2import numpy as np
3from biobb_mem.fatslim.fatslim_membranes import fatslim_membranes, parse_index
4from mddb_workflow.utils.auxiliar import save_json, load_json
5from mddb_workflow.utils.constants import LIPIDS_RESIDUE_NAMES
6from mddb_workflow.utils.type_hints import *
7from contextlib import redirect_stdout
10def generate_membrane_mapping(lipid_map : list[dict],
11 structure_file : 'File',
12 universe: 'Universe',
13 output_filepath: str,
14 ) -> list[dict]:
15 """
16 Generates a list of residue numbers of membrane components from a given structure and topology file.
17 Args:
18 structure (Structure): The molecular structure to analyze.
19 standard_topology_file (File): The topology file in JSON format.
20 structure_file (File): The structure file.
21 debug (bool, optional): If True, additional debug information is returned. Defaults to False.
23 Returns:
24 list[dict]: A list containing the membrane mapping. If debug is True, additional information is returned. Ex:
26 {
27 "n_mems": 1,
28 "mems": {
29 "0": {
30 "leaflets": {
31 "bot": [ 17096, 17097, ...],
32 "top": [ 14730, 14804, ...]
33 }
34 }
35 },
36 "no_mem_lipid": []
37 }
39 Raises:
40 AssertionError: If the topology file is not in JSON format.
42 Notes:
43 - The function identifies lipid and non-lipid residues based on InChI keys.
44 - It classifies residues and checks for potential misclassifications.
45 - Lipid residues are selected and neighboring lipids are found.
46 - Clusters of lipids are identified, and clusters with more than 30 lipids are considered as membranes.
47 - If debug is enabled, the function returns additional information including lipid residues, neighbors, counts, and clusters.
48 """
50 if not universe: raise RuntimeError('Missing universe')
52 # Prepare the membrane mapping OBJ/JSON
53 is_cg = 'Cg' in universe.atoms.types
55 if is_cg:
56 membrane_map = coarse_grain_membranes(structure_file, universe, output_filepath)
57 else:
58 membrane_map = all_atom_membranes(lipid_map, structure_file, universe, output_filepath)
59 save_json(membrane_map, output_filepath)
60 return membrane_map
63def all_atom_membranes(lipid_map : list[dict],
64 structure_file : 'File',
65 universe: 'Universe',
66 output_filepath: str,
67 ) -> list[dict]:
68 membrane_map = {'n_mems': 0, 'mems': {}, 'no_mem_lipid': {}}
70 # if no lipids are found, we save the empty mapping and return
71 if len(lipid_map) == 0:
72 # no lipids found in the structure.
73 save_json(membrane_map, output_filepath)
74 return membrane_map
76 # Select only the lipids and potential membrane members
77 lipid_ridx = []
78 glclipid_ridx = []
79 for ref in lipid_map:
80 lipid_ridx.extend(ref['residue_indices'])
81 if ref['fragments']:
82 glclipid_ridx.extend(ref['fragments'])
83 # if no lipids are found, we save the empty mapping and return
84 if len(lipid_ridx) == 0:
85 # no lipids found in the structure.
86 save_json(membrane_map, output_filepath)
87 return membrane_map
88 mem_candidates = universe.select_atoms(f'(resindex {" ".join(map(str,(lipid_ridx)))})')
90 # for better leaflet assignation we only use polar atoms
91 charges = abs(np.array([atom.charge for atom in universe.atoms]))
92 polar_atoms = []
93 for ridx in mem_candidates.residues.resindices:
94 res = universe.residues[ridx]
95 res_ch = charges[res.atoms.ix]
96 max_ch_idx = np.argmax(res_ch)
97 polar_atoms.append(res.atoms[max_ch_idx].index)
98 polar_atoms = np.array(polar_atoms)
99 headgroup_sel = f'(index {" ".join(map(str,(polar_atoms)))})'
100 # Run FATSLiM to find the membranes
101 prop = {
102 'selection': headgroup_sel,
103 'cutoff': 2.2,
104 'ignore_no_box': True,
105 'disable_logs': True,
106 'return_hydrogen': True,
107 # Do not copy the input file to the sandbox
108 'disable_sandbox': True,
109 }
110 output_ndx_path = "tmp_mem_map.ndx"
111 print(' Running BioBB FATSLiM Membranes')
112 with redirect_stdout(None):
113 fatslim_membranes(input_top_path=structure_file.absolute_path,
114 output_ndx_path=output_ndx_path,
115 properties=prop)
116 # Parse the output to get the membrane mapping
117 mem_map = parse_index(output_ndx_path)
118 os.remove(output_ndx_path)
119 # Save the membrane mapping as a JSON file
120 n_mems = len(mem_map)//2
121 membrane_map['n_mems'] = n_mems
122 no_mem_lipids = set(mem_candidates.atoms.indices)
123 for i in range(n_mems):
124 # and they are not assigned to any membrane. FATSLiM indexes start at 1
125 bot = (np.array(mem_map[f'membrane_{i+1}_leaflet_1'])-1).tolist() # JSON does not support numpy arrays
126 top = (np.array(mem_map[f'membrane_{i+1}_leaflet_2'])-1).tolist()
127 top_ridx = universe.atoms[top].residues.resindices
128 bot_rdix = universe.atoms[bot].residues.resindices
129 # Some polar atoms from the glucids are to far from the polar atoms of the lipids
130 top = set(top)
131 bot = set(bot)
132 remove_ridx = []
133 for grp in glclipid_ridx:
134 if np.in1d(grp, top_ridx).any():
135 top.update(universe.residues[grp].atoms.indices)
136 remove_ridx.append(grp)
137 continue
138 if np.in1d(grp,bot_rdix).any():
139 bot.update(universe.residues[grp].atoms.indices)
140 remove_ridx.append(grp)
141 for grp in remove_ridx:
142 glclipid_ridx.remove(grp)
143 # Remove lipids not assigned to any membrane
144 no_mem_lipids -= top
145 no_mem_lipids -= bot
146 # Check in which leaflets each of the polar atoms is and save them
147 membrane_map['mems'][(str(i))] = {
148 'leaflets': {
149 'bot': list(map(int, bot)),
150 'top': list(map(int, top)),
151 },
152 'polar_atoms': {
153 'bot': polar_atoms[np.in1d(polar_atoms, list(bot))].tolist(),
154 'top': polar_atoms[np.in1d(polar_atoms, list(top))].tolist(),
155 }
156 }
158 membrane_map['no_mem_lipid'] = list(map(int, no_mem_lipids))
159 # Print the results and save the membrane mapping
160 no_mem_lipids_str = f'{len(glclipid_ridx)} lipid/s not assigned to any membrane.' if len(glclipid_ridx)>0 else ''
161 print(f'{n_mems} membrane/s found. ' + no_mem_lipids_str)
162 return membrane_map
165def coarse_grain_membranes(structure_file : 'File',
166 universe: 'Universe',
167 output_filepath: str,
168 ) -> list[dict]:
169 """
170 Generates membrane mapping for coarse-grained systems using residue names and P atoms as headgroups.
171 """
172 membrane_map = {'n_mems': 0, 'mems': {}, 'no_mem_lipid': []}
174 # Find all lipid residues in the system
175 lipid_residues = []
176 for resname in LIPIDS_RESIDUE_NAMES:
177 lipids = universe.select_atoms(f'resname {resname}')
178 if len(lipids) > 0:
179 lipid_residues.append(resname)
181 if len(lipid_residues) == 0:
182 print("No coarse-grained lipid residues found in the structure.")
183 return membrane_map
185 # Select P atoms (headgroups) from lipid residues
186 headgroup_atoms = f'resname {" ".join(lipid_residues)} and name P*'
188 # Run FATSLiM to find the membranes
189 prop = {
190 'selection': headgroup_atoms,
191 'cutoff': 2.5, # Slightly larger cutoff for CG systems
192 'ignore_no_box': True,
193 'disable_logs': True,
194 'disable_sandbox': True,
195 }
197 output_ndx_path = "tmp_cg_mem_map.ndx"
198 print(' Running BioBB FATSLiM Membranes for coarse-grained system')
200 #with redirect_stdout(None):
201 fatslim_membranes(input_top_path=structure_file.absolute_path,
202 output_ndx_path=output_ndx_path,
203 properties=prop)
205 # Parse the output to get the membrane mapping
206 mem_map = parse_index(output_ndx_path)
207 os.remove(output_ndx_path)
209 # Process the membrane mapping
210 n_mems = len(mem_map) // 2
211 membrane_map['n_mems'] = n_mems
213 all_lipid_atoms = universe.select_atoms(f'resname {" ".join(lipid_residues)}')
214 no_mem_lipids = set(all_lipid_atoms.atoms.indices)
216 for i in range(n_mems):
217 # FATSLiM indexes start at 1, convert to 0-based
218 bot_atoms = (np.array(mem_map[f'membrane_{i+1}_leaflet_1']) - 1).tolist()
219 top_atoms = (np.array(mem_map[f'membrane_{i+1}_leaflet_2']) - 1).tolist()
221 # Remove assigned atoms from no_mem_lipids
222 no_mem_lipids -= set(bot_atoms)
223 no_mem_lipids -= set(top_atoms)
225 membrane_map['mems'][str(i)] = {
226 'leaflets': {
227 'bot': bot_atoms,
228 'top': top_atoms,
229 },
230 'polar_atoms': {
231 'bot': universe.atoms[bot_atoms].select_atoms('name P*').indices.tolist(),
232 'top': universe.atoms[top_atoms].select_atoms('name P*').indices.tolist(),
233 }
234 }
236 membrane_map['no_mem_lipid'] = list(map(int, no_mem_lipids))
238 # Print results
239 no_mem_lipids_str = f'{len(no_mem_lipids)} lipid atoms not assigned to any membrane.' if len(no_mem_lipids) > 0 else ''
240 print(f'{n_mems} membrane/s found in coarse-grained system. ' + no_mem_lipids_str)
242 return membrane_map
245def display_membrane_mapping(mem_map: str, pdb: str):
246 try:
247 import nglview as nv # type: ignore
248 except ImportError:
249 raise ImportError("nglview is required for displaying membrane mapping. Please install it using 'pip install nglview'.")
251 mem_map = load_json(mem_map)
252 top = f"@{','.join(map(str,mem_map['mems']['0']['leaflets']['top']))}"
253 bot = f"@{','.join(map(str,mem_map['mems']['0']['leaflets']['bot']))}"
254 no_mem = f"@{','.join(map(str,mem_map['no_mem_lipid']))}"
256 view = nv.show_file(pdb)
257 view.clear(0)
258 #view.clear(1)
259 view.add_point(selection='not protein', color='green', scale=1.5)
260 view.add_point(selection=f'{top}', color='blue')
261 view.add_point(selection=f'{bot}', color='red')
262 view.add_point(selection=no_mem, color='yellow', scale=0.5)
263 return view