Coverage for mddb_workflow / tools / membrane_mapping.py: 79%
120 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
1import os
2import numpy as np
3from biobb_mem.fatslim.fatslim_membranes import fatslim_membranes, parse_index
4from mddb_workflow.utils.auxiliar import save_json, load_json
5from mddb_workflow.utils.constants import LIPIDS_RESIDUE_NAMES
6from mddb_workflow.utils.type_hints import *
7from contextlib import redirect_stdout
10def generate_membrane_mapping(
11 inchikeys: dict[str, 'InChIKeyData'],
12 lipid_references: dict[str, dict],
13 structure_file: 'File',
14 universe: 'Universe',
15 output_file: 'File',
16) -> dict:
17 """Generate a list of residue numbers of membrane components from a given structure and topology file.
19 Args:
20 inchikeys: Dictionary mapping InChI keys to InChIKeyData objects.
21 lipid_references: Dictionary mapping InChI keys to lipid database references.
22 structure_file: File object representing the structure file (e.g., PDB, GRO).
23 universe: MDAnalysis Universe object containing the structure and topology.
24 output_file: Output JSON file to save membrane mapping.
26 Returns:
27 dict: A dictionary containing the membrane mapping.
28 - n_mems (int): Number of membranes detected.
29 - mems (dict): Dictionary where each key is a membrane index (str) and value contains:
30 - leaflets (dict): Contains 'top' and 'bot' keys, each with a list of atom indices.
31 - polar_atoms (dict): Contains 'top' and 'bot' keys, each with a list of polar atom indices.
32 - no_mem_lipid (list): List of atom indices for lipids not assigned to any membrane.
34 Notes:
35 - The function identifies lipid and non-lipid residues based on InChI keys.
36 - It classifies residues and checks for potential misclassifications.
37 - Lipid residues are selected and neighboring lipids are found.
38 - Clusters of lipids are identified, and clusters with more than 30 lipids are considered as membranes.
39 - If debug is enabled, the function returns additional information including lipid residues, neighbors, counts, and clusters.
41 """
42 if not universe: raise RuntimeError('Missing universe')
43 # Select only the lipids from the InChIKeyData
44 lipid_map = {key: inchikeys[key] for key in lipid_references.keys()}
45 if 'Cg' in universe.atoms.types:
46 membrane_map = coarse_grain_membranes(structure_file, universe)
47 else:
48 membrane_map = all_atom_membranes(lipid_map, structure_file, universe)
49 if membrane_map['n_mems'] > 0:
50 save_json(membrane_map, output_file.path)
51 return membrane_map
54def all_atom_membranes(
55 lipid_map: dict[str, 'InChIKeyData'],
56 structure_file: 'File',
57 universe: 'Universe',
58) -> dict:
59 """Generate membrane mapping for all-atom systems using FATSLiM."""
60 membrane_map = {'n_mems': 0, 'mems': {}, 'no_mem_lipid': []}
62 if len(lipid_map) == 0:
63 # no lipids found in the structure.
64 return membrane_map
66 # Select only the lipids and potential membrane members
67 lipid_ridx = []
68 glclipid_ridx = []
69 for ref in lipid_map.values():
70 lipid_ridx.extend(ref.resindices)
71 if ref.moltype == 'fragment':
72 glclipid_ridx.extend(ref.fragments)
73 # if no lipids are found, we save the empty mapping and return
74 if len(lipid_ridx) == 0:
75 # no lipids found in the structure.
76 return membrane_map
77 mem_candidates = universe.select_atoms(f'(resindex {" ".join(map(str, (lipid_ridx)))})')
79 # for better leaflet assignation we only use polar atoms
80 charges = abs(np.array([atom.charge for atom in universe.atoms]))
81 polar_atoms = []
82 for ridx in mem_candidates.residues.resindices:
83 res = universe.residues[ridx]
84 res_ch = charges[res.atoms.ix]
85 max_ch_idx = np.argmax(res_ch)
86 polar_atoms.append(res.atoms[max_ch_idx].index)
87 polar_atoms = np.array(polar_atoms)
88 headgroup_sel = f'(index {" ".join(map(str, (polar_atoms)))})'
89 # Run FATSLiM to find the membranes
90 prop = {
91 'selection': headgroup_sel,
92 'cutoff': 2.2,
93 'ignore_no_box': True,
94 'disable_logs': True,
95 'return_hydrogen': True,
96 # Do not copy the input file to the sandbox
97 'disable_sandbox': True,
98 }
99 output_ndx_path = "tmp_mem_map.ndx"
100 print(' Running BioBB FATSLiM Membranes')
101 with redirect_stdout(None):
102 fatslim_membranes(input_top_path=structure_file.absolute_path,
103 output_ndx_path=output_ndx_path,
104 properties=prop)
105 # Parse the output to get the membrane mapping
106 mem_map = parse_index(output_ndx_path)
107 os.remove(output_ndx_path)
108 # Save the membrane mapping as a JSON file
109 n_mems = len(mem_map) // 2
110 membrane_map['n_mems'] = n_mems
111 no_mem_lipids = set(mem_candidates.atoms.indices)
112 for i in range(n_mems):
113 # and they are not assigned to any membrane. FATSLiM indexes start at 1
114 bot = (np.array(mem_map[f'membrane_{i + 1}_leaflet_1']) - 1).tolist() # JSON does not support numpy arrays
115 top = (np.array(mem_map[f'membrane_{i + 1}_leaflet_2']) - 1).tolist()
116 top_ridx = universe.atoms[top].residues.resindices
117 bot_ridx = universe.atoms[bot].residues.resindices
118 # Some polar atoms from the glucids are to far from the polar atoms of the lipids
119 top = set(top)
120 bot = set(bot)
121 remove_ridx = []
122 for grp in glclipid_ridx:
123 if np.in1d(grp, top_ridx).any():
124 top.update(universe.residues[grp].atoms.indices)
125 remove_ridx.append(grp)
126 continue
127 if np.in1d(grp, bot_ridx).any():
128 bot.update(universe.residues[grp].atoms.indices)
129 remove_ridx.append(grp)
130 for grp in remove_ridx:
131 glclipid_ridx.remove(grp)
132 # Remove lipids not assigned to any membrane
133 no_mem_lipids -= top
134 no_mem_lipids -= bot
135 # Check in which leaflets each of the polar atoms is and save them
136 membrane_map['mems'][str(i)] = {
137 'leaflets': {
138 'bot': list(map(int, bot)),
139 'top': list(map(int, top))
140 },
141 'polar_atoms': {
142 'bot': polar_atoms[np.in1d(polar_atoms, list(bot))].tolist(),
143 'top': polar_atoms[np.in1d(polar_atoms, list(top))].tolist()
144 }
145 }
147 membrane_map['no_mem_lipid'] = list(map(int, no_mem_lipids))
148 # Print the results and save the membrane mapping
149 no_mem_lipids_str = f'{len(glclipid_ridx)} lipid/s not assigned to any membrane.' if len(glclipid_ridx) > 0 else ''
150 print(f'{n_mems} membrane/s found. ' + no_mem_lipids_str)
151 return membrane_map
154def coarse_grain_membranes(structure_file: 'File', universe: 'Universe') -> dict:
155 """Generate membrane mapping for coarse-grained systems using residue names and P atoms as headgroups."""
156 membrane_map = {'n_mems': 0, 'mems': {}, 'no_mem_lipid': []}
158 # Find all lipid residues in the system
159 lipid_residues = []
160 for resname in LIPIDS_RESIDUE_NAMES:
161 lipids = universe.select_atoms(f'resname {resname}')
162 if len(lipids) > 0:
163 lipid_residues.append(resname)
165 if len(lipid_residues) == 0:
166 print("No coarse-grained lipid residues found in the structure.")
167 return membrane_map
169 # Select P atoms (headgroups) from lipid residues
170 headgroup_atoms = f'resname {" ".join(lipid_residues)} and name P*'
172 # Run FATSLiM to find the membranes
173 prop = {
174 'selection': headgroup_atoms,
175 'cutoff': 2.5, # Slightly larger cutoff for CG systems
176 'ignore_no_box': True,
177 'disable_logs': True,
178 'disable_sandbox': True,
179 }
181 output_ndx_path = "tmp_cg_mem_map.ndx"
182 print(' Running BioBB FATSLiM Membranes for coarse-grained system')
184 # with redirect_stdout(None):
185 fatslim_membranes(input_top_path=structure_file.absolute_path,
186 output_ndx_path=output_ndx_path,
187 properties=prop)
189 # Parse the output to get the membrane mapping
190 mem_map = parse_index(output_ndx_path)
191 os.remove(output_ndx_path)
193 # Process the membrane mapping
194 n_mems = len(mem_map) // 2
195 membrane_map['n_mems'] = n_mems
197 all_lipid_atoms = universe.select_atoms(f'resname {" ".join(lipid_residues)}')
198 no_mem_lipids = set(all_lipid_atoms.atoms.indices)
200 for i in range(n_mems):
201 # FATSLiM indexes start at 1, convert to 0-based
202 bot_atoms = (np.array(mem_map[f'membrane_{i + 1}_leaflet_1']) - 1).tolist()
203 top_atoms = (np.array(mem_map[f'membrane_{i + 1}_leaflet_2']) - 1).tolist()
205 # Remove assigned atoms from no_mem_lipids
206 no_mem_lipids -= set(bot_atoms)
207 no_mem_lipids -= set(top_atoms)
209 membrane_map['mems'][str(i)] = {
210 'leaflets': {
211 'bot': bot_atoms,
212 'top': top_atoms
213 },
214 'polar_atoms': {
215 'bot': universe.atoms[bot_atoms].select_atoms('name P*').indices.tolist(),
216 'top': universe.atoms[top_atoms].select_atoms('name P*').indices.tolist()
217 }
218 }
220 membrane_map['no_mem_lipid'] = list(map(int, no_mem_lipids))
222 # Print results
223 no_mem_lipids_str = f'{len(no_mem_lipids)} lipid atoms not assigned to any membrane.' if len(no_mem_lipids) > 0 else ''
224 print(f'{n_mems} membrane/s found in coarse-grained system. ' + no_mem_lipids_str)
226 return membrane_map
229def display_membrane_mapping(mem_map: str, pdb: str):
230 """Display the membrane mapping using nglview."""
231 try:
232 import nglview as nv # type: ignore
233 except ImportError:
234 raise ImportError("nglview is required for displaying membrane mapping. Please install it using 'pip install nglview'.")
236 mem_map = load_json(mem_map)
237 top = f"@{','.join(map(str, mem_map['mems']['0']['leaflets']['top']))}"
238 bot = f"@{','.join(map(str, mem_map['mems']['0']['leaflets']['bot']))}"
239 no_mem = f"@{','.join(map(str, mem_map['no_mem_lipid']))}"
241 view = nv.show_file(pdb)
242 view.clear(0)
243 # view.clear(1)
244 view.add_point(selection='not protein', color='green', scale=1.5)
245 view.add_point(selection=f'{top}', color='blue')
246 view.add_point(selection=f'{bot}', color='red')
247 view.add_point(selection=no_mem, color='yellow', scale=0.5)
248 return view