Coverage for mddb_workflow / analyses / lipid_order.py: 79%
111 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
1from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step
2from mddb_workflow.utils.auxiliar import save_json, load_json
3from mddb_workflow.utils.constants import OUTPUT_LIPID_ORDER_FILENAME
4from mddb_workflow.utils.type_hints import *
5import numpy as np
6import MDAnalysis
9def lipid_order(
10 universe: 'MDAnalysis.Universe',
11 output_directory: str,
12 membrane_map: dict,
13 inchikey_map: list[dict],
14 cg_residues: list[int],
15 snapshots: int,
16 frames_limit: int = 100
17):
18 """Calculate lipid order parameters for membranes.
19 This function computes the order parameter (S) for lipid acyl chains, defined as:
20 S = 0.5*(3*<cos²θ> - 1)
21 where θ is the angle between the C-H bond and the membrane normal (z-axis).
23 Notes:
24 - For non-cholesterol lipids, analyzes all acyl chains separately.
25 - For cholesterol, uses carbon atoms bonded to hydrogen.
26 - The order parameter is calculated for each carbon atom in the chains.
28 """
29 if membrane_map is None or membrane_map['n_mems'] == 0:
30 print('-> Skipping lipid order analysis')
31 return
32 if len(cg_residues) > 0:
33 # RUBEN: se puede hacer con gorder pero hace falta topology con bonds
34 print('-> Skipping lipid order analysis. Not implemented for CG models')
35 return
36 # Set the main output filepath
37 output_analysis_filepath = f'{output_directory}/{OUTPUT_LIPID_ORDER_FILENAME}'
39 order_parameters_dict = {}
40 frame_step, _ = calculate_frame_step(snapshots, frames_limit)
41 for ref in inchikey_map:
42 if not ref['is_lipid']: continue
43 inchikey = ref['match']['ref']['inchikey']
44 # Take the first residue of the reference
45 res = universe.residues[ref['residue_indices'][0]]
46 # If not the cholesterol inchikey
47 if inchikey != 'HVYWMOMLDIMFJA-DPAQBDIFSA-N':
48 # Lipids can have multiple acyl chains
49 carbon_groups = get_all_acyl_chains(res)
50 else:
51 carbon_groups = [res.atoms.select_atoms(
52 'element C and bonded element H').indices]
54 order_parameters_dict[inchikey] = {}
55 # For every 'tail'
56 for chain_idx, group in enumerate(carbon_groups):
57 atoms = res.universe.atoms[group]
58 C_names = sorted([atom.name for atom in atoms],
59 key=natural_sort_key)
60 # Find all C-H bonds indices
61 ch_pairs = find_CH_bonds(universe, ref["residue_indices"], C_names)
62 # Initialize the order parameters to sum over the trajectory
63 order_parameters = []
64 costheta_sums = {C_name: np.zeros(
65 len(ch_pairs[C_name]['C'])) for C_name in C_names}
66 n = 0
67 # Loop over the trajectory
68 for ts in universe.trajectory[0:snapshots:frame_step]:
69 for C_name in C_names:
70 d = universe.atoms[ch_pairs[C_name]['C']].positions - \
71 universe.atoms[ch_pairs[C_name]['H']].positions
72 costheta_sums[C_name] += d[:, 2]**2 / \
73 np.linalg.norm(d, axis=1)**2
74 n += 1
75 order_parameters = 1.5 * \
76 np.array([costheta_sums[C_name].mean()
77 for C_name in C_names]) / n - 0.5
78 serrors = 1.5 * np.array([costheta_sums[C_name].std()
79 for C_name in C_names]) / n
80 order_parameters_dict[inchikey][str(chain_idx)] = {
81 'atoms': C_names,
82 'avg': order_parameters.tolist(),
83 'std': serrors.tolist()}
84 # Save the data
85 data = {'data': order_parameters_dict}
86 save_json(data, output_analysis_filepath)
89def get_all_acyl_chains(residue: 'MDAnalysis.Residue') -> list[list[int]]:
90 """Find all groups of connected Carbon atoms within a residue, including cyclic structures.
92 Args:
93 residue (MDAnalysis.core.groups.Residue): The residue to analyze
95 Returns:
96 list
97 A list of lists, where each inner list contains atom indices of
98 connected Carbon atoms forming a distinct group
100 """
101 def explore_carbon_group(start_atom, visited):
102 """Explore a connected group of carbons."""
103 to_visit = [start_atom]
104 group = set()
105 while to_visit:
106 current_atom = to_visit.pop(0)
107 if current_atom.index in visited:
108 continue
109 visited.add(current_atom.index)
110 if current_atom.element == 'C':
111 group.add(current_atom.index)
112 # Add all bonded carbon atoms to the visit queue
113 for bond in current_atom.bonds:
114 for bonded_atom in bond.atoms:
115 if (bonded_atom.element == 'C' and
116 bonded_atom.index not in visited):
117 to_visit.append(bonded_atom)
118 return list(group)
120 # Get all Carbon atoms in the residue
121 carbon_atoms = residue.atoms.select_atoms('element C')
122 visited = set()
123 # Remove ester carbons (without hydrogens)
124 for at in carbon_atoms:
125 if 'H' not in at.bonded_atoms.elements:
126 visited.add(at.index)
127 carbon_groups = []
128 # Find all distinct groups of connected carbons
129 for carbon in carbon_atoms:
130 if carbon.index not in visited:
131 # Get all carbons connected to this one
132 connected_carbons = explore_carbon_group(carbon, visited)
133 if len(connected_carbons) > 6: # Only add non-empty groups
134 carbon_groups.append(sorted(connected_carbons))
135 return carbon_groups
138def find_CH_bonds(universe, lipid_resindices, atom_names):
139 """Find all carbon-hydrogen bonds in a lipid molecule using topology information.
141 Args:
142 universe: MDAnalysis Universe object with topology information
143 lipid_resindices: List of residue indices corresponding to the lipid
144 atom_names: List of carbon atom names to search for C-H bonds
146 Returns:
147 dict: Dict for each carbon, of tuples containing (carbon_index, hydrogen_index) for each C-H bond
149 """
150 # Get all carbons in the lipid
151 carbons = universe.residues[lipid_resindices].atoms.select_atoms(
152 'name ' + ' '.join(atom_names))
153 # Get how many carbons with different name that we will average over later
154 ch_pairs = {}
155 for name in atom_names:
156 ch_pairs[name] = {'C': [], 'H': []}
158 # Iterate through each carbon
159 for carbon in carbons:
160 # Get all bonds for this carbon
161 for bond in carbon.bonds:
162 # Check if the bond is between C and H
163 atoms = bond.atoms
164 at1_nm = atoms[0].name
165 at2_nm = atoms[1].name
166 if ('C' in at1_nm and 'H' in at2_nm):
167 # Store the indices making sure carbon is first
168 ch_pairs[carbon.name]['C'].append(atoms[0].index)
169 ch_pairs[carbon.name]['H'].append(atoms[1].index)
170 elif 'H' in at1_nm and 'C' in at2_nm:
171 ch_pairs[carbon.name]['C'].append(atoms[1].index)
172 ch_pairs[carbon.name]['H'].append(atoms[0].index)
173 # Convert lists to numpy arrays
174 for name in ch_pairs:
175 ch_pairs[name]['C'] = np.array(ch_pairs[name]['C'])
176 ch_pairs[name]['H'] = np.array(ch_pairs[name]['H'])
177 return ch_pairs
180def natural_sort_key(s):
181 """Sort strings that contain numbers in human order (C1, C2, C3 instead of C1, C11, C2)."""
182 import re
183 return [int(text) if text.isdigit() else text.lower()
184 for text in re.split('([0-9]+)', s)]
187def plot_lipid_order(data_filepath: str):
188 """Visualize the lipid order analysis by plotting the order parameters.
190 Args:
191 data_filepath (str): Path to the JSON file containing the lipid order analysis results.
193 """
194 import matplotlib.pyplot as plt
196 # Load the lipid order data
197 data = load_json(data_filepath)
198 order_parameters_dict = data.get('data', {})
200 # Create output directory if it doesn't exist
202 # Iterate over each lipid type
203 for inchikey, chains in order_parameters_dict.items():
204 plt.figure(figsize=(10, 6))
205 for chain_idx, chain_data in chains.items():
206 avg = chain_data['avg']
207 std = chain_data['std']
209 # Use the index of the atom to align both chains in the graph
210 atom_indices = range(len(avg))
212 # Plot the order parameters with error bars for each chain
213 eb = plt.errorbar(atom_indices, avg, yerr=std, fmt='o-', label=f'Chain {chain_idx}')
214 eb[-1][0].set_linestyle('--')
216 plt.xlabel('Atom Index')
217 plt.ylabel('Order Parameter (S)')
218 plt.title(f'Lipid Order Parameters for {inchikey}')
219 plt.xticks(rotation=45, ha='right')
220 plt.tight_layout()
221 plt.legend()
222 plt.show()