Coverage for mddb_workflow/analyses/lipid_order.py: 79%
110 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
1from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step
2from mddb_workflow.utils.auxiliar import save_json, load_json
3from mddb_workflow.utils.constants import OUTPUT_LIPID_ORDER_FILENAME
4from mddb_workflow.utils.type_hints import *
5import numpy as np
6import MDAnalysis
9def lipid_order(
10 universe: 'MDAnalysis.Universe',
11 output_directory: str,
12 membrane_map: dict,
13 lipid_map: dict,
14 cg_residues: list[int],
15 snapshots: int,
16 frames_limit: int = 100):
17 """
18 Calculate lipid order parameters for membranes.
19 This function computes the order parameter (S) for lipid acyl chains, defined as:
20 S = 0.5*(3*<cos²θ> - 1)
21 where θ is the angle between the C-H bond and the membrane normal (z-axis).
23 Notes:
24 - For non-cholesterol lipids, analyzes all acyl chains separately.
25 - For cholesterol, uses carbon atoms bonded to hydrogen.
26 - The order parameter is calculated for each carbon atom in the chain.
27 """
28 if membrane_map is None or membrane_map['n_mems'] == 0:
29 print('-> Skipping lipid order analysis')
30 return
31 if len(cg_residues)>0:
32 # RUBEN: se puede hacer con gorder pero hace falta topology con bonds
33 print('-> Skipping lipid order analysis. Not implemented for CG models')
34 return
35 # Set the main output filepath
36 output_analysis_filepath = f'{output_directory}/{OUTPUT_LIPID_ORDER_FILENAME}'
38 order_parameters_dict = {}
39 frame_step, _ = calculate_frame_step(snapshots, frames_limit)
40 for ref in lipid_map:
41 inchikey = ref['match']['ref']['inchikey']
42 # Take the first residue of the reference
43 res = universe.residues[ref['residue_indices'][0]]
44 # If not the cholesterol inchikey
45 if inchikey != 'HVYWMOMLDIMFJA-DPAQBDIFSA-N':
46 # Lipids can have multiple acyl chains
47 carbon_groups = get_all_acyl_chains(res)
48 else:
49 carbon_groups = [res.atoms.select_atoms(
50 'element C and bonded element H').indices]
52 order_parameters_dict[inchikey] = {}
53 # For every 'tail'
54 for chain_idx, group in enumerate(carbon_groups):
55 atoms = res.universe.atoms[group]
56 C_names = sorted([atom.name for atom in atoms],
57 key=natural_sort_key)
58 # Find all C-H bonds indices
59 ch_pairs = find_CH_bonds(universe, ref["residue_indices"], C_names)
60 # Initialize the order parameters to sum over the trajectory
61 order_parameters = []
62 costheta_sums = {C_name: np.zeros(
63 len(ch_pairs[C_name]['C'])) for C_name in C_names}
64 n = 0
65 # Loop over the trajectory
66 for ts in universe.trajectory[0:snapshots:frame_step]:
67 for C_name in C_names:
68 d = universe.atoms[ch_pairs[C_name]['C']].positions - \
69 universe.atoms[ch_pairs[C_name]['H']].positions
70 costheta_sums[C_name] += d[:, 2]**2 / \
71 np.linalg.norm(d, axis=1)**2
72 n += 1
73 order_parameters = 1.5 * \
74 np.array([costheta_sums[C_name].mean()
75 for C_name in C_names])/n - 0.5
76 serrors = 1.5 * np.array([costheta_sums[C_name].std()
77 for C_name in C_names])/n
78 order_parameters_dict[inchikey][str(chain_idx)] = {
79 'atoms': C_names,
80 'avg': order_parameters.tolist(),
81 'std': serrors.tolist()}
82 # Save the data
83 data = {'data': order_parameters_dict}
84 save_json(data, output_analysis_filepath)
87def get_all_acyl_chains(residue: 'MDAnalysis.Residue') -> list[list[int]]:
88 """
89 Finds all groups of connected Carbon atoms within a residue, including cyclic structures.
91 Parameters
92 ----------
93 residue : MDAnalysis.core.groups.Residue
94 The residue to analyze
96 Returns
97 -------
98 list
99 A list of lists, where each inner list contains atom indices of
100 connected Carbon atoms forming a distinct group
101 """
102 def explore_carbon_group(start_atom, visited):
103 """Helper function to explore a connected group of carbons"""
104 to_visit = [start_atom]
105 group = set()
106 while to_visit:
107 current_atom = to_visit.pop(0)
108 if current_atom.index in visited:
109 continue
110 visited.add(current_atom.index)
111 if current_atom.element == 'C':
112 group.add(current_atom.index)
113 # Add all bonded carbon atoms to the visit queue
114 for bond in current_atom.bonds:
115 for bonded_atom in bond.atoms:
116 if (bonded_atom.element == 'C' and
117 bonded_atom.index not in visited):
118 to_visit.append(bonded_atom)
119 return list(group)
121 # Get all Carbon atoms in the residue
122 carbon_atoms = residue.atoms.select_atoms('element C')
123 visited = set()
124 # Remove ester carbons (without hydrogens)
125 for at in carbon_atoms:
126 if 'H' not in at.bonded_atoms.elements:
127 visited.add(at.index)
128 carbon_groups = []
129 # Find all distinct groups of connected carbons
130 for carbon in carbon_atoms:
131 if carbon.index not in visited:
132 # Get all carbons connected to this one
133 connected_carbons = explore_carbon_group(carbon, visited)
134 if len(connected_carbons) > 6: # Only add non-empty groups
135 carbon_groups.append(sorted(connected_carbons))
136 return carbon_groups
139def find_CH_bonds(universe, lipid_resindices, atom_names):
140 """
141 Find all carbon-hydrogen bonds in a lipid molecule using topology information.
143 Args:
144 universe: MDAnalysis Universe object with topology information
145 lipid_resname: Residue name of the lipid (default: 'DPPC')
147 Returns:
148 dict: Dict for each carbon, of tuples containing (carbon_index, hydrogen_index) for each C-H bond
149 """
150 # Get all carbons in the lipid
151 carbons = universe.residues[lipid_resindices].atoms.select_atoms(
152 'name ' + ' '.join(atom_names))
153 # Get how many carbons with different name that we will average over later
154 ch_pairs = {}
155 for name in atom_names:
156 ch_pairs[name] = {'C': [], 'H': []}
158 # Iterate through each carbon
159 for carbon in carbons:
160 # Get all bonds for this carbon
161 for bond in carbon.bonds:
162 # Check if the bond is between C and H
163 atoms = bond.atoms
164 at1_nm = atoms[0].name
165 at2_nm = atoms[1].name
166 if ('C' in at1_nm and 'H' in at2_nm):
167 # Store the indices making sure carbon is first
168 ch_pairs[carbon.name]['C'].append(atoms[0].index)
169 ch_pairs[carbon.name]['H'].append(atoms[1].index)
170 elif 'H' in at1_nm and 'C' in at2_nm:
171 ch_pairs[carbon.name]['C'].append(atoms[1].index)
172 ch_pairs[carbon.name]['H'].append(atoms[0].index)
173 # Convert lists to numpy arrays
174 for name in ch_pairs:
175 ch_pairs[name]['C'] = np.array(ch_pairs[name]['C'])
176 ch_pairs[name]['H'] = np.array(ch_pairs[name]['H'])
177 return ch_pairs
180def natural_sort_key(s):
181 """Sort strings that contain numbers in human order (C1, C2, C3 instead of C1, C11, C2)"""
182 import re
183 return [int(text) if text.isdigit() else text.lower()
184 for text in re.split('([0-9]+)', s)]
187def plot_lipid_order(data_filepath: str):
188 """
189 Visualize the lipid order analysis by plotting the order parameters.
191 Parameters
192 ----------
193 data_filepath : str
194 Path to the JSON file containing the lipid order analysis results.
195 """
196 import matplotlib.pyplot as plt
198 # Load the lipid order data
199 data = load_json(data_filepath)
200 order_parameters_dict = data.get('data', {})
202 # Create output directory if it doesn't exist
204 # Iterate over each lipid type
205 for inchikey, chains in order_parameters_dict.items():
206 plt.figure(figsize=(10, 6))
207 for chain_idx, chain_data in chains.items():
208 avg = chain_data['avg']
209 std = chain_data['std']
211 # Use the index of the atom to align both chains in the graph
212 atom_indices = range(len(avg))
214 # Plot the order parameters with error bars for each chain
215 eb=plt.errorbar(atom_indices, avg, yerr=std, fmt='o-', label=f'Chain {chain_idx}')
216 eb[-1][0].set_linestyle('--')
218 plt.xlabel('Atom Index')
219 plt.ylabel('Order Parameter (S)')
220 plt.title(f'Lipid Order Parameters for {inchikey}')
221 plt.xticks(rotation=45, ha='right')
222 plt.tight_layout()
223 plt.legend()
224 plt.show()