Coverage for mddb_workflow / analyses / lipid_order.py: 79%

111 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step 

2from mddb_workflow.utils.auxiliar import save_json, load_json 

3from mddb_workflow.utils.constants import OUTPUT_LIPID_ORDER_FILENAME 

4from mddb_workflow.utils.type_hints import * 

5import numpy as np 

6import MDAnalysis 

7 

8 

9def lipid_order( 

10 universe: 'MDAnalysis.Universe', 

11 output_directory: str, 

12 membrane_map: dict, 

13 inchikey_map: list[dict], 

14 cg_residues: list[int], 

15 snapshots: int, 

16 frames_limit: int = 100 

17): 

18 """Calculate lipid order parameters for membranes. 

19 This function computes the order parameter (S) for lipid acyl chains, defined as: 

20 S = 0.5*(3*<cos²θ> - 1) 

21 where θ is the angle between the C-H bond and the membrane normal (z-axis). 

22 

23 Notes: 

24 - For non-cholesterol lipids, analyzes all acyl chains separately. 

25 - For cholesterol, uses carbon atoms bonded to hydrogen. 

26 - The order parameter is calculated for each carbon atom in the chains. 

27 

28 """ 

29 if membrane_map is None or membrane_map['n_mems'] == 0: 

30 print('-> Skipping lipid order analysis') 

31 return 

32 if len(cg_residues) > 0: 

33 # RUBEN: se puede hacer con gorder pero hace falta topology con bonds 

34 print('-> Skipping lipid order analysis. Not implemented for CG models') 

35 return 

36 # Set the main output filepath 

37 output_analysis_filepath = f'{output_directory}/{OUTPUT_LIPID_ORDER_FILENAME}' 

38 

39 order_parameters_dict = {} 

40 frame_step, _ = calculate_frame_step(snapshots, frames_limit) 

41 for ref in inchikey_map: 

42 if not ref['is_lipid']: continue 

43 inchikey = ref['match']['ref']['inchikey'] 

44 # Take the first residue of the reference 

45 res = universe.residues[ref['residue_indices'][0]] 

46 # If not the cholesterol inchikey 

47 if inchikey != 'HVYWMOMLDIMFJA-DPAQBDIFSA-N': 

48 # Lipids can have multiple acyl chains 

49 carbon_groups = get_all_acyl_chains(res) 

50 else: 

51 carbon_groups = [res.atoms.select_atoms( 

52 'element C and bonded element H').indices] 

53 

54 order_parameters_dict[inchikey] = {} 

55 # For every 'tail' 

56 for chain_idx, group in enumerate(carbon_groups): 

57 atoms = res.universe.atoms[group] 

58 C_names = sorted([atom.name for atom in atoms], 

59 key=natural_sort_key) 

60 # Find all C-H bonds indices 

61 ch_pairs = find_CH_bonds(universe, ref["residue_indices"], C_names) 

62 # Initialize the order parameters to sum over the trajectory 

63 order_parameters = [] 

64 costheta_sums = {C_name: np.zeros( 

65 len(ch_pairs[C_name]['C'])) for C_name in C_names} 

66 n = 0 

67 # Loop over the trajectory 

68 for ts in universe.trajectory[0:snapshots:frame_step]: 

69 for C_name in C_names: 

70 d = universe.atoms[ch_pairs[C_name]['C']].positions - \ 

71 universe.atoms[ch_pairs[C_name]['H']].positions 

72 costheta_sums[C_name] += d[:, 2]**2 / \ 

73 np.linalg.norm(d, axis=1)**2 

74 n += 1 

75 order_parameters = 1.5 * \ 

76 np.array([costheta_sums[C_name].mean() 

77 for C_name in C_names]) / n - 0.5 

78 serrors = 1.5 * np.array([costheta_sums[C_name].std() 

79 for C_name in C_names]) / n 

80 order_parameters_dict[inchikey][str(chain_idx)] = { 

81 'atoms': C_names, 

82 'avg': order_parameters.tolist(), 

83 'std': serrors.tolist()} 

84 # Save the data 

85 data = {'data': order_parameters_dict} 

86 save_json(data, output_analysis_filepath) 

87 

88 

89def get_all_acyl_chains(residue: 'MDAnalysis.Residue') -> list[list[int]]: 

90 """Find all groups of connected Carbon atoms within a residue, including cyclic structures. 

91 

92 Args: 

93 residue (MDAnalysis.core.groups.Residue): The residue to analyze 

94 

95 Returns: 

96 list 

97 A list of lists, where each inner list contains atom indices of 

98 connected Carbon atoms forming a distinct group 

99 

100 """ 

101 def explore_carbon_group(start_atom, visited): 

102 """Explore a connected group of carbons.""" 

103 to_visit = [start_atom] 

104 group = set() 

105 while to_visit: 

106 current_atom = to_visit.pop(0) 

107 if current_atom.index in visited: 

108 continue 

109 visited.add(current_atom.index) 

110 if current_atom.element == 'C': 

111 group.add(current_atom.index) 

112 # Add all bonded carbon atoms to the visit queue 

113 for bond in current_atom.bonds: 

114 for bonded_atom in bond.atoms: 

115 if (bonded_atom.element == 'C' and 

116 bonded_atom.index not in visited): 

117 to_visit.append(bonded_atom) 

118 return list(group) 

119 

120 # Get all Carbon atoms in the residue 

121 carbon_atoms = residue.atoms.select_atoms('element C') 

122 visited = set() 

123 # Remove ester carbons (without hydrogens) 

124 for at in carbon_atoms: 

125 if 'H' not in at.bonded_atoms.elements: 

126 visited.add(at.index) 

127 carbon_groups = [] 

128 # Find all distinct groups of connected carbons 

129 for carbon in carbon_atoms: 

130 if carbon.index not in visited: 

131 # Get all carbons connected to this one 

132 connected_carbons = explore_carbon_group(carbon, visited) 

133 if len(connected_carbons) > 6: # Only add non-empty groups 

134 carbon_groups.append(sorted(connected_carbons)) 

135 return carbon_groups 

136 

137 

138def find_CH_bonds(universe, lipid_resindices, atom_names): 

139 """Find all carbon-hydrogen bonds in a lipid molecule using topology information. 

140 

141 Args: 

142 universe: MDAnalysis Universe object with topology information 

143 lipid_resindices: List of residue indices corresponding to the lipid 

144 atom_names: List of carbon atom names to search for C-H bonds 

145 

146 Returns: 

147 dict: Dict for each carbon, of tuples containing (carbon_index, hydrogen_index) for each C-H bond 

148 

149 """ 

150 # Get all carbons in the lipid 

151 carbons = universe.residues[lipid_resindices].atoms.select_atoms( 

152 'name ' + ' '.join(atom_names)) 

153 # Get how many carbons with different name that we will average over later 

154 ch_pairs = {} 

155 for name in atom_names: 

156 ch_pairs[name] = {'C': [], 'H': []} 

157 

158 # Iterate through each carbon 

159 for carbon in carbons: 

160 # Get all bonds for this carbon 

161 for bond in carbon.bonds: 

162 # Check if the bond is between C and H 

163 atoms = bond.atoms 

164 at1_nm = atoms[0].name 

165 at2_nm = atoms[1].name 

166 if ('C' in at1_nm and 'H' in at2_nm): 

167 # Store the indices making sure carbon is first 

168 ch_pairs[carbon.name]['C'].append(atoms[0].index) 

169 ch_pairs[carbon.name]['H'].append(atoms[1].index) 

170 elif 'H' in at1_nm and 'C' in at2_nm: 

171 ch_pairs[carbon.name]['C'].append(atoms[1].index) 

172 ch_pairs[carbon.name]['H'].append(atoms[0].index) 

173 # Convert lists to numpy arrays 

174 for name in ch_pairs: 

175 ch_pairs[name]['C'] = np.array(ch_pairs[name]['C']) 

176 ch_pairs[name]['H'] = np.array(ch_pairs[name]['H']) 

177 return ch_pairs 

178 

179 

180def natural_sort_key(s): 

181 """Sort strings that contain numbers in human order (C1, C2, C3 instead of C1, C11, C2).""" 

182 import re 

183 return [int(text) if text.isdigit() else text.lower() 

184 for text in re.split('([0-9]+)', s)] 

185 

186 

187def plot_lipid_order(data_filepath: str): 

188 """Visualize the lipid order analysis by plotting the order parameters. 

189 

190 Args: 

191 data_filepath (str): Path to the JSON file containing the lipid order analysis results. 

192 

193 """ 

194 import matplotlib.pyplot as plt 

195 

196 # Load the lipid order data 

197 data = load_json(data_filepath) 

198 order_parameters_dict = data.get('data', {}) 

199 

200 # Create output directory if it doesn't exist 

201 

202 # Iterate over each lipid type 

203 for inchikey, chains in order_parameters_dict.items(): 

204 plt.figure(figsize=(10, 6)) 

205 for chain_idx, chain_data in chains.items(): 

206 avg = chain_data['avg'] 

207 std = chain_data['std'] 

208 

209 # Use the index of the atom to align both chains in the graph 

210 atom_indices = range(len(avg)) 

211 

212 # Plot the order parameters with error bars for each chain 

213 eb = plt.errorbar(atom_indices, avg, yerr=std, fmt='o-', label=f'Chain {chain_idx}') 

214 eb[-1][0].set_linestyle('--') 

215 

216 plt.xlabel('Atom Index') 

217 plt.ylabel('Order Parameter (S)') 

218 plt.title(f'Lipid Order Parameters for {inchikey}') 

219 plt.xticks(rotation=45, ha='right') 

220 plt.tight_layout() 

221 plt.legend() 

222 plt.show()