Coverage for mddb_workflow/analyses/lipid_order.py: 79%

110 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step 

2from mddb_workflow.utils.auxiliar import save_json, load_json 

3from mddb_workflow.utils.constants import OUTPUT_LIPID_ORDER_FILENAME 

4from mddb_workflow.utils.type_hints import * 

5import numpy as np 

6import MDAnalysis 

7 

8 

9def lipid_order( 

10 universe: 'MDAnalysis.Universe', 

11 output_directory: str, 

12 membrane_map: dict, 

13 lipid_map: dict, 

14 cg_residues: list[int], 

15 snapshots: int, 

16 frames_limit: int = 100): 

17 """ 

18 Calculate lipid order parameters for membranes. 

19 This function computes the order parameter (S) for lipid acyl chains, defined as: 

20 S = 0.5*(3*<cos²θ> - 1) 

21 where θ is the angle between the C-H bond and the membrane normal (z-axis). 

22 

23 Notes: 

24 - For non-cholesterol lipids, analyzes all acyl chains separately. 

25 - For cholesterol, uses carbon atoms bonded to hydrogen. 

26 - The order parameter is calculated for each carbon atom in the chain. 

27 """ 

28 if membrane_map is None or membrane_map['n_mems'] == 0: 

29 print('-> Skipping lipid order analysis') 

30 return 

31 if len(cg_residues)>0: 

32 # RUBEN: se puede hacer con gorder pero hace falta topology con bonds 

33 print('-> Skipping lipid order analysis. Not implemented for CG models') 

34 return 

35 # Set the main output filepath 

36 output_analysis_filepath = f'{output_directory}/{OUTPUT_LIPID_ORDER_FILENAME}' 

37 

38 order_parameters_dict = {} 

39 frame_step, _ = calculate_frame_step(snapshots, frames_limit) 

40 for ref in lipid_map: 

41 inchikey = ref['match']['ref']['inchikey'] 

42 # Take the first residue of the reference 

43 res = universe.residues[ref['residue_indices'][0]] 

44 # If not the cholesterol inchikey 

45 if inchikey != 'HVYWMOMLDIMFJA-DPAQBDIFSA-N': 

46 # Lipids can have multiple acyl chains 

47 carbon_groups = get_all_acyl_chains(res) 

48 else: 

49 carbon_groups = [res.atoms.select_atoms( 

50 'element C and bonded element H').indices] 

51 

52 order_parameters_dict[inchikey] = {} 

53 # For every 'tail' 

54 for chain_idx, group in enumerate(carbon_groups): 

55 atoms = res.universe.atoms[group] 

56 C_names = sorted([atom.name for atom in atoms], 

57 key=natural_sort_key) 

58 # Find all C-H bonds indices 

59 ch_pairs = find_CH_bonds(universe, ref["residue_indices"], C_names) 

60 # Initialize the order parameters to sum over the trajectory 

61 order_parameters = [] 

62 costheta_sums = {C_name: np.zeros( 

63 len(ch_pairs[C_name]['C'])) for C_name in C_names} 

64 n = 0 

65 # Loop over the trajectory 

66 for ts in universe.trajectory[0:snapshots:frame_step]: 

67 for C_name in C_names: 

68 d = universe.atoms[ch_pairs[C_name]['C']].positions - \ 

69 universe.atoms[ch_pairs[C_name]['H']].positions 

70 costheta_sums[C_name] += d[:, 2]**2 / \ 

71 np.linalg.norm(d, axis=1)**2 

72 n += 1 

73 order_parameters = 1.5 * \ 

74 np.array([costheta_sums[C_name].mean() 

75 for C_name in C_names])/n - 0.5 

76 serrors = 1.5 * np.array([costheta_sums[C_name].std() 

77 for C_name in C_names])/n 

78 order_parameters_dict[inchikey][str(chain_idx)] = { 

79 'atoms': C_names, 

80 'avg': order_parameters.tolist(), 

81 'std': serrors.tolist()} 

82 # Save the data 

83 data = {'data': order_parameters_dict} 

84 save_json(data, output_analysis_filepath) 

85 

86 

87def get_all_acyl_chains(residue: 'MDAnalysis.Residue') -> list[list[int]]: 

88 """ 

89 Finds all groups of connected Carbon atoms within a residue, including cyclic structures. 

90 

91 Parameters 

92 ---------- 

93 residue : MDAnalysis.core.groups.Residue 

94 The residue to analyze 

95 

96 Returns 

97 ------- 

98 list 

99 A list of lists, where each inner list contains atom indices of  

100 connected Carbon atoms forming a distinct group 

101 """ 

102 def explore_carbon_group(start_atom, visited): 

103 """Helper function to explore a connected group of carbons""" 

104 to_visit = [start_atom] 

105 group = set() 

106 while to_visit: 

107 current_atom = to_visit.pop(0) 

108 if current_atom.index in visited: 

109 continue 

110 visited.add(current_atom.index) 

111 if current_atom.element == 'C': 

112 group.add(current_atom.index) 

113 # Add all bonded carbon atoms to the visit queue 

114 for bond in current_atom.bonds: 

115 for bonded_atom in bond.atoms: 

116 if (bonded_atom.element == 'C' and 

117 bonded_atom.index not in visited): 

118 to_visit.append(bonded_atom) 

119 return list(group) 

120 

121 # Get all Carbon atoms in the residue 

122 carbon_atoms = residue.atoms.select_atoms('element C') 

123 visited = set() 

124 # Remove ester carbons (without hydrogens) 

125 for at in carbon_atoms: 

126 if 'H' not in at.bonded_atoms.elements: 

127 visited.add(at.index) 

128 carbon_groups = [] 

129 # Find all distinct groups of connected carbons 

130 for carbon in carbon_atoms: 

131 if carbon.index not in visited: 

132 # Get all carbons connected to this one 

133 connected_carbons = explore_carbon_group(carbon, visited) 

134 if len(connected_carbons) > 6: # Only add non-empty groups 

135 carbon_groups.append(sorted(connected_carbons)) 

136 return carbon_groups 

137 

138 

139def find_CH_bonds(universe, lipid_resindices, atom_names): 

140 """ 

141 Find all carbon-hydrogen bonds in a lipid molecule using topology information. 

142 

143 Args: 

144 universe: MDAnalysis Universe object with topology information 

145 lipid_resname: Residue name of the lipid (default: 'DPPC') 

146 

147 Returns: 

148 dict: Dict for each carbon, of tuples containing (carbon_index, hydrogen_index) for each C-H bond 

149 """ 

150 # Get all carbons in the lipid 

151 carbons = universe.residues[lipid_resindices].atoms.select_atoms( 

152 'name ' + ' '.join(atom_names)) 

153 # Get how many carbons with different name that we will average over later 

154 ch_pairs = {} 

155 for name in atom_names: 

156 ch_pairs[name] = {'C': [], 'H': []} 

157 

158 # Iterate through each carbon 

159 for carbon in carbons: 

160 # Get all bonds for this carbon 

161 for bond in carbon.bonds: 

162 # Check if the bond is between C and H 

163 atoms = bond.atoms 

164 at1_nm = atoms[0].name 

165 at2_nm = atoms[1].name 

166 if ('C' in at1_nm and 'H' in at2_nm): 

167 # Store the indices making sure carbon is first 

168 ch_pairs[carbon.name]['C'].append(atoms[0].index) 

169 ch_pairs[carbon.name]['H'].append(atoms[1].index) 

170 elif 'H' in at1_nm and 'C' in at2_nm: 

171 ch_pairs[carbon.name]['C'].append(atoms[1].index) 

172 ch_pairs[carbon.name]['H'].append(atoms[0].index) 

173 # Convert lists to numpy arrays 

174 for name in ch_pairs: 

175 ch_pairs[name]['C'] = np.array(ch_pairs[name]['C']) 

176 ch_pairs[name]['H'] = np.array(ch_pairs[name]['H']) 

177 return ch_pairs 

178 

179 

180def natural_sort_key(s): 

181 """Sort strings that contain numbers in human order (C1, C2, C3 instead of C1, C11, C2)""" 

182 import re 

183 return [int(text) if text.isdigit() else text.lower() 

184 for text in re.split('([0-9]+)', s)] 

185 

186 

187def plot_lipid_order(data_filepath: str): 

188 """ 

189 Visualize the lipid order analysis by plotting the order parameters. 

190 

191 Parameters 

192 ---------- 

193 data_filepath : str 

194 Path to the JSON file containing the lipid order analysis results. 

195 """ 

196 import matplotlib.pyplot as plt 

197 

198 # Load the lipid order data 

199 data = load_json(data_filepath) 

200 order_parameters_dict = data.get('data', {}) 

201 

202 # Create output directory if it doesn't exist 

203 

204 # Iterate over each lipid type 

205 for inchikey, chains in order_parameters_dict.items(): 

206 plt.figure(figsize=(10, 6)) 

207 for chain_idx, chain_data in chains.items(): 

208 avg = chain_data['avg'] 

209 std = chain_data['std'] 

210 

211 # Use the index of the atom to align both chains in the graph 

212 atom_indices = range(len(avg)) 

213 

214 # Plot the order parameters with error bars for each chain 

215 eb=plt.errorbar(atom_indices, avg, yerr=std, fmt='o-', label=f'Chain {chain_idx}') 

216 eb[-1][0].set_linestyle('--') 

217 

218 plt.xlabel('Atom Index') 

219 plt.ylabel('Order Parameter (S)') 

220 plt.title(f'Lipid Order Parameters for {inchikey}') 

221 plt.xticks(rotation=45, ha='right') 

222 plt.tight_layout() 

223 plt.legend() 

224 plt.show()