Coverage for mddb_workflow/analyses/lipid

1from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step

2from mddb_workflow.utils.auxiliar import save_json, load_json

3from mddb_workflow.utils.constants import OUTPUT_LIPID_ORDER_FILENAME

4from mddb_workflow.utils.type_hints import *

5import numpy as np

6import MDAnalysis

9def lipid_order(

10 universe: 'MDAnalysis.Universe',

11 output_directory: str,

12 membrane_map: dict,

13 lipid_map: dict,

14 cg_residues: list[int],

15 snapshots: int,

16 frames_limit: int = 100):

17 """

18 Calculate lipid order parameters for membranes.

19 This function computes the order parameter (S) for lipid acyl chains, defined as:

20 S = 0.5*(3*<cos²θ> - 1)

21 where θ is the angle between the C-H bond and the membrane normal (z-axis).

23 Notes:

24 - For non-cholesterol lipids, analyzes all acyl chains separately.

25 - For cholesterol, uses carbon atoms bonded to hydrogen.

26 - The order parameter is calculated for each carbon atom in the chain.

27 """

28 if membrane_map is None or membrane_map['n_mems'] == 0:

29 print('-> Skipping lipid order analysis')

30 return

31 if len(cg_residues)>0:

32 # RUBEN: se puede hacer con gorder pero hace falta topology con bonds

33 print('-> Skipping lipid order analysis. Not implemented for CG models')

34 return

35 # Set the main output filepath

36 output_analysis_filepath = f'{output_directory}/{OUTPUT_LIPID_ORDER_FILENAME}'

38 order_parameters_dict = {}

39 frame_step, _ = calculate_frame_step(snapshots, frames_limit)

40 for ref in lipid_map:

41 inchikey = ref['match']['ref']['inchikey']

42 # Take the first residue of the reference

43 res = universe.residues[ref['residue_indices'][0]]

44 # If not the cholesterol inchikey

45 if inchikey != 'HVYWMOMLDIMFJA-DPAQBDIFSA-N':

46 # Lipids can have multiple acyl chains

47 carbon_groups = get_all_acyl_chains(res)

48 else:

49 carbon_groups = [res.atoms.select_atoms(

50 'element C and bonded element H').indices]

52 order_parameters_dict[inchikey] = {}

53 # For every 'tail'

54 for chain_idx, group in enumerate(carbon_groups):

55 atoms = res.universe.atoms[group]

56 C_names = sorted([atom.name for atom in atoms],

57 key=natural_sort_key)

58 # Find all C-H bonds indices

59 ch_pairs = find_CH_bonds(universe, ref["residue_indices"], C_names)

60 # Initialize the order parameters to sum over the trajectory

61 order_parameters = []

62 costheta_sums = {C_name: np.zeros(

63 len(ch_pairs[C_name]['C'])) for C_name in C_names}

64 n = 0

65 # Loop over the trajectory

66 for ts in universe.trajectory[0:snapshots:frame_step]:

67 for C_name in C_names:

68 d = universe.atoms[ch_pairs[C_name]['C']].positions - \

69 universe.atoms[ch_pairs[C_name]['H']].positions

70 costheta_sums[C_name] += d[:, 2]**2 / \

71 np.linalg.norm(d, axis=1)**2

72 n += 1

73 order_parameters = 1.5 * \

74 np.array([costheta_sums[C_name].mean()

75 for C_name in C_names])/n - 0.5

76 serrors = 1.5 * np.array([costheta_sums[C_name].std()

77 for C_name in C_names])/n

78 order_parameters_dict[inchikey][str(chain_idx)] = {

79 'atoms': C_names,

80 'avg': order_parameters.tolist(),

81 'std': serrors.tolist()}

82 # Save the data

83 data = {'data': order_parameters_dict}

84 save_json(data, output_analysis_filepath)

87def get_all_acyl_chains(residue: 'MDAnalysis.Residue') -> list[list[int]]:

88 """

89 Finds all groups of connected Carbon atoms within a residue, including cyclic structures.

91 Parameters

92 ----------

93 residue : MDAnalysis.core.groups.Residue

94 The residue to analyze

96 Returns

97 -------

98 list

99 A list of lists, where each inner list contains atom indices of

100 connected Carbon atoms forming a distinct group

101 """

102 def explore_carbon_group(start_atom, visited):

103 """Helper function to explore a connected group of carbons"""

104 to_visit = [start_atom]

105 group = set()

106 while to_visit:

107 current_atom = to_visit.pop(0)

108 if current_atom.index in visited:

109 continue

110 visited.add(current_atom.index)

111 if current_atom.element == 'C':

112 group.add(current_atom.index)

113 # Add all bonded carbon atoms to the visit queue

114 for bond in current_atom.bonds:

115 for bonded_atom in bond.atoms:

116 if (bonded_atom.element == 'C' and

117 bonded_atom.index not in visited):

118 to_visit.append(bonded_atom)

119 return list(group)

120

121 # Get all Carbon atoms in the residue

122 carbon_atoms = residue.atoms.select_atoms('element C')

123 visited = set()

124 # Remove ester carbons (without hydrogens)

125 for at in carbon_atoms:

126 if 'H' not in at.bonded_atoms.elements:

127 visited.add(at.index)

128 carbon_groups = []

129 # Find all distinct groups of connected carbons

130 for carbon in carbon_atoms:

131 if carbon.index not in visited:

132 # Get all carbons connected to this one

133 connected_carbons = explore_carbon_group(carbon, visited)

134 if len(connected_carbons) > 6: # Only add non-empty groups

135 carbon_groups.append(sorted(connected_carbons))

136 return carbon_groups

137

138

139def find_CH_bonds(universe, lipid_resindices, atom_names):

140 """

141 Find all carbon-hydrogen bonds in a lipid molecule using topology information.

142

143 Args:

144 universe: MDAnalysis Universe object with topology information

145 lipid_resname: Residue name of the lipid (default: 'DPPC')

146

147 Returns:

148 dict: Dict for each carbon, of tuples containing (carbon_index, hydrogen_index) for each C-H bond

149 """

150 # Get all carbons in the lipid

151 carbons = universe.residues[lipid_resindices].atoms.select_atoms(

152 'name ' + ' '.join(atom_names))

153 # Get how many carbons with different name that we will average over later

154 ch_pairs = {}

155 for name in atom_names:

156 ch_pairs[name] = {'C': [], 'H': []}

157

158 # Iterate through each carbon

159 for carbon in carbons:

160 # Get all bonds for this carbon

161 for bond in carbon.bonds:

162 # Check if the bond is between C and H

163 atoms = bond.atoms

164 at1_nm = atoms[0].name

165 at2_nm = atoms[1].name

166 if ('C' in at1_nm and 'H' in at2_nm):

167 # Store the indices making sure carbon is first

168 ch_pairs[carbon.name]['C'].append(atoms[0].index)

169 ch_pairs[carbon.name]['H'].append(atoms[1].index)

170 elif 'H' in at1_nm and 'C' in at2_nm:

171 ch_pairs[carbon.name]['C'].append(atoms[1].index)

172 ch_pairs[carbon.name]['H'].append(atoms[0].index)

173 # Convert lists to numpy arrays

174 for name in ch_pairs:

175 ch_pairs[name]['C'] = np.array(ch_pairs[name]['C'])

176 ch_pairs[name]['H'] = np.array(ch_pairs[name]['H'])

177 return ch_pairs

178

179

180def natural_sort_key(s):

181 """Sort strings that contain numbers in human order (C1, C2, C3 instead of C1, C11, C2)"""

182 import re

183 return [int(text) if text.isdigit() else text.lower()

184 for text in re.split('([0-9]+)', s)]

185

186

187def plot_lipid_order(data_filepath: str):

188 """

189 Visualize the lipid order analysis by plotting the order parameters.

190

191 Parameters

192 ----------

193 data_filepath : str

194 Path to the JSON file containing the lipid order analysis results.

195 """

196 import matplotlib.pyplot as plt

197

198 # Load the lipid order data

199 data = load_json(data_filepath)

200 order_parameters_dict = data.get('data', {})

201

202 # Create output directory if it doesn't exist

203

204 # Iterate over each lipid type

205 for inchikey, chains in order_parameters_dict.items():

206 plt.figure(figsize=(10, 6))

207 for chain_idx, chain_data in chains.items():

208 avg = chain_data['avg']

209 std = chain_data['std']

210

211 # Use the index of the atom to align both chains in the graph

212 atom_indices = range(len(avg))

213

214 # Plot the order parameters with error bars for each chain

215 eb=plt.errorbar(atom_indices, avg, yerr=std, fmt='o-', label=f'Chain {chain_idx}')

216 eb[-1][0].set_linestyle('--')

217

218 plt.xlabel('Atom Index')

219 plt.ylabel('Order Parameter (S)')

220 plt.title(f'Lipid Order Parameters for {inchikey}')

221 plt.xticks(rotation=45, ha='right')

222 plt.tight_layout()

223 plt.legend()

224 plt.show()

Coverage for mddb_workflow/analyses/lipid_order.py: 79%

110 statements