Coverage for mddb_workflow/analyses/lipid_interactions.py: 13%

76 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step 

2from mddb_workflow.utils.auxiliar import save_json, load_json 

3from mddb_workflow.utils.constants import OUTPUT_LIPID_INTERACTIONS_FILENAME, LIPIDS_RESIDUE_NAMES 

4from mddb_workflow.utils.type_hints import * 

5import numpy as np 

6import MDAnalysis 

7 

8 

9def lipid_interactions ( 

10 universe : 'MDAnalysis.Universe', 

11 output_directory : str, 

12 lipid_map: list[dict], 

13 cg_residues: list[int], 

14 snapshots : int, 

15 frames_limit: int = 100): 

16 """ 

17 Lipid-protein interactions analysis. 

18 """ 

19 output_analysis_filepath = f'{output_directory}/{OUTPUT_LIPID_INTERACTIONS_FILENAME}' 

20 

21 # Check if we're dealing with coarse-grain simulations 

22 if len(cg_residues) > 0: 

23 data = cg_lipid_interactions(universe, snapshots, frames_limit) 

24 elif lipid_map and len(lipid_map) > 0: 

25 data = aa_lipid_interactions(universe, snapshots, frames_limit, lipid_map) 

26 else: 

27 print('-> Skipping lipid-protein interactions analysis') 

28 return 

29 

30 # Wrap the data in a dictionary 

31 data = {'data': data} 

32 save_json(data, output_analysis_filepath) 

33 

34 

35def aa_lipid_interactions(universe, snapshots, frames_limit, lipid_map): 

36 # Original atomistic analysis 

37 frame_step, frame_count = calculate_frame_step(snapshots, frames_limit) 

38 lipids_residx = [residue_idx for lipid in lipid_map for residue_idx in lipid['residue_indices']] 

39 # Create a mapping from lipid residue indices to array indices 

40 residx_2_idx = {residx: i for i, residx in enumerate(lipids_residx)} 

41 lipid_group = universe.select_atoms(f'resindex {" ".join(map(str, lipids_residx))}') 

42 # A counter for each pair of protein-lipid residues 

43 protein_residx = universe.select_atoms('protein').residues.resindices 

44 ocupancy_arrs = np.zeros((len(protein_residx), len(lipids_residx))) 

45 

46 # Only iterate through the frames you need. TODO: parallel frames 

47 for ts in universe.trajectory[0:snapshots:frame_step]: 

48 # Select lipid atoms near the protein once 

49 lipid_near_prot = universe.select_atoms(f'(around 6 protein) and group lipid_group and not protein', 

50 lipid_group=lipid_group) 

51 for i, residx in enumerate(protein_residx): 

52 # Find lipid atoms near a specific residue 

53 residue_atoms = universe.select_atoms(f'resindex {residx}') 

54 lipid_near_res = lipid_near_prot.select_atoms(f'around 6 global group residuegroup', 

55 residuegroup=residue_atoms) 

56 # Add the count of lipids 

57 for lipid_residx in lipid_near_res.residues.resindices: 

58 ocupancy_arrs[i, residx_2_idx[lipid_residx]] += 1 

59 

60 # Normalize the occupancy arrays by dividing by the number of frames 

61 ocupancy_arrs /= frame_count 

62 

63 # Save the data 

64 data = { 'residue_indices': protein_residx.tolist()} 

65 for lipid in lipid_map: 

66 # Convert lipid residue indices to array indices 

67 lipid_idx = [residx_2_idx[residx] for residx in lipid['residue_indices']] 

68 data[lipid['match']['ref']['inchikey']] = ocupancy_arrs[:, lipid_idx].sum(1).tolist() 

69 

70 return data 

71 

72 

73def cg_lipid_interactions(universe, snapshots, frames_limit): 

74 print('-> Performing coarse-grain lipid-protein interactions analysis') 

75 # Identify lipid residues by name for coarse-grain simulations 

76 lipid_residues = [] 

77 for resname in LIPIDS_RESIDUE_NAMES: 

78 lipids = universe.select_atoms(f'resname {resname}') 

79 if len(lipids) > 0: 

80 lipid_residues.append(resname) 

81 

82 if not lipid_residues: 

83 print('-> No lipid residues found for coarse-grain analysis') 

84 return {} 

85 

86 frame_step, frame_count = calculate_frame_step(snapshots, frames_limit) 

87 protein_residx = universe.select_atoms('protein').residues.resindices 

88 assert len(protein_residx) > 0 

89 

90 # Create occupancy arrays for each lipid type 

91 lipid_occupancy = {resname: np.zeros(len(protein_residx)) for resname in lipid_residues} 

92 

93 # Iterate through frames 

94 for ts in universe.trajectory[0:snapshots:frame_step]: 

95 for i, residx in enumerate(protein_residx): 

96 residue_atoms = universe.select_atoms(f'resindex {residx}') 

97 

98 # Check interactions with each lipid type 

99 for resname in lipid_residues: 

100 lipids_near_res = universe.select_atoms(f'(around 6 global group residuegroup) and resname {resname}', 

101 residuegroup=residue_atoms) 

102 if len(lipids_near_res) > 0: 

103 lipid_occupancy[resname][i] += len(lipids_near_res.residues) 

104 

105 # Normalize by frame count 

106 for resname in lipid_residues: 

107 lipid_occupancy[resname] /= frame_count 

108 

109 # Save the data in the same format as atomistic analysis 

110 data = {'residue_indices': protein_residx.tolist()} 

111 for resname in lipid_residues: 

112 data[resname] = lipid_occupancy[resname].tolist() 

113 

114 return data 

115 

116 

117def plot_lipid_interactions (output_analysis_filepath: str): 

118 import matplotlib.pyplot as plt 

119 data = load_json(output_analysis_filepath)['data'] 

120 

121 protein_residues = data['residue_indices'] 

122 lipid_keys = [key for key in data.keys() if key != 'residue_indices'] 

123 

124 # Create a figure for the plot 

125 plt.figure(figsize=(10, 6)) 

126 

127 for lipid_key in lipid_keys: 

128 interactions = data[lipid_key] 

129 plt.plot(protein_residues, interactions, label=lipid_key) 

130 

131 plt.xlabel('Protein Residue Indices') 

132 plt.ylabel('Interaction Count (Normalized)') 

133 plt.title('Lipid-Protein Interactions') 

134 plt.legend(); plt.grid(True); plt.tight_layout(); plt.show()