Coverage for mddb_workflow / analyses / rmsds.py: 96%

57 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1from mddb_workflow.tools.xvg_parse import xvg_parse 

2from mddb_workflow.tools.get_reduced_trajectory import get_reduced_trajectory 

3from mddb_workflow.utils.auxiliar import save_json 

4from mddb_workflow.utils.constants import REFERENCE_LABELS, OUTPUT_RMSDS_FILENAME 

5from mddb_workflow.utils.gmx_spells import run_gromacs 

6from mddb_workflow.utils.type_hints import * 

7 

8from os import remove 

9 

10 

11def rmsds( 

12 trajectory_file : 'File', 

13 first_frame_file : 'File', 

14 average_structure_file : 'File', 

15 output_directory : str, 

16 snapshots : int, 

17 structure : 'Structure', 

18 pbc_selection : 'Selection', 

19 inchikey_map : list[dict], 

20 frames_limit : int = 5000, 

21 ): 

22 """Run multiple RMSD analyses. One with each reference (first frame, average structure)  

23 and each selection (default: protein, nucleic).""" 

24 # Set the main output filepath 

25 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSDS_FILENAME}' 

26 

27 # Set the default selections to be analyzed 

28 default_selections = { 

29 'protein': structure.select_protein(), 

30 'nucleic': structure.select_nucleic() 

31 } 

32 

33 # Set selections to be analyzed 

34 selections = { **default_selections } 

35 

36 # If there is a ligand map then parse them to selections as well 

37 for ligand in inchikey_map: 

38 if ligand['is_lipid']: continue 

39 selection_name = 'ligand ' + ligand['name'] 

40 selection = structure.select_residue_indices(ligand['residue_indices']) 

41 # If the ligand has less than 3 atoms then gromacs can not fit it so it will fail 

42 if len(selection) < 3: continue 

43 # Add current ligand selection to be analyzed 

44 selections[selection_name] = selection 

45 # If the ligand selection totally overlaps with a default selection then remove the default 

46 for default_selection_name, default_selection in default_selections.items(): 

47 if default_selection == selection: del selections[default_selection_name] 

48 

49 # If there is anything left apart from proteins, nucleic acids and ligands then select it 

50 missing_regions = structure.select_all() 

51 for selection in selections.values(): 

52 missing_regions -= selection 

53 if missing_regions: selections['other'] = missing_regions 

54 

55 # Remove PBC residues from parsed selections 

56 non_pbc_selections = {} 

57 for selection_name, selection in selections.items(): 

58 # Substract PBC atoms 

59 non_pbc_selection = selection - pbc_selection 

60 # If selection after substracting pbc atoms becomes empty then discard it 

61 if not non_pbc_selection: continue 

62 # Add the the filtered selection to the dict 

63 non_pbc_selections[selection_name] = non_pbc_selection 

64 

65 # If there is nothing lo analyze at this point then skip the analysis 

66 if len(non_pbc_selections) == 0: 

67 print(' The RMSDs analysis will be skipped since there is nothing to analyze') 

68 return 

69 

70 # Get the coarse grain selection 

71 cg_selection = structure.select_cg() 

72 

73 # The start will be always 0 since we start with the first frame 

74 start = 0 

75 

76 # Reduce the trajectory according to the frames limit 

77 # Use a reduced trajectory in case the original trajectory has many frames 

78 # Note that it makes no difference which reference is used here 

79 reduced_trajectory_filepath, step, frames = get_reduced_trajectory( 

80 first_frame_file, 

81 trajectory_file, 

82 snapshots, 

83 frames_limit, 

84 ) 

85 

86 # Save results in this array 

87 output_analysis = [] 

88 

89 # Set the reference structures to run the RMSD against 

90 rmsd_references = [first_frame_file, average_structure_file] 

91 

92 # Iterate over each reference and group 

93 for reference in rmsd_references: 

94 # Get a standarized reference name 

95 reference_name = REFERENCE_LABELS[reference.filename] 

96 for group_name, group_selection in non_pbc_selections.items(): 

97 # Set the analysis filename 

98 rmsd_analysis = f'rmsd.{reference_name}.{group_name.lower().replace(" ","_")}.xvg' 

99 # If part of the selection has coarse grain atoms then skip mass weighting 

100 # Otherwise Gromacs may fail since the atom name may not be in the atommass library 

101 has_cg = group_selection & cg_selection 

102 # Run the rmsd 

103 print(f' Reference: {reference_name}, Selection: {group_name},{" NOT" if has_cg else ""} mass weighted') 

104 rmsd(reference.path, reduced_trajectory_filepath, group_selection, rmsd_analysis, skip_mass_weighting=has_cg) 

105 # Read and parse the output file 

106 rmsd_data = xvg_parse(rmsd_analysis, ['times', 'values']) 

107 # Format the mined data and append it to the overall output 

108 # Multiply by 10 since rmsd comes in nanometers (nm) and we want it in Ångstroms (Å) 

109 rmsd_values = [ v*10 for v in rmsd_data['values'] ] 

110 data = { 

111 'values': rmsd_values, 

112 'reference': reference_name, 

113 'group': group_name 

114 } 

115 output_analysis.append(data) 

116 # Remove the analysis xvg file since it is not required anymore 

117 remove(rmsd_analysis) 

118 

119 # Export the analysis in json format 

120 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath) 

121 

122# RMSD 

123# 

124# Perform the RMSd analysis 

125def rmsd ( 

126 reference_filepath : str, 

127 trajectory_filepath : str, 

128 selection : 'Selection', # This selection will never be empty, since this is checked previously 

129 output_analysis_filepath : str, 

130 skip_mass_weighting : bool = False): 

131 

132 # Convert the selection to a ndx file gromacs can read 

133 selection_name = 'rmsd_selection' 

134 ndx_selection = selection.to_ndx(selection_name) 

135 ndx_filename = '.rmsd.ndx' 

136 with open(ndx_filename, 'w') as file: 

137 file.write(ndx_selection) 

138 

139 # Run Gromacs 

140 run_gromacs(f'rms -s {reference_filepath} -f {trajectory_filepath} \ 

141 -o {output_analysis_filepath} -n {ndx_filename} {"-mw no" if skip_mass_weighting else ""}', 

142 user_input = f'{selection_name} {selection_name}') 

143 

144 # Remove the ndx file 

145 remove(ndx_filename)