Coverage for mddb_workflow/analyses/rmsds.py: 96%

53 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1from mddb_workflow.tools.xvg_parse import xvg_parse 

2from mddb_workflow.tools.get_reduced_trajectory import get_reduced_trajectory 

3from mddb_workflow.utils.auxiliar import save_json 

4from mddb_workflow.utils.constants import REFERENCE_LABELS, OUTPUT_RMSDS_FILENAME 

5from mddb_workflow.utils.gmx_spells import run_gromacs 

6from mddb_workflow.utils.type_hints import * 

7 

8from os import remove 

9 

10 

11def rmsds( 

12 trajectory_file : 'File', 

13 first_frame_file : 'File', 

14 average_structure_file : 'File', 

15 output_directory : str, 

16 snapshots : int, 

17 structure : 'Structure', 

18 pbc_selection : 'Selection', 

19 ligand_map : list[dict], 

20 frames_limit : int = 5000, 

21 ): 

22 """Run multiple RMSD analyses. One with each reference (first frame, average structure)  

23 and each selection (default: protein, nucleic).""" 

24 # Set the main output filepath 

25 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSDS_FILENAME}' 

26 

27 # Set the default selections to be analyzed 

28 default_selections = { 

29 'protein': structure.select_protein(), 

30 'nucleic': structure.select_nucleic() 

31 } 

32 

33 # Set selections to be analyzed 

34 selections = { **default_selections } 

35 

36 # If there is a ligand map then parse them to selections as well 

37 for ligand in ligand_map: 

38 selection_name = 'ligand ' + ligand['name'] 

39 selection = structure.select_residue_indices(ligand['residue_indices']) 

40 # If the ligand has less than 3 atoms then gromacs can not fit it so it will fail 

41 if len(selection) < 3: continue 

42 # Add current ligand selection to be analyzed 

43 selections[selection_name] = selection 

44 # If the ligand selection totally overlaps with a default selection then remove the default 

45 for default_selection_name, default_selection in default_selections.items(): 

46 if default_selection == selection: del selections[default_selection_name] 

47 

48 # Remove PBC residues from parsed selections 

49 non_pbc_selections = {} 

50 for selection_name, selection in selections.items(): 

51 # Substract PBC atoms 

52 non_pbc_selection = selection - pbc_selection 

53 # If selection after substracting pbc atoms becomes empty then discard it 

54 if not non_pbc_selection: 

55 continue 

56 # Add the the filtered selection to the dict 

57 non_pbc_selections[selection_name] = non_pbc_selection 

58 

59 # If there is nothing lo analyze at this point then skip the analysis 

60 if len(non_pbc_selections) == 0: 

61 print(' The RMSDs analysis will be skipped since there is nothing to analyze') 

62 return 

63 

64 # Get the coarse grain selection 

65 cg_selection = structure.select_cg() 

66 

67 # The start will be always 0 since we start with the first frame 

68 start = 0 

69 

70 # Reduce the trajectory according to the frames limit 

71 # Use a reduced trajectory in case the original trajectory has many frames 

72 # Note that it makes no difference which reference is used here 

73 reduced_trajectory_filepath, step, frames = get_reduced_trajectory( 

74 first_frame_file, 

75 trajectory_file, 

76 snapshots, 

77 frames_limit, 

78 ) 

79 

80 # Save results in this array 

81 output_analysis = [] 

82 

83 # Set the reference structures to run the RMSD against 

84 rmsd_references = [first_frame_file, average_structure_file] 

85 

86 # Iterate over each reference and group 

87 for reference in rmsd_references: 

88 # Get a standarized reference name 

89 reference_name = REFERENCE_LABELS[reference.filename] 

90 for group_name, group_selection in non_pbc_selections.items(): 

91 # Set the analysis filename 

92 rmsd_analysis = f'rmsd.{reference_name}.{group_name.lower().replace(" ","_")}.xvg' 

93 # If part of the selection has coarse grain atoms then skip mass weighting 

94 # Otherwise Gromacs may fail since the atom name may not be in the atommass library 

95 has_cg = group_selection & cg_selection 

96 # Run the rmsd 

97 print(f' Reference: {reference_name}, Selection: {group_name},{" NOT" if has_cg else ""} mass weighted') 

98 rmsd(reference.path, reduced_trajectory_filepath, group_selection, rmsd_analysis, skip_mass_weighting=has_cg) 

99 # Read and parse the output file 

100 rmsd_data = xvg_parse(rmsd_analysis, ['times', 'values']) 

101 # Format the mined data and append it to the overall output 

102 # Multiply by 10 since rmsd comes in nanometers (nm) and we want it in Ångstroms (Å) 

103 rmsd_values = [ v*10 for v in rmsd_data['values'] ] 

104 data = { 

105 'values': rmsd_values, 

106 'reference': reference_name, 

107 'group': group_name 

108 } 

109 output_analysis.append(data) 

110 # Remove the analysis xvg file since it is not required anymore 

111 remove(rmsd_analysis) 

112 

113 # Export the analysis in json format 

114 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath) 

115 

116# RMSD 

117#  

118# Perform the RMSd analysis  

119def rmsd ( 

120 reference_filepath : str, 

121 trajectory_filepath : str, 

122 selection : 'Selection', # This selection will never be empty, since this is checked previously 

123 output_analysis_filepath : str, 

124 skip_mass_weighting : bool = False): 

125 

126 # Convert the selection to a ndx file gromacs can read 

127 selection_name = 'rmsd_selection' 

128 ndx_selection = selection.to_ndx(selection_name) 

129 ndx_filename = '.rmsd.ndx' 

130 with open(ndx_filename, 'w') as file: 

131 file.write(ndx_selection) 

132 

133 # Run Gromacs 

134 run_gromacs(f'rms -s {reference_filepath} -f {trajectory_filepath} \ 

135 -o {output_analysis_filepath} -n {ndx_filename} {"-mw no" if skip_mass_weighting else ""}', 

136 user_input = f'{selection_name} {selection_name}') 

137 

138 # Remove the ndx file 

139 remove(ndx_filename)