Coverage for model_workflow/analyses/rmsds.py: 90%

61 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1from model_workflow.tools.xvg_parse import xvg_parse 

2from model_workflow.tools.get_reduced_trajectory import get_reduced_trajectory 

3from model_workflow.utils.auxiliar import save_json 

4from model_workflow.utils.constants import GROMACS_EXECUTABLE, REFERENCE_LABELS, OUTPUT_RMSDS_FILENAME 

5from model_workflow.utils.type_hints import * 

6 

7import os 

8from subprocess import run, PIPE, Popen 

9 

10 

11def rmsds( 

12 trajectory_file : 'File', 

13 first_frame_file : 'File', 

14 average_structure_file : 'File', 

15 output_directory : str, 

16 snapshots : int, 

17 structure : 'Structure', 

18 pbc_selection : 'Selection', 

19 ligand_map : List[dict], 

20 frames_limit : int = 5000, 

21 ): 

22 """Run multiple RMSD analyses. One with each reference (first frame, average structure)  

23 and each selection (default: protein, nucleic).""" 

24 # Set the main output filepath 

25 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSDS_FILENAME}' 

26 

27 # Set the default selections to be analyzed 

28 default_selections = { 

29 'protein': structure.select_protein(), 

30 'nucleic': structure.select_nucleic() 

31 } 

32 

33 # Set selections to be analyzed 

34 selections = { **default_selections } 

35 

36 # If there is a ligand map then parse them to selections as well 

37 for ligand in ligand_map: 

38 selection_name = 'ligand ' + ligand['name'] 

39 selection = structure.select_residue_indices(ligand['residue_indices']) 

40 # If the ligand has less than 3 atoms then gromacs can not fit it so it will fail 

41 if len(selection) < 3: continue 

42 # Add current ligand selection to be analyzed 

43 selections[selection_name] = selection 

44 # If the ligand selection totally overlaps with a default selection then remove the default 

45 for default_selection_name, default_selection in default_selections.items(): 

46 if default_selection == selection: del selections[default_selection_name] 

47 

48 # Remove PBC residues from parsed selections 

49 non_pbc_selections = {} 

50 for selection_name, selection in selections.items(): 

51 # Substract PBC atoms 

52 non_pbc_selection = selection - pbc_selection 

53 # If selection after substracting pbc atoms becomes empty then discard it 

54 if not non_pbc_selection: 

55 continue 

56 # Add the the filtered selection to the dict 

57 non_pbc_selections[selection_name] = non_pbc_selection 

58 

59 # If there is nothing lo analyze at this point then skip the analysis 

60 if len(non_pbc_selections) == 0: 

61 print(' The RMSDs analysis will be skipped since there is nothing to analyze') 

62 return 

63 

64 # Get the coarse grain selection 

65 cg_selection = structure.select_cg() 

66 

67 # The start will be always 0 since we start with the first frame 

68 start = 0 

69 

70 # Reduce the trajectory according to the frames limit 

71 # Use a reduced trajectory in case the original trajectory has many frames 

72 # Note that it makes no difference which reference is used here 

73 reduced_trajectory_filepath, step, frames = get_reduced_trajectory( 

74 first_frame_file, 

75 trajectory_file, 

76 snapshots, 

77 frames_limit, 

78 ) 

79 

80 # Save results in this array 

81 output_analysis = [] 

82 

83 # Set the reference structures to run the RMSD against 

84 rmsd_references = [first_frame_file, average_structure_file] 

85 

86 # Iterate over each reference and group 

87 for reference in rmsd_references: 

88 # Get a standarized reference name 

89 reference_name = REFERENCE_LABELS[reference.filename] 

90 for group_name, group_selection in non_pbc_selections.items(): 

91 # Set the analysis filename 

92 rmsd_analysis = f'rmsd.{reference_name}.{group_name.lower()}.xvg' 

93 # If part of the selection has coarse grain atoms then skip mass weighting 

94 # Otherwise Gromacs may fail since the atom name may not be in the atommass library 

95 has_cg = group_selection & cg_selection 

96 # Run the rmsd 

97 print(f' Reference: {reference_name}, Selection: {group_name},{" NOT" if has_cg else ""} mass weighted') 

98 rmsd(reference.path, reduced_trajectory_filepath, group_selection, rmsd_analysis, skip_mass_weighting=has_cg) 

99 # Read and parse the output file 

100 rmsd_data = xvg_parse(rmsd_analysis, ['times', 'values']) 

101 # Format the mined data and append it to the overall output 

102 # Multiply by 10 since rmsd comes in nanometers (nm) and we want it in Ångstroms (Å) 

103 rmsd_values = [ v*10 for v in rmsd_data['values'] ] 

104 data = { 

105 'values': rmsd_values, 

106 'reference': reference_name, 

107 'group': group_name 

108 } 

109 output_analysis.append(data) 

110 # Remove the analysis xvg file since it is not required anymore 

111 os.remove(rmsd_analysis) 

112 

113 # Export the analysis in json format 

114 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath) 

115 

116# RMSD 

117#  

118# Perform the RMSd analysis  

119def rmsd ( 

120 reference_filepath : str, 

121 trajectory_filepath : str, 

122 selection : 'Selection', # This selection will never be empty, since this is checked previously 

123 output_analysis_filepath : str, 

124 skip_mass_weighting : bool = False): 

125 

126 # Convert the selection to a ndx file gromacs can read 

127 selection_name = 'rmsd_selection' 

128 ndx_selection = selection.to_ndx(selection_name) 

129 ndx_filename = '.rmsd.ndx' 

130 with open(ndx_filename, 'w') as file: 

131 file.write(ndx_selection) 

132 

133 # Run Gromacs 

134 p = Popen([ 

135 "echo", 

136 selection_name, # Select group for least squares fit 

137 selection_name, # Select group for RMSD calculation 

138 ], stdout=PIPE) 

139 process = run([ 

140 GROMACS_EXECUTABLE, 

141 "rms", 

142 "-s", 

143 reference_filepath, 

144 "-f", 

145 trajectory_filepath, 

146 '-o', 

147 output_analysis_filepath, 

148 '-n', 

149 ndx_filename, 

150 # Supress mass weighting if requested 

151 *([ '-mw', 'no' ] if skip_mass_weighting else []), 

152 '-quiet' 

153 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE) 

154 # Consuming the output makes the process run 

155 output_logs = process.stdout.decode() 

156 # Close the input 

157 p.stdout.close() 

158 

159 # If the output does not exist at this point it means something went wrong with gromacs 

160 if not os.path.exists(output_analysis_filepath): 

161 print(output_logs) 

162 error_logs = process.stderr.decode() 

163 print(error_logs) 

164 raise Exception('Something went wrong with GROMACS') 

165 

166 # Remove the ndx file 

167 os.remove(ndx_filename)