Coverage for model_workflow/analyses/rmsds.py: 90%

1from model_workflow.tools.xvg_parse import xvg_parse

2from model_workflow.tools.get_reduced_trajectory import get_reduced_trajectory

3from model_workflow.utils.auxiliar import save_json

4from model_workflow.utils.constants import GROMACS_EXECUTABLE, REFERENCE_LABELS, OUTPUT_RMSDS_FILENAME

5from model_workflow.utils.type_hints import *

7import os

8from subprocess import run, PIPE, Popen

11def rmsds(

12 trajectory_file : 'File',

13 first_frame_file : 'File',

14 average_structure_file : 'File',

15 output_directory : str,

16 snapshots : int,

17 structure : 'Structure',

18 pbc_selection : 'Selection',

19 ligand_map : List[dict],

20 frames_limit : int = 5000,

21 ):

22 """Run multiple RMSD analyses. One with each reference (first frame, average structure)

23 and each selection (default: protein, nucleic)."""

24 # Set the main output filepath

25 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSDS_FILENAME}'

27 # Set the default selections to be analyzed

28 default_selections = {

29 'protein': structure.select_protein(),

30 'nucleic': structure.select_nucleic()

31 }

33 # Set selections to be analyzed

34 selections = { **default_selections }

36 # If there is a ligand map then parse them to selections as well

37 for ligand in ligand_map:

38 selection_name = 'ligand ' + ligand['name']

39 selection = structure.select_residue_indices(ligand['residue_indices'])

40 # If the ligand has less than 3 atoms then gromacs can not fit it so it will fail

41 if len(selection) < 3: continue

42 # Add current ligand selection to be analyzed

43 selections[selection_name] = selection

44 # If the ligand selection totally overlaps with a default selection then remove the default

45 for default_selection_name, default_selection in default_selections.items():

46 if default_selection == selection: del selections[default_selection_name]

48 # Remove PBC residues from parsed selections

49 non_pbc_selections = {}

50 for selection_name, selection in selections.items():

51 # Substract PBC atoms

52 non_pbc_selection = selection - pbc_selection

53 # If selection after substracting pbc atoms becomes empty then discard it

54 if not non_pbc_selection:

55 continue

56 # Add the the filtered selection to the dict

57 non_pbc_selections[selection_name] = non_pbc_selection

59 # If there is nothing lo analyze at this point then skip the analysis

60 if len(non_pbc_selections) == 0:

61 print(' The RMSDs analysis will be skipped since there is nothing to analyze')

62 return

64 # Get the coarse grain selection

65 cg_selection = structure.select_cg()

67 # The start will be always 0 since we start with the first frame

68 start = 0

70 # Reduce the trajectory according to the frames limit

71 # Use a reduced trajectory in case the original trajectory has many frames

72 # Note that it makes no difference which reference is used here

73 reduced_trajectory_filepath, step, frames = get_reduced_trajectory(

74 first_frame_file,

75 trajectory_file,

76 snapshots,

77 frames_limit,

78 )

80 # Save results in this array

81 output_analysis = []

83 # Set the reference structures to run the RMSD against

84 rmsd_references = [first_frame_file, average_structure_file]

86 # Iterate over each reference and group

87 for reference in rmsd_references:

88 # Get a standarized reference name

89 reference_name = REFERENCE_LABELS[reference.filename]

90 for group_name, group_selection in non_pbc_selections.items():

91 # Set the analysis filename

92 rmsd_analysis = f'rmsd.{reference_name}.{group_name.lower()}.xvg'

93 # If part of the selection has coarse grain atoms then skip mass weighting

94 # Otherwise Gromacs may fail since the atom name may not be in the atommass library

95 has_cg = group_selection & cg_selection

96 # Run the rmsd

97 print(f' Reference: {reference_name}, Selection: {group_name},{" NOT" if has_cg else ""} mass weighted')

98 rmsd(reference.path, reduced_trajectory_filepath, group_selection, rmsd_analysis, skip_mass_weighting=has_cg)

99 # Read and parse the output file

100 rmsd_data = xvg_parse(rmsd_analysis, ['times', 'values'])

101 # Format the mined data and append it to the overall output

102 # Multiply by 10 since rmsd comes in nanometers (nm) and we want it in Ångstroms (Å)

103 rmsd_values = [ v*10 for v in rmsd_data['values'] ]

104 data = {

105 'values': rmsd_values,

106 'reference': reference_name,

107 'group': group_name

108 }

109 output_analysis.append(data)

110 # Remove the analysis xvg file since it is not required anymore

111 os.remove(rmsd_analysis)

112

113 # Export the analysis in json format

114 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath)

115

116# RMSD

117#

118# Perform the RMSd analysis

119def rmsd (

120 reference_filepath : str,

121 trajectory_filepath : str,

122 selection : 'Selection', # This selection will never be empty, since this is checked previously

123 output_analysis_filepath : str,

124 skip_mass_weighting : bool = False):

125

126 # Convert the selection to a ndx file gromacs can read

127 selection_name = 'rmsd_selection'

128 ndx_selection = selection.to_ndx(selection_name)

129 ndx_filename = '.rmsd.ndx'

130 with open(ndx_filename, 'w') as file:

131 file.write(ndx_selection)

132

133 # Run Gromacs

134 p = Popen([

135 "echo",

136 selection_name, # Select group for least squares fit

137 selection_name, # Select group for RMSD calculation

138 ], stdout=PIPE)

139 process = run([

140 GROMACS_EXECUTABLE,

141 "rms",

142 "-s",

143 reference_filepath,

144 "-f",

145 trajectory_filepath,

146 '-o',

147 output_analysis_filepath,

148 '-n',

149 ndx_filename,

150 # Supress mass weighting if requested

151 *([ '-mw', 'no' ] if skip_mass_weighting else []),

152 '-quiet'

153 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE)

154 # Consuming the output makes the process run

155 output_logs = process.stdout.decode()

156 # Close the input

157 p.stdout.close()

158

159 # If the output does not exist at this point it means something went wrong with gromacs

160 if not os.path.exists(output_analysis_filepath):

161 print(output_logs)

162 error_logs = process.stderr.decode()

163 print(error_logs)

164 raise Exception('Something went wrong with GROMACS')

165

166 # Remove the ndx file

167 os.remove(ndx_filename)