Coverage for model_workflow/analyses/rmsds.py: 90%
61 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1from model_workflow.tools.xvg_parse import xvg_parse
2from model_workflow.tools.get_reduced_trajectory import get_reduced_trajectory
3from model_workflow.utils.auxiliar import save_json
4from model_workflow.utils.constants import GROMACS_EXECUTABLE, REFERENCE_LABELS, OUTPUT_RMSDS_FILENAME
5from model_workflow.utils.type_hints import *
7import os
8from subprocess import run, PIPE, Popen
11def rmsds(
12 trajectory_file : 'File',
13 first_frame_file : 'File',
14 average_structure_file : 'File',
15 output_directory : str,
16 snapshots : int,
17 structure : 'Structure',
18 pbc_selection : 'Selection',
19 ligand_map : List[dict],
20 frames_limit : int = 5000,
21 ):
22 """Run multiple RMSD analyses. One with each reference (first frame, average structure)
23 and each selection (default: protein, nucleic)."""
24 # Set the main output filepath
25 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSDS_FILENAME}'
27 # Set the default selections to be analyzed
28 default_selections = {
29 'protein': structure.select_protein(),
30 'nucleic': structure.select_nucleic()
31 }
33 # Set selections to be analyzed
34 selections = { **default_selections }
36 # If there is a ligand map then parse them to selections as well
37 for ligand in ligand_map:
38 selection_name = 'ligand ' + ligand['name']
39 selection = structure.select_residue_indices(ligand['residue_indices'])
40 # If the ligand has less than 3 atoms then gromacs can not fit it so it will fail
41 if len(selection) < 3: continue
42 # Add current ligand selection to be analyzed
43 selections[selection_name] = selection
44 # If the ligand selection totally overlaps with a default selection then remove the default
45 for default_selection_name, default_selection in default_selections.items():
46 if default_selection == selection: del selections[default_selection_name]
48 # Remove PBC residues from parsed selections
49 non_pbc_selections = {}
50 for selection_name, selection in selections.items():
51 # Substract PBC atoms
52 non_pbc_selection = selection - pbc_selection
53 # If selection after substracting pbc atoms becomes empty then discard it
54 if not non_pbc_selection:
55 continue
56 # Add the the filtered selection to the dict
57 non_pbc_selections[selection_name] = non_pbc_selection
59 # If there is nothing lo analyze at this point then skip the analysis
60 if len(non_pbc_selections) == 0:
61 print(' The RMSDs analysis will be skipped since there is nothing to analyze')
62 return
64 # Get the coarse grain selection
65 cg_selection = structure.select_cg()
67 # The start will be always 0 since we start with the first frame
68 start = 0
70 # Reduce the trajectory according to the frames limit
71 # Use a reduced trajectory in case the original trajectory has many frames
72 # Note that it makes no difference which reference is used here
73 reduced_trajectory_filepath, step, frames = get_reduced_trajectory(
74 first_frame_file,
75 trajectory_file,
76 snapshots,
77 frames_limit,
78 )
80 # Save results in this array
81 output_analysis = []
83 # Set the reference structures to run the RMSD against
84 rmsd_references = [first_frame_file, average_structure_file]
86 # Iterate over each reference and group
87 for reference in rmsd_references:
88 # Get a standarized reference name
89 reference_name = REFERENCE_LABELS[reference.filename]
90 for group_name, group_selection in non_pbc_selections.items():
91 # Set the analysis filename
92 rmsd_analysis = f'rmsd.{reference_name}.{group_name.lower()}.xvg'
93 # If part of the selection has coarse grain atoms then skip mass weighting
94 # Otherwise Gromacs may fail since the atom name may not be in the atommass library
95 has_cg = group_selection & cg_selection
96 # Run the rmsd
97 print(f' Reference: {reference_name}, Selection: {group_name},{" NOT" if has_cg else ""} mass weighted')
98 rmsd(reference.path, reduced_trajectory_filepath, group_selection, rmsd_analysis, skip_mass_weighting=has_cg)
99 # Read and parse the output file
100 rmsd_data = xvg_parse(rmsd_analysis, ['times', 'values'])
101 # Format the mined data and append it to the overall output
102 # Multiply by 10 since rmsd comes in nanometers (nm) and we want it in Ångstroms (Å)
103 rmsd_values = [ v*10 for v in rmsd_data['values'] ]
104 data = {
105 'values': rmsd_values,
106 'reference': reference_name,
107 'group': group_name
108 }
109 output_analysis.append(data)
110 # Remove the analysis xvg file since it is not required anymore
111 os.remove(rmsd_analysis)
113 # Export the analysis in json format
114 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath)
116# RMSD
117#
118# Perform the RMSd analysis
119def rmsd (
120 reference_filepath : str,
121 trajectory_filepath : str,
122 selection : 'Selection', # This selection will never be empty, since this is checked previously
123 output_analysis_filepath : str,
124 skip_mass_weighting : bool = False):
126 # Convert the selection to a ndx file gromacs can read
127 selection_name = 'rmsd_selection'
128 ndx_selection = selection.to_ndx(selection_name)
129 ndx_filename = '.rmsd.ndx'
130 with open(ndx_filename, 'w') as file:
131 file.write(ndx_selection)
133 # Run Gromacs
134 p = Popen([
135 "echo",
136 selection_name, # Select group for least squares fit
137 selection_name, # Select group for RMSD calculation
138 ], stdout=PIPE)
139 process = run([
140 GROMACS_EXECUTABLE,
141 "rms",
142 "-s",
143 reference_filepath,
144 "-f",
145 trajectory_filepath,
146 '-o',
147 output_analysis_filepath,
148 '-n',
149 ndx_filename,
150 # Supress mass weighting if requested
151 *([ '-mw', 'no' ] if skip_mass_weighting else []),
152 '-quiet'
153 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE)
154 # Consuming the output makes the process run
155 output_logs = process.stdout.decode()
156 # Close the input
157 p.stdout.close()
159 # If the output does not exist at this point it means something went wrong with gromacs
160 if not os.path.exists(output_analysis_filepath):
161 print(output_logs)
162 error_logs = process.stderr.decode()
163 print(error_logs)
164 raise Exception('Something went wrong with GROMACS')
166 # Remove the ndx file
167 os.remove(ndx_filename)