Coverage for mddb_workflow / analyses / rmsds.py: 96%
57 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
1from mddb_workflow.tools.xvg_parse import xvg_parse
2from mddb_workflow.tools.get_reduced_trajectory import get_reduced_trajectory
3from mddb_workflow.utils.auxiliar import save_json
4from mddb_workflow.utils.constants import REFERENCE_LABELS, OUTPUT_RMSDS_FILENAME
5from mddb_workflow.utils.gmx_spells import run_gromacs
6from mddb_workflow.utils.type_hints import *
8from os import remove
11def rmsds(
12 trajectory_file : 'File',
13 first_frame_file : 'File',
14 average_structure_file : 'File',
15 output_directory : str,
16 snapshots : int,
17 structure : 'Structure',
18 pbc_selection : 'Selection',
19 inchikey_map : list[dict],
20 frames_limit : int = 5000,
21 ):
22 """Run multiple RMSD analyses. One with each reference (first frame, average structure)
23 and each selection (default: protein, nucleic)."""
24 # Set the main output filepath
25 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSDS_FILENAME}'
27 # Set the default selections to be analyzed
28 default_selections = {
29 'protein': structure.select_protein(),
30 'nucleic': structure.select_nucleic()
31 }
33 # Set selections to be analyzed
34 selections = { **default_selections }
36 # If there is a ligand map then parse them to selections as well
37 for ligand in inchikey_map:
38 if ligand['is_lipid']: continue
39 selection_name = 'ligand ' + ligand['name']
40 selection = structure.select_residue_indices(ligand['residue_indices'])
41 # If the ligand has less than 3 atoms then gromacs can not fit it so it will fail
42 if len(selection) < 3: continue
43 # Add current ligand selection to be analyzed
44 selections[selection_name] = selection
45 # If the ligand selection totally overlaps with a default selection then remove the default
46 for default_selection_name, default_selection in default_selections.items():
47 if default_selection == selection: del selections[default_selection_name]
49 # If there is anything left apart from proteins, nucleic acids and ligands then select it
50 missing_regions = structure.select_all()
51 for selection in selections.values():
52 missing_regions -= selection
53 if missing_regions: selections['other'] = missing_regions
55 # Remove PBC residues from parsed selections
56 non_pbc_selections = {}
57 for selection_name, selection in selections.items():
58 # Substract PBC atoms
59 non_pbc_selection = selection - pbc_selection
60 # If selection after substracting pbc atoms becomes empty then discard it
61 if not non_pbc_selection: continue
62 # Add the the filtered selection to the dict
63 non_pbc_selections[selection_name] = non_pbc_selection
65 # If there is nothing lo analyze at this point then skip the analysis
66 if len(non_pbc_selections) == 0:
67 print(' The RMSDs analysis will be skipped since there is nothing to analyze')
68 return
70 # Get the coarse grain selection
71 cg_selection = structure.select_cg()
73 # The start will be always 0 since we start with the first frame
74 start = 0
76 # Reduce the trajectory according to the frames limit
77 # Use a reduced trajectory in case the original trajectory has many frames
78 # Note that it makes no difference which reference is used here
79 reduced_trajectory_filepath, step, frames = get_reduced_trajectory(
80 first_frame_file,
81 trajectory_file,
82 snapshots,
83 frames_limit,
84 )
86 # Save results in this array
87 output_analysis = []
89 # Set the reference structures to run the RMSD against
90 rmsd_references = [first_frame_file, average_structure_file]
92 # Iterate over each reference and group
93 for reference in rmsd_references:
94 # Get a standarized reference name
95 reference_name = REFERENCE_LABELS[reference.filename]
96 for group_name, group_selection in non_pbc_selections.items():
97 # Set the analysis filename
98 rmsd_analysis = f'rmsd.{reference_name}.{group_name.lower().replace(" ","_")}.xvg'
99 # If part of the selection has coarse grain atoms then skip mass weighting
100 # Otherwise Gromacs may fail since the atom name may not be in the atommass library
101 has_cg = group_selection & cg_selection
102 # Run the rmsd
103 print(f' Reference: {reference_name}, Selection: {group_name},{" NOT" if has_cg else ""} mass weighted')
104 rmsd(reference.path, reduced_trajectory_filepath, group_selection, rmsd_analysis, skip_mass_weighting=has_cg)
105 # Read and parse the output file
106 rmsd_data = xvg_parse(rmsd_analysis, ['times', 'values'])
107 # Format the mined data and append it to the overall output
108 # Multiply by 10 since rmsd comes in nanometers (nm) and we want it in Ångstroms (Å)
109 rmsd_values = [ v*10 for v in rmsd_data['values'] ]
110 data = {
111 'values': rmsd_values,
112 'reference': reference_name,
113 'group': group_name
114 }
115 output_analysis.append(data)
116 # Remove the analysis xvg file since it is not required anymore
117 remove(rmsd_analysis)
119 # Export the analysis in json format
120 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath)
122# RMSD
123#
124# Perform the RMSd analysis
125def rmsd (
126 reference_filepath : str,
127 trajectory_filepath : str,
128 selection : 'Selection', # This selection will never be empty, since this is checked previously
129 output_analysis_filepath : str,
130 skip_mass_weighting : bool = False):
132 # Convert the selection to a ndx file gromacs can read
133 selection_name = 'rmsd_selection'
134 ndx_selection = selection.to_ndx(selection_name)
135 ndx_filename = '.rmsd.ndx'
136 with open(ndx_filename, 'w') as file:
137 file.write(ndx_selection)
139 # Run Gromacs
140 run_gromacs(f'rms -s {reference_filepath} -f {trajectory_filepath} \
141 -o {output_analysis_filepath} -n {ndx_filename} {"-mw no" if skip_mass_weighting else ""}',
142 user_input = f'{selection_name} {selection_name}')
144 # Remove the ndx file
145 remove(ndx_filename)