Coverage for model_workflow/analyses/rmsd_per_residue.py: 84%
32 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1import pytraj as pt
3from distutils.version import StrictVersion
5from model_workflow.utils.pyt_spells import get_reduced_pytraj_trajectory
6from model_workflow.utils.auxiliar import save_json
7from model_workflow.utils.constants import OUTPUT_RMSD_PERRES_FILENAME
8from model_workflow.utils.type_hints import *
11def rmsd_per_residue (
12 structure_file : str,
13 trajectory_file : 'File',
14 output_directory : str,
15 structure : 'Structure',
16 pbc_selection : 'Selection',
17 snapshots : int,
18 frames_limit : int = 100):
19 """Perform the RMSD analysis for each residue."""
21 # Set the main output filepath
22 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSD_PERRES_FILENAME}'
24 # Parse the trajectory intro pytraj
25 # Reduce it in case it exceeds the frames limit
26 pt_trajectory, frame_step, frames_count = get_reduced_pytraj_trajectory(
27 structure_file.path, trajectory_file.path, snapshots, frames_limit)
29 # We must exclude here PBC residues from the analysis
30 # PBC residues may jump through boundaries thus having non-sense high RMSD values
31 # Filter the trajectory with atoms to be included in the analysis
32 filter_in_selection = structure.invert_selection(pbc_selection)
34 # Calculate the residue indices of the overall structure remaining in the filtered trajectory
35 residue_indices = structure.get_selection_residue_indices(filter_in_selection)
36 # Count the number of residues
37 residue_count = len(residue_indices)
39 # Make sure we have more than 1 residue left at this point
40 # Otherwise it makes not sense running this analysis for a single residue
41 if residue_count <= 1:
42 tail = 'no residues at all' if residue_count == 0 else 'one residue only'
43 print(' The RMSD per residue analysis will be skipped since it would be applied to ' + tail)
44 return
46 # Convert the selection to a pytraj mask
47 pytraj_selection = filter_in_selection.to_pytraj()
48 # Apply the mask to the trajectory
49 filtered_pt_trajectory = pt_trajectory[pytraj_selection]
50 # WARNING: This extra line prevents the error "Segment violation (core dumped)" in some pdbs
51 # This happens with some random pdbs which pytraj considers to have 0 Mols
52 # More info: https://github.com/Amber-MD/cpptraj/pull/820
53 # IMPORTANT: This is critical for pytraj <= 2.0.5 but it makes the code fail for pytraj 2.0.6
54 if StrictVersion(pt.__version__) <= StrictVersion('2.0.5'):
55 filtered_pt_trajectory.top.start_new_mol()
57 # Run the analysis in pytraj
58 # The result data is a custom pytraj class: pytraj.datasets.datasetlist.DatasetList
59 # This class has keys but its attributes can not be accessed through the key
60 # They must be accessed through the index
61 # IMPORTANT
62 # Note that the argument 'resrange' may be redundant, since we always target for all residues
63 # If this argument is not passed then it prints "Error: Range::SetRange(None): Range is -1 for None"
64 # However, most of the times there is no problem and the analysis runs flawlessly anyway
65 # We started to have problems with some ions but then we had no results at all with coarse grain
66 # More info: https://github.com/Amber-MD/pytraj/issues/1580
67 resrange = f'1-{structure.residue_count + 1}'
68 data = pt.rmsd_perres(filtered_pt_trajectory, resrange=resrange)
70 # We remove the first result, which is meant to be the whole rmsd and whose key is 'RMSD_00001'
71 del data[0]
73 # Check the expected output number of residues to match with the pytraj results
74 if residue_count != len(data):
75 raise ValueError(f'Number of residues in structure ({residue_count}) does not match number of residues in RMSD-perres results ({len(data)})')
77 # Mine the analysis data
78 whole_structure_residues_count = len(structure.residues)
79 rmsd_per_residue = [ None ] * whole_structure_residues_count
80 for index, residue_data in enumerate(data):
81 # Get the actual residue index of the current data
82 residue_index = residue_indices[index]
83 # Add current data to its corresponding position in the 'per residue' list
84 rmsd_per_residue[residue_index] = list(residue_data)
86 # Export the analysis in json format
87 output_analysis = { 'step': frame_step, 'rmsdpr': rmsd_per_residue }
88 save_json(output_analysis, output_analysis_filepath)