Coverage for model_workflow/analyses/rmsd_pairwise.py: 79%
48 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1# RMSD pairwise analysis
2#
3# Perform the RMSD analysis for pair of frames in the trajectory
4# The analysis is carried by pytraj
6from os.path import exists
8import pytraj as pt
10from model_workflow.utils.pyt_spells import get_reduced_pytraj_trajectory
11from model_workflow.utils.auxiliar import save_json, numerate_filename, get_analysis_name, warn
12from model_workflow.utils.constants import OUTPUT_RMSD_PAIRWISE_FILENAME
13from model_workflow.utils.type_hints import *
15# The 'interactions' input is mandatory but it may be an empty list (i.e. there are no interactions)
16# Take a minimal subset of atoms representing both proteins and nucleic acids
17def rmsd_pairwise(
18 structure_file : str,
19 trajectory_file : str,
20 output_directory : str,
21 interactions : list,
22 snapshots : int,
23 structure : 'Structure',
24 pbc_selection : 'Selection',
25 frames_limit : int = 200,
26 overall_selection : str = "name CA or name C5'", # equivalent to "@CA,C5'" in pytraj
27 ):
28 """Perform an analysis for the overall structure and then one more analysis for each interaction."""
30 # Set the main output filepath
31 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSD_PAIRWISE_FILENAME}'
33 # Parse the trajectory intro ptraj
34 # Reduce it in case it exceeds the frames limit
35 pt_trajectory, frame_step, frames_count = get_reduced_pytraj_trajectory(
36 structure_file.path, trajectory_file.path, snapshots, frames_limit)
38 # Parse the overall selection
39 parsed_overall_selection = structure.select(overall_selection, syntax='vmd')
40 # If the default selection is empty then use all atoms instead
41 if not parsed_overall_selection:
42 print(f' Default selection "{overall_selection}" is empty -> All atoms will be used instead')
43 parsed_overall_selection = structure.select_all()
44 # If the default selection has one atom only then use all atoms instead
45 # Note that a single atom selection will lead to all RMSD values beeing 0 since there is a previous alignment
46 if len(parsed_overall_selection) == 1:
47 print(f' Default selection "{overall_selection}" has 1 atom only -> All atoms will be used instead')
48 parsed_overall_selection = structure.select_all()
50 # Remove PBC residues from the selection
51 parsed_overall_selection -= pbc_selection
52 if not parsed_overall_selection:
53 print(f' Empty selection after substracting PBC atoms')
54 return
56 # Save each analysis to a dict which will be parsed to json
57 output_summary = []
59 # Set a filename for the current interaction data
60 overall_output_analysis_filepath = numerate_filename(output_analysis_filepath, 0)
61 # Get the root name out of the output analysis filepath
62 overall_analysis_name = get_analysis_name(overall_output_analysis_filepath)
63 # Add it to the summary
64 output_summary.append({
65 'name': 'Overall',
66 'analysis': overall_analysis_name,
67 })
69 # If the overall output file does not exist then run the overall analysis
70 if not exists(overall_output_analysis_filepath):
71 print(f' Analyzing overall structure ({len(parsed_overall_selection)} atoms)')
72 # Run the analysis
73 data = pt.pairwise_rmsd(pt_trajectory, parsed_overall_selection.to_pytraj())
74 # Convert data to a normal list, since numpy ndarrays are not json serializable
75 data = data.tolist()
76 # In case there is no data we stop here
77 if len(data) == 0: raise SystemExit('Something went wrong with pytraj')
78 # Write the overall analysis output data to disk
79 save_json({
80 'name': 'Overall',
81 'rmsds': data,
82 'start': 0,
83 'step': frame_step
84 }, overall_output_analysis_filepath)
86 # Repeat the analysis with the interface residues of each interaction
87 for i, interaction in enumerate(interactions, 1):
88 # Get the interaction name
89 name = interaction['name']
90 # Parse the interaction selection
91 interaction_atom_indices = interaction['interface_atom_indices_1'] + interaction['interface_atom_indices_2']
92 interaction_selection = structure.select_atom_indices(interaction_atom_indices)
93 # If the interaction selection matches the overall selection then reuse its data
94 # This was very unlinkly until we started to support coarse grain systems
95 # Now a CG DNA-DNA hybridization will likely fall here
96 if interaction_selection == parsed_overall_selection:
97 # Warn the user
98 warning = 'Interface selection is identical to overall selection. Data will be reused.'
99 warn(f'{name}: {warning}')
100 output_summary.append({
101 'name': name,
102 # Add a note so nobody thinks this is an error
103 'note': warning,
104 'analysis': overall_analysis_name
105 })
106 # At this point overall analysis outout should already exists so we skip the analysis
107 continue
109 # Set a filename for the current interaction data
110 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i)
111 # Add the root of the output analysis filename to the run data
112 analysis_name = get_analysis_name(numbered_output_analysis_filepath)
113 # Append current interaction to the summary
114 output_summary.append({
115 'name': name,
116 'analysis': analysis_name
117 })
119 # If the analysis already exists then proceed to the next interaction
120 if exists(numbered_output_analysis_filepath): continue
121 print(f' Analyzing "{name}" interface ({len(interaction_selection)} atoms)')
122 # Run the analysis
123 data = pt.pairwise_rmsd(pt_trajectory, interaction_selection.to_pytraj())
124 # Convert data to a normal list, since numpy ndarrays are not json serializable
125 data = data.tolist()
126 # Write the interaction analysis output data to disk
127 save_json({
128 'name': name,
129 'rmsds': data,
130 'start': 0,
131 'step': frame_step
132 }, numbered_output_analysis_filepath)
134 # Write in the analysis the starting frame and the step between frames
135 # By default the first frame in the reduced trajectory is the first frame (0)
137 # Export the analysis in json format
138 save_json(output_summary, output_analysis_filepath)