Coverage for mddb_workflow / analyses / rmsd_pairwise.py: 84%
49 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
1# RMSD pairwise analysis
2#
3# Perform the RMSD analysis for pair of frames in the trajectory
4# The analysis is carried by pytraj
6from os.path import exists
8import pytraj as pt
10from mddb_workflow.utils.pyt_spells import get_reduced_pytraj_trajectory
11from mddb_workflow.utils.auxiliar import save_json, numerate_filename, get_analysis_name, warn
12from mddb_workflow.utils.auxiliar import ToolError
13from mddb_workflow.utils.constants import OUTPUT_RMSD_PAIRWISE_FILENAME
14from mddb_workflow.utils.type_hints import *
16# The 'interactions' input is mandatory but it may be an empty list (i.e. there are no interactions)
17# Take a minimal subset of atoms representing both proteins and nucleic acids
18def rmsd_pairwise(
19 structure_file : str,
20 trajectory_file : str,
21 output_directory : str,
22 interactions : list,
23 snapshots : int,
24 structure : 'Structure',
25 pbc_selection : 'Selection',
26 frames_limit : int = 200,
27 overall_selection : str = "name CA or name C5'", # equivalent to "@CA,C5'" in pytraj
28 ):
29 """Perform an analysis for the overall structure and then one more analysis for each interaction."""
31 # Set the main output filepath
32 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSD_PAIRWISE_FILENAME}'
34 # Parse the trajectory intro ptraj
35 # Reduce it in case it exceeds the frames limit
36 pt_trajectory, frame_step, frames_count = get_reduced_pytraj_trajectory(
37 structure_file.path, trajectory_file.path, snapshots, frames_limit)
39 # Parse the overall selection
40 parsed_overall_selection = structure.select(overall_selection, syntax='vmd')
41 # If the default selection is empty then use all atoms instead
42 if not parsed_overall_selection:
43 print(f' Default selection "{overall_selection}" is empty -> All atoms will be used instead')
44 parsed_overall_selection = structure.select_all()
45 # If the default selection has one atom only then use all atoms instead
46 # Note that a single atom selection will lead to all RMSD values beeing 0 since there is a previous alignment
47 if len(parsed_overall_selection) == 1:
48 print(f' Default selection "{overall_selection}" has 1 atom only -> All atoms will be used instead')
49 parsed_overall_selection = structure.select_all()
51 # Remove PBC residues from the selection
52 parsed_overall_selection -= pbc_selection
53 if not parsed_overall_selection:
54 print(f' Empty selection after substracting PBC atoms')
55 return
57 # Save each analysis to a dict which will be parsed to json
58 output_summary = []
60 # Set a filename for the current interaction data
61 overall_output_analysis_filepath = numerate_filename(output_analysis_filepath, 0)
62 # Get the root name out of the output analysis filepath
63 overall_analysis_name = get_analysis_name(overall_output_analysis_filepath)
64 # Add it to the summary
65 output_summary.append({
66 'name': 'Overall',
67 'analysis': overall_analysis_name,
68 })
70 # If the overall output file does not exist then run the overall analysis
71 if not exists(overall_output_analysis_filepath):
72 print(f' Analyzing overall structure ({len(parsed_overall_selection)} atoms)')
73 # Run the analysis
74 data = pt.pairwise_rmsd(pt_trajectory, parsed_overall_selection.to_pytraj())
75 # Convert data to a normal list, since numpy ndarrays are not json serializable
76 data = data.tolist()
77 # In case there is no data we stop here
78 if len(data) == 0: raise ToolError('Something went wrong with pytraj')
79 # Write the overall analysis output data to disk
80 save_json({
81 'name': 'Overall',
82 'rmsds': data,
83 'start': 0,
84 'step': frame_step
85 }, overall_output_analysis_filepath)
87 # Repeat the analysis with the interface residues of each interaction
88 for i, interaction in enumerate(interactions, 1):
89 # Get the interaction name
90 name = interaction['name']
91 # Parse the interaction selection
92 interaction_atom_indices = interaction['interface_atom_indices_1'] + interaction['interface_atom_indices_2']
93 interaction_selection = structure.select_atom_indices(interaction_atom_indices)
94 # If the interaction selection matches the overall selection then reuse its data
95 # This was very unlinkly until we started to support coarse grain systems
96 # Now a CG DNA-DNA hybridization will likely fall here
97 if interaction_selection == parsed_overall_selection:
98 # Warn the user
99 warning = 'Interface selection is identical to overall selection. Data will be reused.'
100 warn(f'{name}: {warning}')
101 output_summary.append({
102 'name': name,
103 # Add a note so nobody thinks this is an error
104 'note': warning,
105 'analysis': overall_analysis_name
106 })
107 # At this point overall analysis outout should already exists so we skip the analysis
108 continue
110 # Set a filename for the current interaction data
111 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i)
112 # Add the root of the output analysis filename to the run data
113 analysis_name = get_analysis_name(numbered_output_analysis_filepath)
114 # Append current interaction to the summary
115 output_summary.append({
116 'name': name,
117 'analysis': analysis_name
118 })
120 # If the analysis already exists then proceed to the next interaction
121 if exists(numbered_output_analysis_filepath): continue
122 print(f' Analyzing "{name}" interface ({len(interaction_selection)} atoms)')
123 # Run the analysis
124 data = pt.pairwise_rmsd(pt_trajectory, interaction_selection.to_pytraj())
125 # Convert data to a normal list, since numpy ndarrays are not json serializable
126 data = data.tolist()
127 # Write the interaction analysis output data to disk
128 save_json({
129 'name': name,
130 'rmsds': data,
131 'start': 0,
132 'step': frame_step
133 }, numbered_output_analysis_filepath)
135 # Write in the analysis the starting frame and the step between frames
136 # By default the first frame in the reduced trajectory is the first frame (0)
138 # Export the analysis in json format
139 save_json(output_summary, output_analysis_filepath)