Coverage for model_workflow/analyses/distance_per_residue.py: 86%
66 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1# Distance per residue analysis
2#
3# Perform the distance per residue analysis between each pair of interacting agents
# The analysis is carried out by pytraj
6from os.path import exists
8import pytraj as pt
9import numpy as np
11from model_workflow.utils.auxiliar import save_json, get_analysis_name, numerate_filename, reprint, warn
12from model_workflow.utils.constants import OUTPUT_DIST_PERRES_FILENAME
13from model_workflow.utils.pyt_spells import get_reduced_pytraj_trajectory
14from model_workflow.utils.type_hints import *
# Set a limit of values that can be stored in the analysis without exceeding the mongo limit of 16Mb
# Also, this limit is a good reference to avoid loading huge analyses which would lead to long response time in the client
# This is an approximation below the limit which has been observed experimentally
N_VALUES_LIMIT = 400000
def distance_per_residue (
    structure_file : 'File',
    trajectory_file : 'File',
    output_directory : str,
    structure : 'Structure',
    interactions : list,
    snapshots : int,
    frames_limit : int
):
    """Calculate the distance mean and standard deviation of each pair of residues of different agents.
    Note that the distances are calculated for all residues in the agent, not only the interface residues.

    Args:
        structure_file: file with the structure (topology) used to load the pytraj trajectory.
        trajectory_file: file with the trajectory to be analyzed.
        output_directory: directory where the per-interaction JSON files and the summary are written.
        structure: parsed structure, used to map residues to pytraj residue numeration.
        interactions: list of interaction dicts with 'name', 'residue_indices_1/2' and
            'interface_residue_indices_1/2' keys.
        snapshots: total number of frames available in the trajectory.
        frames_limit: maximum number of frames to be analyzed (trajectory is reduced beyond it).

    Raises:
        ValueError: if an interaction exceeds N_VALUES_LIMIT even after reducing to interface
            residues, or if some residue cannot be matched to a pytraj residue.
    """
    # Return before doing anything if there are no interactions
    if not interactions:
        print('No interactions were specified')
        return
    # Set the main output filepath
    output_analysis_filepath = f'{output_directory}/{OUTPUT_DIST_PERRES_FILENAME}'
    # Set a reference system to handle conversions to pytraj residue numeration
    # First set the pytraj topology
    pytraj_topology = structure.get_pytraj_topology()
    pytraj_residues = list(pytraj_topology.residues)
    # Transform a structure residue index to the pytraj residue numeration (1, 2, ... n)
    # Returns None when the residue has no match among the pytraj residues
    def residue_2_pytraj_residue_index (residue_index : int) -> int:
        residue = structure.residues[residue_index]
        residue_number = residue.number
        # Truncate the name to 3 characters to match pytraj residue names
        residue_name = residue.name[0:3]
        # First check that this residue data matches the pytraj residue at the same position
        pytraj_residue = pytraj_residues[residue_index]
        if residue_number == pytraj_residue.original_resid and residue_name == pytraj_residue.name:
            return residue_index + 1
        # If not, we must iterate over all pytraj residues to find a match
        for index, candidate in enumerate(pytraj_residues):
            if residue_number == candidate.original_resid and residue_name == candidate.name:
                return index + 1
        # Return None if there is no match
        return None
    # Parse the trajectory into pytraj
    # Reduce it in case it exceeds the frames limit
    pt_trajectory, frame_step, frames_count = get_reduced_pytraj_trajectory(
        structure_file.path, trajectory_file.path, snapshots, frames_limit)
    # Save each analysis to a dict which will be parsed to json
    # Here we keep track of the summary, which will also be parsed to json at the end
    output_summary = []
    # Run the analysis for each interaction
    print()
    for n, interaction in enumerate(interactions):
        # Get the interaction name
        name = interaction['name']
        # Set a filename for the current interaction data
        numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, n)
        # Add the root of the output analysis filename to the run data
        analysis_name = get_analysis_name(numbered_output_analysis_filepath)
        # Append current interaction to the summary
        output_summary.append({
            'name': name,
            'analysis': analysis_name
        })
        # If the file already exists then skip to the next
        if exists(numbered_output_analysis_filepath): continue
        reprint(f' Analyzing {name} ({n+1}/{len(interactions)})')
        # Get the residues to be used for this interaction
        residues_1 = interaction['residue_indices_1']
        residues_2 = interaction['residue_indices_2']
        # First of all, calculate the number of values for this interaction
        # If the interaction has more values than we can store then it will be reduced
        # Reduced interactions will store only residues in the interface in order to fit
        n_values = len(residues_1) * len(residues_2)
        if n_values > N_VALUES_LIMIT:
            warn('The analysis has been reduced to interface residues only for this interaction')
            residues_1 = interaction['interface_residue_indices_1']
            residues_2 = interaction['interface_residue_indices_2']
            n_values = len(residues_1) * len(residues_2)
            if n_values > N_VALUES_LIMIT: raise ValueError('Too many values, even after reducing')
        # Show the size of the matrix
        h, w = len(residues_2), len(residues_1)
        print(f' {h}x{w} residues')
        # Convert residues to pytraj residue numbers
        pt_residues_1 = [residue_2_pytraj_residue_index(r) for r in residues_1]
        pt_residues_2 = [residue_2_pytraj_residue_index(r) for r in residues_2]
        # Fail explicitly if any residue could not be matched
        # Otherwise a None would silently build a broken ':None' pytraj mask
        if None in pt_residues_1 or None in pt_residues_2:
            raise ValueError(f'Some residues could not be matched to pytraj residues in interaction "{name}"')
        # Contact Matrix -- Calculation (Vectorized)
        # Create all mask pairs at once, rows (residues_2) in the outer loop
        mask_pairs = [f":{r2} :{r1}" for r2 in pt_residues_2 for r1 in pt_residues_1]
        # Calculate all distances in one call
        # all_distances shape: (n_pairs, n_frames)
        all_distances = pt.distance(pt_trajectory, mask_pairs)
        # Reshape the results into (h, w, n_frames) and reduce over frames in one vectorized pass
        distances = np.asarray(all_distances).reshape(h, w, -1)
        means_matrix = distances.mean(axis=2)
        stdvs_matrix = distances.std(axis=2)
        # Set the output data for this interaction
        save_json({
            'name': name,
            'means': means_matrix.tolist(),
            'stdvs': stdvs_matrix.tolist(),
        }, numbered_output_analysis_filepath)
    # Export the analysis summary in json format
    save_json(output_summary, output_analysis_filepath)