Coverage for mddb_workflow / analyses / distance_per_residue.py: 18%

66 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1# Distance per residue analysis 

2# 

3# Perform the distance per residue analysis between each pair of interacting agents 

4# The analysis is carried by pytraj 

5 

6from os.path import exists 

7 

8import pytraj as pt 

9import numpy as np 

10 

11from mddb_workflow.utils.auxiliar import save_json, get_analysis_name, numerate_filename, reprint, warn 

12from mddb_workflow.utils.constants import OUTPUT_DIST_PERRES_FILENAME 

13from mddb_workflow.utils.pyt_spells import get_reduced_pytraj_trajectory 

14from mddb_workflow.utils.type_hints import * 

15 

16# Set a limit of values that can be stored in the analysis without exceeding the mongo limit of 16Mb 

17# Also, this limit is a good reference to avoid loading huge analyses which would lead to long response time in the client 

18# This is an aproximation below the limit which has been observed experimentally 

19N_VALUES_LIMIT = 400000 

20 

21 

22def distance_per_residue( 

23 structure_file: 'File', 

24 trajectory_file: 'File', 

25 output_directory: str, 

26 structure: 'Structure', 

27 interactions: list, 

28 snapshots: int, 

29 frames_limit: int 

30): 

31 """Calculate the distance mean and standard deviation of each pair of residues of different agents. 

32 Note that the distances are calculated for all residues in the agent, not only the interface residues. 

33 """ 

34 # Return before doing anything if there are no interactions 

35 if not interactions or len(interactions) == 0: 

36 print('No interactions were specified') 

37 return 

38 

39 # Set the main output filepath 

40 output_analysis_filepath = f'{output_directory}/{OUTPUT_DIST_PERRES_FILENAME}' 

41 

42 # Set a reference system to handle conversions to pytraj residue numeration 

43 # First set the pytraj topology 

44 pytraj_topology = structure.get_pytraj_topology() 

45 pytraj_residues = list(pytraj_topology.residues) 

46 

47 def residue_2_pytraj_residue_index(residue_index: int) -> int: 

48 """Transform a structure residue to the pytraj residue numeration (1, 2, ... n).""" 

49 residue = structure.residues[residue_index] 

50 residue_number = residue.number 

51 residue_name = residue.name[0:3] 

52 # And check that this residue data matches the pytraj residues data 

53 pytraj_residue = pytraj_residues[residue_index] 

54 if (residue_number == pytraj_residue.original_resid and residue_name == pytraj_residue.name): 

55 return residue_index + 1 

56 # If not, we must iterate over all pytraj residues to find a match 

57 for index, pytraj_residue in enumerate(pytraj_residues): 

58 if (residue_number == pytraj_residue.original_resid and residue_name == pytraj_residue.name): 

59 return index + 1 

60 # Return None if there is no match 

61 return None 

62 

63 # Parse the trajectory intro ptraj 

64 # Reduce it in case it exceeds the frames limit 

65 pt_trajectory, frame_step, frames_count = get_reduced_pytraj_trajectory( 

66 structure_file.path, trajectory_file.path, snapshots, frames_limit) 

67 # Save each analysis to a dict which will be parsed to json 

68 # Here we keep track of the summary, which will also be parsed to json at the end 

69 output_summary = [] 

70 # Run the analysis for each interaction 

71 print() 

72 for n, interaction in enumerate(interactions): 

73 # Get the interaction name 

74 name = interaction['name'] 

75 # Set a filename for the current interaction data 

76 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, n) 

77 # Add the root of the output analysis filename to the run data 

78 analysis_name = get_analysis_name(numbered_output_analysis_filepath) 

79 # Append current interaction to the summary 

80 output_summary.append({ 

81 'name': name, 

82 'analysis': analysis_name 

83 }) 

84 # If the file already exists then skip to the next 

85 if exists(numbered_output_analysis_filepath): continue 

86 reprint(f' Analyzing {name} ({n+1}/{len(interactions)})') 

87 # Get the residues to be used for this interaction 

88 residues_1 = interaction['residue_indices_1'] 

89 residues_2 = interaction['residue_indices_2'] 

90 # First of all, calculate the number of values for this interaction 

91 # If the interaction has more values than we can store then it will be reduced 

92 # Reduced interactions will store only residues in the interface in order to fit 

93 n_values = len(residues_1) * len(residues_2) 

94 reduced = n_values > N_VALUES_LIMIT 

95 # Contact Matrix -- Initialization 

96 # Create 2 lists filled with 0s with the length of the residue number arrays respectively 

97 if reduced: 

98 warn('The analysis has been reduced to interface residues only for this interaction') 

99 residues_1 = interaction['interface_residue_indices_1'] 

100 residues_2 = interaction['interface_residue_indices_2'] 

101 n_values = len(residues_1) * len(residues_2) 

102 if n_values > N_VALUES_LIMIT: raise ValueError('Too many values, even after reducing') 

103 # Show the size of the matrix 

104 h, w = len(residues_2), len(residues_1) 

105 print(f' {h}x{w} residues') 

106 # Convert residues to pytraj residue numbers 

107 pt_residues_1 = list(map(residue_2_pytraj_residue_index, residues_1)) 

108 pt_residues_2 = list(map(residue_2_pytraj_residue_index, residues_2)) 

109 

110 # Contact Matrix -- Calculation (Vectorized) 

111 # Create all mask pairs at once 

112 mask_pairs = [] 

113 for r2 in pt_residues_2: 

114 for r1 in pt_residues_1: 

115 mask_pairs.append(f":{r2} :{r1}") 

116 

117 # Calculate all distances in one call 

118 all_distances = pt.distance(pt_trajectory, mask_pairs) 

119 

120 # Reshape the results into matrices 

121 # all_distances shape: (n_pairs, n_frames) 

122 means_matrix = np.zeros((h, w)) 

123 stdvs_matrix = np.zeros((h, w)) 

124 

125 pair_idx = 0 

126 for i in range(h): 

127 for j in range(w): 

128 distances = all_distances[pair_idx] 

129 means_matrix[i][j] = np.mean(distances) 

130 stdvs_matrix[i][j] = np.std(distances) 

131 pair_idx += 1 

132 # Set the output data for this interaction 

133 save_json({ 

134 'name': name, 

135 'means': means_matrix.tolist(), 

136 'stdvs': stdvs_matrix.tolist(), 

137 }, numbered_output_analysis_filepath) 

138 # Export the analysis in json format 

139 save_json(output_summary, output_analysis_filepath)