Coverage for mddb_workflow / analyses / rmsd_pairwise.py: 84%

49 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1# RMSD pairwise analysis 

2# 

3# Perform the RMSD analysis for pair of frames in the trajectory 

4# The analysis is carried by pytraj 

5 

6from os.path import exists 

7 

8import pytraj as pt 

9 

10from mddb_workflow.utils.pyt_spells import get_reduced_pytraj_trajectory 

11from mddb_workflow.utils.auxiliar import save_json, numerate_filename, get_analysis_name, warn 

12from mddb_workflow.utils.auxiliar import ToolError 

13from mddb_workflow.utils.constants import OUTPUT_RMSD_PAIRWISE_FILENAME 

14from mddb_workflow.utils.type_hints import * 

15 

16# The 'interactions' input is mandatory but it may be an empty list (i.e. there are no interactions) 

17# Take a minimal subset of atoms representing both proteins and nucleic acids 

18def rmsd_pairwise( 

19 structure_file : str, 

20 trajectory_file : str, 

21 output_directory : str, 

22 interactions : list, 

23 snapshots : int, 

24 structure : 'Structure', 

25 pbc_selection : 'Selection', 

26 frames_limit : int = 200, 

27 overall_selection : str = "name CA or name C5'", # equivalent to "@CA,C5'" in pytraj 

28 ): 

29 """Perform an analysis for the overall structure and then one more analysis for each interaction.""" 

30 

31 # Set the main output filepath 

32 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSD_PAIRWISE_FILENAME}' 

33 

34 # Parse the trajectory intro ptraj 

35 # Reduce it in case it exceeds the frames limit 

36 pt_trajectory, frame_step, frames_count = get_reduced_pytraj_trajectory( 

37 structure_file.path, trajectory_file.path, snapshots, frames_limit) 

38 

39 # Parse the overall selection 

40 parsed_overall_selection = structure.select(overall_selection, syntax='vmd') 

41 # If the default selection is empty then use all atoms instead 

42 if not parsed_overall_selection: 

43 print(f' Default selection "{overall_selection}" is empty -> All atoms will be used instead') 

44 parsed_overall_selection = structure.select_all() 

45 # If the default selection has one atom only then use all atoms instead 

46 # Note that a single atom selection will lead to all RMSD values beeing 0 since there is a previous alignment 

47 if len(parsed_overall_selection) == 1: 

48 print(f' Default selection "{overall_selection}" has 1 atom only -> All atoms will be used instead') 

49 parsed_overall_selection = structure.select_all() 

50 

51 # Remove PBC residues from the selection 

52 parsed_overall_selection -= pbc_selection 

53 if not parsed_overall_selection: 

54 print(f' Empty selection after substracting PBC atoms') 

55 return 

56 

57 # Save each analysis to a dict which will be parsed to json 

58 output_summary = [] 

59 

60 # Set a filename for the current interaction data 

61 overall_output_analysis_filepath = numerate_filename(output_analysis_filepath, 0) 

62 # Get the root name out of the output analysis filepath 

63 overall_analysis_name = get_analysis_name(overall_output_analysis_filepath) 

64 # Add it to the summary 

65 output_summary.append({ 

66 'name': 'Overall', 

67 'analysis': overall_analysis_name, 

68 }) 

69 

70 # If the overall output file does not exist then run the overall analysis 

71 if not exists(overall_output_analysis_filepath): 

72 print(f' Analyzing overall structure ({len(parsed_overall_selection)} atoms)') 

73 # Run the analysis 

74 data = pt.pairwise_rmsd(pt_trajectory, parsed_overall_selection.to_pytraj()) 

75 # Convert data to a normal list, since numpy ndarrays are not json serializable 

76 data = data.tolist() 

77 # In case there is no data we stop here 

78 if len(data) == 0: raise ToolError('Something went wrong with pytraj') 

79 # Write the overall analysis output data to disk 

80 save_json({ 

81 'name': 'Overall', 

82 'rmsds': data, 

83 'start': 0, 

84 'step': frame_step 

85 }, overall_output_analysis_filepath) 

86 

87 # Repeat the analysis with the interface residues of each interaction 

88 for i, interaction in enumerate(interactions, 1): 

89 # Get the interaction name 

90 name = interaction['name'] 

91 # Parse the interaction selection 

92 interaction_atom_indices = interaction['interface_atom_indices_1'] + interaction['interface_atom_indices_2'] 

93 interaction_selection = structure.select_atom_indices(interaction_atom_indices) 

94 # If the interaction selection matches the overall selection then reuse its data 

95 # This was very unlinkly until we started to support coarse grain systems 

96 # Now a CG DNA-DNA hybridization will likely fall here 

97 if interaction_selection == parsed_overall_selection: 

98 # Warn the user 

99 warning = 'Interface selection is identical to overall selection. Data will be reused.' 

100 warn(f'{name}: {warning}') 

101 output_summary.append({ 

102 'name': name, 

103 # Add a note so nobody thinks this is an error 

104 'note': warning, 

105 'analysis': overall_analysis_name 

106 }) 

107 # At this point overall analysis outout should already exists so we skip the analysis 

108 continue 

109 

110 # Set a filename for the current interaction data 

111 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i) 

112 # Add the root of the output analysis filename to the run data 

113 analysis_name = get_analysis_name(numbered_output_analysis_filepath) 

114 # Append current interaction to the summary 

115 output_summary.append({ 

116 'name': name, 

117 'analysis': analysis_name 

118 }) 

119 

120 # If the analysis already exists then proceed to the next interaction 

121 if exists(numbered_output_analysis_filepath): continue 

122 print(f' Analyzing "{name}" interface ({len(interaction_selection)} atoms)') 

123 # Run the analysis 

124 data = pt.pairwise_rmsd(pt_trajectory, interaction_selection.to_pytraj()) 

125 # Convert data to a normal list, since numpy ndarrays are not json serializable 

126 data = data.tolist() 

127 # Write the interaction analysis output data to disk 

128 save_json({ 

129 'name': name, 

130 'rmsds': data, 

131 'start': 0, 

132 'step': frame_step 

133 }, numbered_output_analysis_filepath) 

134 

135 # Write in the analysis the starting frame and the step between frames 

136 # By default the first frame in the reduced trajectory is the first frame (0) 

137 

138 # Export the analysis in json format 

139 save_json(output_summary, output_analysis_filepath)