Coverage for model_workflow/analyses/rmsd_pairwise.py: 79%

48 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1# RMSD pairwise analysis 

2# 

3# Perform the RMSD analysis for pair of frames in the trajectory 

4# The analysis is carried by pytraj 

5 

6from os.path import exists 

7 

8import pytraj as pt 

9 

10from model_workflow.utils.pyt_spells import get_reduced_pytraj_trajectory 

11from model_workflow.utils.auxiliar import save_json, numerate_filename, get_analysis_name, warn 

12from model_workflow.utils.constants import OUTPUT_RMSD_PAIRWISE_FILENAME 

13from model_workflow.utils.type_hints import * 

14 

15# The 'interactions' input is mandatory but it may be an empty list (i.e. there are no interactions) 

16# Take a minimal subset of atoms representing both proteins and nucleic acids 

17def rmsd_pairwise( 

18 structure_file : str, 

19 trajectory_file : str, 

20 output_directory : str, 

21 interactions : list, 

22 snapshots : int, 

23 structure : 'Structure', 

24 pbc_selection : 'Selection', 

25 frames_limit : int = 200, 

26 overall_selection : str = "name CA or name C5'", # equivalent to "@CA,C5'" in pytraj 

27 ): 

28 """Perform an analysis for the overall structure and then one more analysis for each interaction.""" 

29 

30 # Set the main output filepath 

31 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSD_PAIRWISE_FILENAME}' 

32 

33 # Parse the trajectory intro ptraj 

34 # Reduce it in case it exceeds the frames limit 

35 pt_trajectory, frame_step, frames_count = get_reduced_pytraj_trajectory( 

36 structure_file.path, trajectory_file.path, snapshots, frames_limit) 

37 

38 # Parse the overall selection 

39 parsed_overall_selection = structure.select(overall_selection, syntax='vmd') 

40 # If the default selection is empty then use all atoms instead 

41 if not parsed_overall_selection: 

42 print(f' Default selection "{overall_selection}" is empty -> All atoms will be used instead') 

43 parsed_overall_selection = structure.select_all() 

44 # If the default selection has one atom only then use all atoms instead 

45 # Note that a single atom selection will lead to all RMSD values beeing 0 since there is a previous alignment 

46 if len(parsed_overall_selection) == 1: 

47 print(f' Default selection "{overall_selection}" has 1 atom only -> All atoms will be used instead') 

48 parsed_overall_selection = structure.select_all() 

49 

50 # Remove PBC residues from the selection 

51 parsed_overall_selection -= pbc_selection 

52 if not parsed_overall_selection: 

53 print(f' Empty selection after substracting PBC atoms') 

54 return 

55 

56 # Save each analysis to a dict which will be parsed to json 

57 output_summary = [] 

58 

59 # Set a filename for the current interaction data 

60 overall_output_analysis_filepath = numerate_filename(output_analysis_filepath, 0) 

61 # Get the root name out of the output analysis filepath 

62 overall_analysis_name = get_analysis_name(overall_output_analysis_filepath) 

63 # Add it to the summary 

64 output_summary.append({ 

65 'name': 'Overall', 

66 'analysis': overall_analysis_name, 

67 }) 

68 

69 # If the overall output file does not exist then run the overall analysis 

70 if not exists(overall_output_analysis_filepath): 

71 print(f' Analyzing overall structure ({len(parsed_overall_selection)} atoms)') 

72 # Run the analysis 

73 data = pt.pairwise_rmsd(pt_trajectory, parsed_overall_selection.to_pytraj()) 

74 # Convert data to a normal list, since numpy ndarrays are not json serializable 

75 data = data.tolist() 

76 # In case there is no data we stop here 

77 if len(data) == 0: raise SystemExit('Something went wrong with pytraj') 

78 # Write the overall analysis output data to disk 

79 save_json({ 

80 'name': 'Overall', 

81 'rmsds': data, 

82 'start': 0, 

83 'step': frame_step 

84 }, overall_output_analysis_filepath) 

85 

86 # Repeat the analysis with the interface residues of each interaction 

87 for i, interaction in enumerate(interactions, 1): 

88 # Get the interaction name 

89 name = interaction['name'] 

90 # Parse the interaction selection 

91 interaction_atom_indices = interaction['interface_atom_indices_1'] + interaction['interface_atom_indices_2'] 

92 interaction_selection = structure.select_atom_indices(interaction_atom_indices) 

93 # If the interaction selection matches the overall selection then reuse its data 

94 # This was very unlinkly until we started to support coarse grain systems 

95 # Now a CG DNA-DNA hybridization will likely fall here 

96 if interaction_selection == parsed_overall_selection: 

97 # Warn the user 

98 warning = 'Interface selection is identical to overall selection. Data will be reused.' 

99 warn(f'{name}: {warning}') 

100 output_summary.append({ 

101 'name': name, 

102 # Add a note so nobody thinks this is an error 

103 'note': warning, 

104 'analysis': overall_analysis_name 

105 }) 

106 # At this point overall analysis outout should already exists so we skip the analysis 

107 continue 

108 

109 # Set a filename for the current interaction data 

110 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i) 

111 # Add the root of the output analysis filename to the run data 

112 analysis_name = get_analysis_name(numbered_output_analysis_filepath) 

113 # Append current interaction to the summary 

114 output_summary.append({ 

115 'name': name, 

116 'analysis': analysis_name 

117 }) 

118 

119 # If the analysis already exists then proceed to the next interaction 

120 if exists(numbered_output_analysis_filepath): continue 

121 print(f' Analyzing "{name}" interface ({len(interaction_selection)} atoms)') 

122 # Run the analysis 

123 data = pt.pairwise_rmsd(pt_trajectory, interaction_selection.to_pytraj()) 

124 # Convert data to a normal list, since numpy ndarrays are not json serializable 

125 data = data.tolist() 

126 # Write the interaction analysis output data to disk 

127 save_json({ 

128 'name': name, 

129 'rmsds': data, 

130 'start': 0, 

131 'step': frame_step 

132 }, numbered_output_analysis_filepath) 

133 

134 # Write in the analysis the starting frame and the step between frames 

135 # By default the first frame in the reduced trajectory is the first frame (0) 

136 

137 # Export the analysis in json format 

138 save_json(output_summary, output_analysis_filepath)