Coverage for model_workflow/analyses/rmsd

1# RMSD pairwise analysis

3# Perform the RMSD analysis for pair of frames in the trajectory

4# The analysis is carried by pytraj

6from os.path import exists

8import pytraj as pt

10from model_workflow.utils.pyt_spells import get_reduced_pytraj_trajectory

11from model_workflow.utils.auxiliar import save_json, numerate_filename, get_analysis_name, warn

12from model_workflow.utils.constants import OUTPUT_RMSD_PAIRWISE_FILENAME

13from model_workflow.utils.type_hints import *

15# The 'interactions' input is mandatory but it may be an empty list (i.e. there are no interactions)

16# Take a minimal subset of atoms representing both proteins and nucleic acids

17def rmsd_pairwise(

18 structure_file : str,

19 trajectory_file : str,

20 output_directory : str,

21 interactions : list,

22 snapshots : int,

23 structure : 'Structure',

24 pbc_selection : 'Selection',

25 frames_limit : int = 200,

26 overall_selection : str = "name CA or name C5'", # equivalent to "@CA,C5'" in pytraj

27 ):

28 """Perform an analysis for the overall structure and then one more analysis for each interaction."""

30 # Set the main output filepath

31 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSD_PAIRWISE_FILENAME}'

33 # Parse the trajectory intro ptraj

34 # Reduce it in case it exceeds the frames limit

35 pt_trajectory, frame_step, frames_count = get_reduced_pytraj_trajectory(

36 structure_file.path, trajectory_file.path, snapshots, frames_limit)

38 # Parse the overall selection

39 parsed_overall_selection = structure.select(overall_selection, syntax='vmd')

40 # If the default selection is empty then use all atoms instead

41 if not parsed_overall_selection:

42 print(f' Default selection "{overall_selection}" is empty -> All atoms will be used instead')

43 parsed_overall_selection = structure.select_all()

44 # If the default selection has one atom only then use all atoms instead

45 # Note that a single atom selection will lead to all RMSD values beeing 0 since there is a previous alignment

46 if len(parsed_overall_selection) == 1:

47 print(f' Default selection "{overall_selection}" has 1 atom only -> All atoms will be used instead')

48 parsed_overall_selection = structure.select_all()

50 # Remove PBC residues from the selection

51 parsed_overall_selection -= pbc_selection

52 if not parsed_overall_selection:

53 print(f' Empty selection after substracting PBC atoms')

54 return

56 # Save each analysis to a dict which will be parsed to json

57 output_summary = []

59 # Set a filename for the current interaction data

60 overall_output_analysis_filepath = numerate_filename(output_analysis_filepath, 0)

61 # Get the root name out of the output analysis filepath

62 overall_analysis_name = get_analysis_name(overall_output_analysis_filepath)

63 # Add it to the summary

64 output_summary.append({

65 'name': 'Overall',

66 'analysis': overall_analysis_name,

67 })

69 # If the overall output file does not exist then run the overall analysis

70 if not exists(overall_output_analysis_filepath):

71 print(f' Analyzing overall structure ({len(parsed_overall_selection)} atoms)')

72 # Run the analysis

73 data = pt.pairwise_rmsd(pt_trajectory, parsed_overall_selection.to_pytraj())

74 # Convert data to a normal list, since numpy ndarrays are not json serializable

75 data = data.tolist()

76 # In case there is no data we stop here

77 if len(data) == 0: raise SystemExit('Something went wrong with pytraj')

78 # Write the overall analysis output data to disk

79 save_json({

80 'name': 'Overall',

81 'rmsds': data,

82 'start': 0,

83 'step': frame_step

84 }, overall_output_analysis_filepath)

86 # Repeat the analysis with the interface residues of each interaction

87 for i, interaction in enumerate(interactions, 1):

88 # Get the interaction name

89 name = interaction['name']

90 # Parse the interaction selection

91 interaction_atom_indices = interaction['interface_atom_indices_1'] + interaction['interface_atom_indices_2']

92 interaction_selection = structure.select_atom_indices(interaction_atom_indices)

93 # If the interaction selection matches the overall selection then reuse its data

94 # This was very unlinkly until we started to support coarse grain systems

95 # Now a CG DNA-DNA hybridization will likely fall here

96 if interaction_selection == parsed_overall_selection:

97 # Warn the user

98 warning = 'Interface selection is identical to overall selection. Data will be reused.'

99 warn(f'{name}: {warning}')

100 output_summary.append({

101 'name': name,

102 # Add a note so nobody thinks this is an error

103 'note': warning,

104 'analysis': overall_analysis_name

105 })

106 # At this point overall analysis outout should already exists so we skip the analysis

107 continue

108

109 # Set a filename for the current interaction data

110 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i)

111 # Add the root of the output analysis filename to the run data

112 analysis_name = get_analysis_name(numbered_output_analysis_filepath)

113 # Append current interaction to the summary

114 output_summary.append({

115 'name': name,

116 'analysis': analysis_name

117 })

118

119 # If the analysis already exists then proceed to the next interaction

120 if exists(numbered_output_analysis_filepath): continue

121 print(f' Analyzing "{name}" interface ({len(interaction_selection)} atoms)')

122 # Run the analysis

123 data = pt.pairwise_rmsd(pt_trajectory, interaction_selection.to_pytraj())

124 # Convert data to a normal list, since numpy ndarrays are not json serializable

125 data = data.tolist()

126 # Write the interaction analysis output data to disk

127 save_json({

128 'name': name,

129 'rmsds': data,

130 'start': 0,

131 'step': frame_step

132 }, numbered_output_analysis_filepath)

133

134 # Write in the analysis the starting frame and the step between frames

135 # By default the first frame in the reduced trajectory is the first frame (0)

136

137 # Export the analysis in json format

138 save_json(output_summary, output_analysis_filepath)

Coverage for model_workflow/analyses/rmsd_pairwise.py: 79%

48 statements