Coverage for mddb_workflow/analyses/rmsd

1# RMSD pairwise analysis

3# Perform the RMSD analysis for pair of frames in the trajectory

4# The analysis is carried by pytraj

6from os.path import exists

8import pytraj as pt

10from mddb_workflow.utils.pyt_spells import get_reduced_pytraj_trajectory

11from mddb_workflow.utils.auxiliar import save_json, numerate_filename, get_analysis_name, warn

12from mddb_workflow.utils.auxiliar import ToolError

13from mddb_workflow.utils.constants import OUTPUT_RMSD_PAIRWISE_FILENAME

14from mddb_workflow.utils.type_hints import *

16# The 'interactions' input is mandatory but it may be an empty list (i.e. there are no interactions)

17# Take a minimal subset of atoms representing both proteins and nucleic acids

18def rmsd_pairwise(

19 structure_file : str,

20 trajectory_file : str,

21 output_directory : str,

22 interactions : list,

23 snapshots : int,

24 structure : 'Structure',

25 pbc_selection : 'Selection',

26 frames_limit : int = 200,

27 overall_selection : str = "name CA or name C5'", # equivalent to "@CA,C5'" in pytraj

28 ):

29 """Perform an analysis for the overall structure and then one more analysis for each interaction."""

31 # Set the main output filepath

32 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSD_PAIRWISE_FILENAME}'

34 # Parse the trajectory intro ptraj

35 # Reduce it in case it exceeds the frames limit

36 pt_trajectory, frame_step, frames_count = get_reduced_pytraj_trajectory(

37 structure_file.path, trajectory_file.path, snapshots, frames_limit)

39 # Parse the overall selection

40 parsed_overall_selection = structure.select(overall_selection, syntax='vmd')

41 # If the default selection is empty then use all atoms instead

42 if not parsed_overall_selection:

43 print(f' Default selection "{overall_selection}" is empty -> All atoms will be used instead')

44 parsed_overall_selection = structure.select_all()

45 # If the default selection has one atom only then use all atoms instead

46 # Note that a single atom selection will lead to all RMSD values beeing 0 since there is a previous alignment

47 if len(parsed_overall_selection) == 1:

48 print(f' Default selection "{overall_selection}" has 1 atom only -> All atoms will be used instead')

49 parsed_overall_selection = structure.select_all()

51 # Remove PBC residues from the selection

52 parsed_overall_selection -= pbc_selection

53 if not parsed_overall_selection:

54 print(f' Empty selection after substracting PBC atoms')

55 return

57 # Save each analysis to a dict which will be parsed to json

58 output_summary = []

60 # Set a filename for the current interaction data

61 overall_output_analysis_filepath = numerate_filename(output_analysis_filepath, 0)

62 # Get the root name out of the output analysis filepath

63 overall_analysis_name = get_analysis_name(overall_output_analysis_filepath)

64 # Add it to the summary

65 output_summary.append({

66 'name': 'Overall',

67 'analysis': overall_analysis_name,

68 })

70 # If the overall output file does not exist then run the overall analysis

71 if not exists(overall_output_analysis_filepath):

72 print(f' Analyzing overall structure ({len(parsed_overall_selection)} atoms)')

73 # Run the analysis

74 data = pt.pairwise_rmsd(pt_trajectory, parsed_overall_selection.to_pytraj())

75 # Convert data to a normal list, since numpy ndarrays are not json serializable

76 data = data.tolist()

77 # In case there is no data we stop here

78 if len(data) == 0: raise ToolError('Something went wrong with pytraj')

79 # Write the overall analysis output data to disk

80 save_json({

81 'name': 'Overall',

82 'rmsds': data,

83 'start': 0,

84 'step': frame_step

85 }, overall_output_analysis_filepath)

87 # Repeat the analysis with the interface residues of each interaction

88 for i, interaction in enumerate(interactions, 1):

89 # Get the interaction name

90 name = interaction['name']

91 # Parse the interaction selection

92 interaction_atom_indices = interaction['interface_atom_indices_1'] + interaction['interface_atom_indices_2']

93 interaction_selection = structure.select_atom_indices(interaction_atom_indices)

94 # If the interaction selection matches the overall selection then reuse its data

95 # This was very unlinkly until we started to support coarse grain systems

96 # Now a CG DNA-DNA hybridization will likely fall here

97 if interaction_selection == parsed_overall_selection:

98 # Warn the user

99 warning = 'Interface selection is identical to overall selection. Data will be reused.'

100 warn(f'{name}: {warning}')

101 output_summary.append({

102 'name': name,

103 # Add a note so nobody thinks this is an error

104 'note': warning,

105 'analysis': overall_analysis_name

106 })

107 # At this point overall analysis outout should already exists so we skip the analysis

108 continue

109

110 # Set a filename for the current interaction data

111 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i)

112 # Add the root of the output analysis filename to the run data

113 analysis_name = get_analysis_name(numbered_output_analysis_filepath)

114 # Append current interaction to the summary

115 output_summary.append({

116 'name': name,

117 'analysis': analysis_name

118 })

119

120 # If the analysis already exists then proceed to the next interaction

121 if exists(numbered_output_analysis_filepath): continue

122 print(f' Analyzing "{name}" interface ({len(interaction_selection)} atoms)')

123 # Run the analysis

124 data = pt.pairwise_rmsd(pt_trajectory, interaction_selection.to_pytraj())

125 # Convert data to a normal list, since numpy ndarrays are not json serializable

126 data = data.tolist()

127 # Write the interaction analysis output data to disk

128 save_json({

129 'name': name,

130 'rmsds': data,

131 'start': 0,

132 'step': frame_step

133 }, numbered_output_analysis_filepath)

134

135 # Write in the analysis the starting frame and the step between frames

136 # By default the first frame in the reduced trajectory is the first frame (0)

137

138 # Export the analysis in json format

139 save_json(output_summary, output_analysis_filepath)

Coverage for mddb_workflow / analyses / rmsd_pairwise.py: 84%

49 statements