Coverage for mddb_workflow/analyses/rmsds.py: 96%

1from mddb_workflow.tools.xvg_parse import xvg_parse

2from mddb_workflow.tools.get_reduced_trajectory import get_reduced_trajectory

3from mddb_workflow.utils.auxiliar import save_json

4from mddb_workflow.utils.constants import REFERENCE_LABELS, OUTPUT_RMSDS_FILENAME

5from mddb_workflow.utils.gmx_spells import run_gromacs

6from mddb_workflow.utils.type_hints import *

8from os import remove

11def rmsds(

12 trajectory_file : 'File',

13 first_frame_file : 'File',

14 average_structure_file : 'File',

15 output_directory : str,

16 snapshots : int,

17 structure : 'Structure',

18 pbc_selection : 'Selection',

19 inchikey_map : list[dict],

20 frames_limit : int = 5000,

21 ):

22 """Run multiple RMSD analyses. One with each reference (first frame, average structure)

23 and each selection (default: protein, nucleic)."""

24 # Set the main output filepath

25 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSDS_FILENAME}'

27 # Set the default selections to be analyzed

28 default_selections = {

29 'protein': structure.select_protein(),

30 'nucleic': structure.select_nucleic()

31 }

33 # Set selections to be analyzed

34 selections = { **default_selections }

36 # If there is a ligand map then parse them to selections as well

37 for ligand in inchikey_map:

38 if ligand['is_lipid']: continue

39 selection_name = 'ligand ' + ligand['name']

40 selection = structure.select_residue_indices(ligand['residue_indices'])

41 # If the ligand has less than 3 atoms then gromacs can not fit it so it will fail

42 if len(selection) < 3: continue

43 # Add current ligand selection to be analyzed

44 selections[selection_name] = selection

45 # If the ligand selection totally overlaps with a default selection then remove the default

46 for default_selection_name, default_selection in default_selections.items():

47 if default_selection == selection: del selections[default_selection_name]

49 # If there is anything left apart from proteins, nucleic acids and ligands then select it

50 missing_regions = structure.select_all()

51 for selection in selections.values():

52 missing_regions -= selection

53 if missing_regions: selections['other'] = missing_regions

55 # Remove PBC residues from parsed selections

56 non_pbc_selections = {}

57 for selection_name, selection in selections.items():

58 # Substract PBC atoms

59 non_pbc_selection = selection - pbc_selection

60 # If selection after substracting pbc atoms becomes empty then discard it

61 if not non_pbc_selection: continue

62 # Add the the filtered selection to the dict

63 non_pbc_selections[selection_name] = non_pbc_selection

65 # If there is nothing lo analyze at this point then skip the analysis

66 if len(non_pbc_selections) == 0:

67 print(' The RMSDs analysis will be skipped since there is nothing to analyze')

68 return

70 # Get the coarse grain selection

71 cg_selection = structure.select_cg()

73 # The start will be always 0 since we start with the first frame

74 start = 0

76 # Reduce the trajectory according to the frames limit

77 # Use a reduced trajectory in case the original trajectory has many frames

78 # Note that it makes no difference which reference is used here

79 reduced_trajectory_filepath, step, frames = get_reduced_trajectory(

80 first_frame_file,

81 trajectory_file,

82 snapshots,

83 frames_limit,

84 )

86 # Save results in this array

87 output_analysis = []

89 # Set the reference structures to run the RMSD against

90 rmsd_references = [first_frame_file, average_structure_file]

92 # Iterate over each reference and group

93 for reference in rmsd_references:

94 # Get a standarized reference name

95 reference_name = REFERENCE_LABELS[reference.filename]

96 for group_name, group_selection in non_pbc_selections.items():

97 # Set the analysis filename

98 rmsd_analysis = f'rmsd.{reference_name}.{group_name.lower().replace(" ","_")}.xvg'

99 # If part of the selection has coarse grain atoms then skip mass weighting

100 # Otherwise Gromacs may fail since the atom name may not be in the atommass library

101 has_cg = group_selection & cg_selection

102 # Run the rmsd

103 print(f' Reference: {reference_name}, Selection: {group_name},{" NOT" if has_cg else ""} mass weighted')

104 rmsd(reference.path, reduced_trajectory_filepath, group_selection, rmsd_analysis, skip_mass_weighting=has_cg)

105 # Read and parse the output file

106 rmsd_data = xvg_parse(rmsd_analysis, ['times', 'values'])

107 # Format the mined data and append it to the overall output

108 # Multiply by 10 since rmsd comes in nanometers (nm) and we want it in Ångstroms (Å)

109 rmsd_values = [ v*10 for v in rmsd_data['values'] ]

110 data = {

111 'values': rmsd_values,

112 'reference': reference_name,

113 'group': group_name

114 }

115 output_analysis.append(data)

116 # Remove the analysis xvg file since it is not required anymore

117 remove(rmsd_analysis)

118

119 # Export the analysis in json format

120 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath)

121

122# RMSD

123#

124# Perform the RMSd analysis

125def rmsd (

126 reference_filepath : str,

127 trajectory_filepath : str,

128 selection : 'Selection', # This selection will never be empty, since this is checked previously

129 output_analysis_filepath : str,

130 skip_mass_weighting : bool = False):

131

132 # Convert the selection to a ndx file gromacs can read

133 selection_name = 'rmsd_selection'

134 ndx_selection = selection.to_ndx(selection_name)

135 ndx_filename = '.rmsd.ndx'

136 with open(ndx_filename, 'w') as file:

137 file.write(ndx_selection)

138

139 # Run Gromacs

140 run_gromacs(f'rms -s {reference_filepath} -f {trajectory_filepath} \

141 -o {output_analysis_filepath} -n {ndx_filename} {"-mw no" if skip_mass_weighting else ""}',

142 user_input = f'{selection_name} {selection_name}')

143

144 # Remove the ndx file

145 remove(ndx_filename)

Coverage for mddb_workflow / analyses / rmsds.py: 96%

57 statements