Coverage for mddb_workflow / tools / image_and_fit.py: 15%

66 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1# This script is used to fit a trajectory (i.e. eliminate translation and rotation) 

2# This process is carried by Gromacs 

3 

4from mddb_workflow.utils.structures import Structure 

5from mddb_workflow.utils.auxiliar import InputError, MISSING_TOPOLOGY 

6from mddb_workflow.utils.gmx_spells import run_gromacs 

7from mddb_workflow.utils.type_hints import * 

8 

9# Set a pair of independent auxiliar functions to save and recover chains from a pdb file 

10# WARNING: These functions must be used only when the pdb has not changed in number of atoms 

11 

12# Get a list with each atom chain from a pdb 

13def get_chains (pdb_filename : str) -> list: 

14 structure = Structure.from_pdb_file(pdb_filename) 

15 chains = structure.chains 

16 return chains 

17 

18# Set each atom chain from a pdb from a list 

19def set_chains (pdb_filename : str, chains : list): 

20 structure = Structure.from_pdb_file(pdb_filename) 

21 structure.chains = chains 

22 structure.generate_pdb_file(pdb_filename) 

23 

24# Set the default centering/fitting selection (vmd syntax): protein and nucleic acids 

25CENTER_SELECTION_NAME = 'protein_and_nucleic_acids' 

26CENTER_INDEX_FILEPATH = '.index.ndx' 

27 

28# Image and fit a trajectory 

29# image - Set if it must me imaged 

30# fit - Set if it must be fitted 

31# translation - Set how it must be translated during imaging 

32# pbc_selection - Set a selection to exclude PBC residues from both the centering and the fitting focuses 

33def image_and_fit ( 

34 # Tested supported formats are .pdb and .tpr 

35 input_structure_file : 'File', 

36 # Tested supported formats are .trr and .xtc 

37 input_trajectory_file : 'File', 

38 # This must be in .tpr format 

39 # This is optional if there are no PBC residues 

40 input_topology_file : Union['File', Exception], 

41 output_structure_file : 'File', 

42 output_trajectory_file : 'File', 

43 image : bool, 

44 fit : bool, 

45 translation : list, 

46 # Note that this is an early provisional structure 

47 structure : 'Structure', 

48 # Selection of atoms which are to stay in PBC conditions after imaging 

49 # Note that this is an early provisional atom selection 

50 pbc_selection : 'Selection', 

51 ) -> str: 

52 

53 print(f' Image: {image} | Fit: {fit}') 

54 

55 if not image and not fit: 

56 return 

57 

58 # In order to run the imaging with PBC residues we need a .tpr file, not just the .pdb file 

59 # This is because there is a '-pbc mol' step which only works with a .tpr file 

60 is_tpr_available = input_topology_file != MISSING_TOPOLOGY and input_topology_file.format == 'tpr' 

61 has_pbc_atoms = bool(pbc_selection) 

62 if image and not is_tpr_available and has_pbc_atoms: 

63 raise InputError('In order to image a simulation with PBC residues using Gromacs it is mandatory to provide a .tpr file') 

64 

65 # If we have coarse grain then we can only fit if we have a tpr file 

66 # It will not work otherwise since we need atom masses 

67 # For some image protocols we need to work with pdbs at some points so we directly surrender 

68 if structure.select_cg(): 

69 if image: raise InputError('We cannot image a coarse grain simulation using Gromacs') 

70 if not is_tpr_available: 

71 raise InputError('We cannot fit a coarse grain simulation using Gromacs without a TPR file') 

72 

73 # First of all save chains 

74 # Gromacs will delete chains so we need to recover them after 

75 chains_backup = get_chains(input_structure_file.path) 

76 

77 # Set a custom index file (.ndx) to select protein and nucleic acids 

78 # This is useful for some imaging steps (centering and fitting) 

79 system_selection = structure.select('all', syntax='vmd') 

80 protein_selection = structure.select_protein() 

81 nucleic_selection = structure.select_nucleic() 

82 custom_selection = protein_selection + nucleic_selection 

83 if not custom_selection: 

84 raise InputError(f'The default selection to center (protein or nucleic) is empty. Please image your simulation manually.') 

85 # Exclude PBC residues from this custom selection 

86 custom_selection -= pbc_selection 

87 # Convert both selections to a single ndx file which gromacs can read 

88 system_selection_ndx = system_selection.to_ndx('System') 

89 selection_ndx = custom_selection.to_ndx(CENTER_SELECTION_NAME) 

90 with open(CENTER_INDEX_FILEPATH, 'w') as file: 

91 file.write(system_selection_ndx) 

92 file.write(selection_ndx) 

93 

94 # Imaging -------------------------------------------------------------------------------------- 

95 

96 if image: 

97 print(' Running first imaging step') 

98 

99 # Check if coordinates are to be translated 

100 must_translate = translation != [0, 0, 0] 

101 

102 # If so run the imaging process without the '-center' flag 

103 if must_translate: 

104 # WARNING: Adding '-ur compact' makes everything stay the same 

105 run_gromacs(f'trjconv -s {input_structure_file.path} \ 

106 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \ 

107 -trans {translation[0]} {translation[1]} {translation[2]} \ 

108 -pbc atom', user_input = 'System', show_error_logs = True) 

109 # If no translation is required and we have a tpr then use it to make molecules whole 

110 elif is_tpr_available: 

111 run_gromacs(f'trjconv -s {input_topology_file.path} \ 

112 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \ 

113 -pbc whole', user_input = 'System', show_error_logs = True) 

114 # Otherwise, center the custom selection 

115 else: 

116 run_gromacs(f'trjconv -s {input_structure_file.path} \ 

117 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \ 

118 -pbc atom -center -n {CENTER_INDEX_FILEPATH}', 

119 user_input = f'{CENTER_SELECTION_NAME} System', show_error_logs = True) 

120 

121 # Select the first frame of the recently imaged trayectory as the new topology 

122 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path) 

123 

124 # If there are PBC residues then run a '-pbc mol' to make all residues stay inside the box anytime 

125 if has_pbc_atoms: 

126 print(' Fencing PBC molecules back to the box') 

127 # Run Gromacs 

128 run_gromacs(f'trjconv -s {input_topology_file.path} \ 

129 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \ 

130 -pbc mol -n {CENTER_INDEX_FILEPATH}', 

131 user_input = 'System', show_error_logs = True) 

132 

133 # Select the first frame of the recently translated and imaged trayectory as the new topology 

134 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path) 

135 

136 

137 # ----------------------------------------------------------------------------------------------- 

138 

139 # If there are no PBC residues then run a '-pbc nojump' to avoid non-sense jumping of any molecule 

140 else: 

141 print(' Running no-jump') 

142 # Run Gromacs 

143 # Expanding the box may help in some situations but there are secondary effects 

144 # i.e. -box 999 999 999 

145 run_gromacs(f'trjconv -s {output_structure_file.path} \ 

146 -f {output_trajectory_file.path} -o {output_trajectory_file.path} \ 

147 -pbc nojump', 

148 user_input = 'System', show_error_logs = True) 

149 

150 # Fitting -------------------------------------------------------------------------------------- 

151 

152 # Remove translation and rotation in the trajectory using the custom selection as reference 

153 # WARNING: Sometimes, simulations with membranes do not require this step and they may get worse when fitted 

154 if fit: 

155 print(' Running fitting step') 

156 

157 # The trajectory to fit is the already imaged trajectory 

158 # However, if there was no imaging, the trajectory to fit is the input trajectory 

159 structure_to_fit = output_structure_file if image else input_structure_file 

160 trajectroy_to_fit = output_trajectory_file if image else input_trajectory_file 

161 

162 # If we have a TPR then use it here 

163 # DANI: No estoy seguro de si funcionaría bien después de un imageado 

164 # DANI: Esto siempre lo he hecho usando la estructura de la primera frame ya imageada 

165 if is_tpr_available: structure_to_fit = input_topology_file 

166 

167 # Run Gromacs 

168 run_gromacs(f'trjconv -s {structure_to_fit.path} \ 

169 -f {trajectroy_to_fit.path} -o {output_trajectory_file.path} \ 

170 -fit rot+trans -n {CENTER_INDEX_FILEPATH}', 

171 user_input = f'{CENTER_SELECTION_NAME} System', 

172 show_error_logs = True) 

173 

174 # If there is no output structure at this time (i.e. there was no imaging) then create it now 

175 # Note that the input structure is not necessarily the output structure in this scenario 

176 # The fit may have been done using the topology and there may be an offset, so better dump it 

177 if not output_structure_file.exists: 

178 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path) 

179 

180 # Recover chains 

181 set_chains(output_structure_file.path, chains_backup) 

182 

183 # Clean up the index file 

184 # if exists(CENTER_INDEX_FILEPATH): 

185 # remove(CENTER_INDEX_FILEPATH) 

186 

187 

188# Get the first frame of a trajectory 

189def reset_structure ( 

190 input_structure_filepath : str, 

191 input_trajectory_filepath : str, 

192 output_structure_filepath : str, 

193): 

194 print(' Reseting structure after imaging step') 

195 run_gromacs(f'trjconv -s {input_structure_filepath} \ 

196 -f {input_trajectory_filepath} -o {output_structure_filepath} \ 

197 -dump 0', user_input = 'System')