Coverage for model_workflow/tools/image_and_fit.py: 12%

92 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1# This script is used to fit a trajectory (i.e. eliminate translation and rotation) 

2# This process is carried by Gromacs 

3 

4from subprocess import run, PIPE, Popen 

5 

6from model_workflow.utils.constants import GROMACS_EXECUTABLE, GREY_HEADER, COLOR_END 

7from model_workflow.utils.structures import Structure 

8from model_workflow.utils.auxiliar import InputError, MISSING_TOPOLOGY 

9from model_workflow.utils.type_hints import * 

10 

11# Set a pair of independent auxiliar functions to save and recover chains from a pdb file 

12# WARNING: These functions must be used only when the pdb has not changed in number of atoms 

13 

14# Get a list with each atom chain from a pdb 

15def get_chains (pdb_filename : str) -> list: 

16 structure = Structure.from_pdb_file(pdb_filename) 

17 chains = structure.chains 

18 return chains 

19 

20# Set each atom chain from a pdb from a list 

21def set_chains (pdb_filename : str, chains : list): 

22 structure = Structure.from_pdb_file(pdb_filename) 

23 structure.chains = chains 

24 structure.generate_pdb_file(pdb_filename) 

25 

26# Set the default centering/fitting selection (vmd syntax): protein and nucleic acids 

27CENTER_SELECTION_NAME = 'protein_and_nucleic_acids' 

28CENTER_INDEX_FILEPATH = '.index.ndx' 

29 

30# Image and fit a trajectory 

31# image - Set if it must me imaged 

32# fit - Set if it must be fitted 

33# translation - Set how it must be translated during imaging 

34# pbc_selection - Set a selection to exclude PBC residues from both the centering and the fitting focuses 

35def image_and_fit ( 

36 # Tested supported formats are .pdb and .tpr 

37 input_structure_file : 'File', 

38 # Tested supported formats are .trr and .xtc 

39 input_trajectory_file : 'File', 

40 # This must be in .tpr format 

41 # This is optional if there are no PBC residues 

42 input_topology_file : Union['File', Exception], 

43 output_structure_file : 'File', 

44 output_trajectory_file : 'File', 

45 image : bool, fit : bool, 

46 translation : list, 

47 # Note that this is an early provisional structure 

48 structure : 'Structure', 

49 # Selection of atoms which are to stay in PBC conditions after imaging 

50 # Note that this is an early provisional atom selection 

51 pbc_selection : 'Selection', 

52 ) -> str: 

53 

54 print(f' Image: {image} | Fit: {fit}') 

55 

56 if not image and not fit: 

57 return 

58 

59 # In order to run the imaging with PBC residues we need a .tpr file, not just the .pdb file 

60 # This is because there is a '-pbc mol' step which only works with a .tpr file 

61 is_tpr_available = input_topology_file != MISSING_TOPOLOGY and input_topology_file.format == 'tpr' 

62 has_pbc_atoms = bool(pbc_selection) 

63 if image and not is_tpr_available and has_pbc_atoms: 

64 raise InputError('In order to image a simulation with PBC residues using Gromacs it is mandatory to provide a .tpr file') 

65 

66 # If we have coarse grain then we can only fit if we have a tpr file 

67 # It will not work otherwise since we need atom masses 

68 # For some image protocols we need to work with pdbs at some points so we directly surrender 

69 if structure.select_cg(): 

70 if image: raise InputError('We cannot image a coarse grain simulation using Gromacs') 

71 if not is_tpr_available: 

72 raise InputError('We cannot fit a coarse grain simulation using Gromacs without a TPR file') 

73 

74 # First of all save chains 

75 # Gromacs will delete chains so we need to recover them after 

76 chains_backup = get_chains(input_structure_file.path) 

77 

78 # Set a custom index file (.ndx) to select protein and nucleic acids 

79 # This is useful for some imaging steps (centering and fitting) 

80 system_selection = structure.select('all', syntax='vmd') 

81 protein_selection = structure.select_protein() 

82 nucleic_selection = structure.select_nucleic() 

83 custom_selection = protein_selection + nucleic_selection 

84 if not custom_selection: 

85 raise SystemExit(f'The default selection to center (protein or nucleic) is empty. Please image your simulation manually.') 

86 # Exclude PBC residues from this custom selection 

87 custom_selection -= pbc_selection 

88 # Convert both selections to a single ndx file which gromacs can read 

89 system_selection_ndx = system_selection.to_ndx('System') 

90 selection_ndx = custom_selection.to_ndx(CENTER_SELECTION_NAME) 

91 with open(CENTER_INDEX_FILEPATH, 'w') as file: 

92 file.write(system_selection_ndx) 

93 file.write(selection_ndx) 

94 

95 # Imaging -------------------------------------------------------------------------------------- 

96 

97 if image: 

98 print(' Running first imaging step') 

99 

100 # Check if coordinates are to be translated  

101 must_translate = translation != [0, 0, 0] 

102 

103 # Set logs grey 

104 print(GREY_HEADER) 

105 

106 # If so run the imaging process without the '-center' flag 

107 if must_translate: 

108 p = Popen([ 

109 "echo", 

110 "System", 

111 ], stdout=PIPE) 

112 process = run([ 

113 GROMACS_EXECUTABLE, 

114 "trjconv", 

115 "-s", 

116 input_structure_file.path, 

117 "-f", 

118 input_trajectory_file.path, 

119 '-o', 

120 output_trajectory_file.path, 

121 '-trans', 

122 str(translation[0]), 

123 str(translation[1]), 

124 str(translation[2]), 

125 '-pbc', 

126 'atom', 

127 #'-ur', # WARNING: This makes everything stay the same 

128 #'compact', # WARNING: This makes everything stay the same 

129 '-quiet' 

130 ], stdin=p.stdout, stdout=PIPE) 

131 # If no translation is required and we have a tpr then use it to make molecules whole 

132 elif is_tpr_available: 

133 p = Popen([ 

134 "echo", 

135 CENTER_SELECTION_NAME, 

136 "System", 

137 ], stdout=PIPE) 

138 process = run([ 

139 GROMACS_EXECUTABLE, 

140 "trjconv", 

141 "-s", 

142 input_topology_file.path, 

143 "-f", 

144 input_trajectory_file.path, 

145 '-o', 

146 output_trajectory_file.path, 

147 '-pbc', 

148 'whole', 

149 '-quiet' 

150 ], stdin=p.stdout, stdout=PIPE) 

151 # Otherwise, center the custom selection 

152 else: 

153 p = Popen([ 

154 "echo", 

155 CENTER_SELECTION_NAME, 

156 "System", 

157 ], stdout=PIPE) 

158 process = run([ 

159 GROMACS_EXECUTABLE, 

160 "trjconv", 

161 "-s", 

162 input_structure_file.path, 

163 "-f", 

164 input_trajectory_file.path, 

165 '-o', 

166 output_trajectory_file.path, 

167 '-pbc', 

168 'atom', 

169 '-center', 

170 '-n', 

171 CENTER_INDEX_FILEPATH, 

172 '-quiet' 

173 ], stdin=p.stdout, stdout=PIPE) 

174 # Run the defined process 

175 logs = process.stdout.decode() 

176 p.stdout.close() 

177 print(COLOR_END) 

178 

179 # Check the output exists 

180 if not output_trajectory_file.exists: 

181 print(logs) 

182 raise Exception('Something went wrong with Gromacs during first imaging step') 

183 

184 # Select the first frame of the recently imaged trayectory as the new topology 

185 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path) 

186 

187 # If there are PBC residues then run a '-pbc mol' to make all residues stay inside the box anytime 

188 if has_pbc_atoms: 

189 print(' Fencing PBC molecules back to the box') 

190 print(GREY_HEADER) 

191 # Run Gromacs 

192 p = Popen([ 

193 "echo", 

194 "System", 

195 ], stdout=PIPE) 

196 logs = run([ 

197 GROMACS_EXECUTABLE, 

198 "trjconv", 

199 "-s", 

200 input_topology_file.path, 

201 "-f", 

202 output_trajectory_file.path, 

203 '-o', 

204 output_trajectory_file.path, 

205 '-pbc', 

206 'mol', # Note that the 'mol' option requires a tpr to be passed 

207 '-n', 

208 CENTER_INDEX_FILEPATH, 

209 '-quiet' 

210 ], stdin=p.stdout, stdout=PIPE).stdout.decode() 

211 p.stdout.close() 

212 print(COLOR_END) 

213 

214 # Select the first frame of the recently translated and imaged trayectory as the new topology 

215 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path) 

216 

217 

218 # ----------------------------------------------------------------------------------------------- 

219 

220 # If there are no PBC residues then run a '-pbc nojump' to avoid non-sense jumping of any molecule 

221 else: 

222 print(' Running no-jump') 

223 print(GREY_HEADER) 

224 # Run Gromacs 

225 p = Popen([ 

226 "echo", 

227 "System", 

228 ], stdout=PIPE) 

229 logs = run([ 

230 GROMACS_EXECUTABLE, 

231 "trjconv", 

232 "-s", 

233 output_structure_file.path, 

234 "-f", 

235 output_trajectory_file.path, 

236 '-o', 

237 output_trajectory_file.path, 

238 # Expanding the box may help in some situations 

239 # However there are secondary effects for the trajectory 

240 #'-box', 

241 #'999', 

242 #'999', 

243 #'999', 

244 '-pbc', 

245 'nojump', 

246 '-quiet' 

247 ], stdin=p.stdout, stdout=PIPE).stdout.decode() 

248 p.stdout.close() 

249 print(COLOR_END) 

250 

251 # Fitting -------------------------------------------------------------------------------------- 

252 

253 # Remove translation and rotation in the trajectory using the custom selection as reference 

254 # WARNING: Sometimes, simulations with membranes do not require this step and they may get worse when fitted 

255 if fit: 

256 print(' Running fitting step') 

257 

258 # The trajectory to fit is the already imaged trajectory 

259 # However, if there was no imaging, the trajectory to fit is the input trajectory 

260 structure_to_fit = output_structure_file if image else input_structure_file 

261 trajectroy_to_fit = output_trajectory_file if image else input_trajectory_file 

262 

263 # If we have a TPR then use it here 

264 # DANI: No estoy seguro de si funcionaría bien después de un imageado 

265 # DANI: Esto siempre lo he hecho usando la estructura de la primera frame ya imageada 

266 if is_tpr_available: structure_to_fit = input_topology_file 

267 

268 print(GREY_HEADER) 

269 # Run Gromacs 

270 p = Popen([ 

271 "echo", 

272 CENTER_SELECTION_NAME, 

273 "System", 

274 ], stdout=PIPE) 

275 logs = run([ 

276 GROMACS_EXECUTABLE, 

277 "trjconv", 

278 "-s", 

279 structure_to_fit.path, 

280 "-f", 

281 trajectroy_to_fit.path, 

282 '-o', 

283 output_trajectory_file.path, 

284 '-fit', 

285 'rot+trans', 

286 '-n', 

287 CENTER_INDEX_FILEPATH, 

288 '-quiet' 

289 ], stdin=p.stdout, stdout=PIPE).stdout.decode() 

290 p.stdout.close() 

291 print(COLOR_END) 

292 

293 # If there is no output structure at this time (i.e. there was no imaging) then create it now 

294 # Note that the input structure is not necessarily the output structure in this scenario 

295 # The fit may have been done using the topology and there may be an offset, so better dump it 

296 if not output_structure_file.exists: 

297 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path) 

298 

299 # Recover chains 

300 set_chains(output_structure_file.path, chains_backup) 

301 

302 # Clean up the index file 

303 # if exists(CENTER_INDEX_FILEPATH): 

304 # remove(CENTER_INDEX_FILEPATH) 

305 

306 

307# Get the first frame of a trajectory 

308def reset_structure ( 

309 input_structure_filename : str, 

310 input_trajectory_filename : str, 

311 output_structure_filename : str, 

312): 

313 print(' Reseting structure after imaging step') 

314 p = Popen([ 

315 "echo", 

316 "System", 

317 ], stdout=PIPE) 

318 process = run([ 

319 GROMACS_EXECUTABLE, 

320 "trjconv", 

321 "-s", 

322 input_structure_filename, 

323 "-f", 

324 input_trajectory_filename, 

325 '-o', 

326 output_structure_filename, 

327 '-dump', 

328 '0', 

329 '-quiet' 

330 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE) 

331 logs = process.stdout.decode() 

332 p.stdout.close()