Coverage for mddb_workflow / analyses / energies.py: 8%

368 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1# Energies 

2# 

3# The energies analysis is carried by ACPYPE and a locally developed tool: CMIP. 

4# ACPYPE is a tool based in Python to use Antechamber to generate topologies for chemical compounds and to interface with others python applications. 

5# CMIP stands for Classical Molecular Interaction Potential and it is usefull to predict electrostatic and Van der Waals potentials. 

6# 

7# ACPYPE: 

8# SOUSA DA SILVA, A. W. & VRANKEN, W. F. ACPYPE - AnteChamber PYthon Parser interfacE. BMC Research Notes 5 (2012), 367 doi: 10.1186/1756-0500-5-367 http://www.biomedcentral.com/1756-0500/5/367 

9# BATISTA, P. R.; WILTER, A.; DURHAM, E. H. A. B. & PASCUTTI, P. G. Molecular Dynamics Simulations Applied to the Study of Subtypes of HIV-1 Protease. Cell Biochemistry and Biophysics 44 (2006), 395-404. doi: 10.1385/CBB:44:3:395 

10# 

11# Antechamber: 

12# WANG, J., WANG, W., KOLLMAN, P. A., and CASE, D. A. Automatic atom type and bond type perception in molecular mechanical calculations. Journal of Molecular Graphics and Modelling 25, 2 (2006), 247–260. doi: 10.1016/j.jmgm.2005.12.005 

13# WANG, J., WOLF, R. M., CALDWELL, J. W., KOLLMAN, P. A., and CASE, D. A. Development and testing of a General Amber Force Field. Journal of Computational Chemistry 25, 9 (2004), 1157–1174. doi: 10.1002/jcc.20035 

14# 

15# CMIP: 

16# Gelpí, J.L., Kalko, S.G., Barril, X., Cirera, J., de la Cruz, X., Luque, F.J. and Orozco, M. (2001), Classical molecular interaction potentials: Improved setup procedure in molecular dynamics simulations of proteins. Proteins, 45: 428-437. doi:10.1002/prot.1159 

17 

18# Imports libraries 

19from os import mkdir, remove 

20from os.path import exists 

21from shutil import copyfile 

22from glob import glob 

23import re 

24import math 

25from subprocess import run, PIPE 

26 

27from mddb_workflow.tools.get_pdb_frames import get_pdb_frames 

28from mddb_workflow.utils.auxiliar import load_json, save_json, warn, numerate_filename, get_analysis_name 

29from mddb_workflow.utils.auxiliar import ForcedStop 

30from mddb_workflow.utils.constants import OUTPUT_ENERGIES_FILENAME 

31from mddb_workflow.utils.constants import PROTEIN_RESIDUE_NAME_LETTERS, NUCLEIC_RESIDUE_NAME_LETTERS 

32from mddb_workflow.utils.constants import CMIP_INPUTS_CHECKONLY_SOURCE, CMIP_INPUTS_SOURCE 

33from mddb_workflow.utils.constants import CMIP_VDW_SOURCE, ENERGIES_DEBUG_SCRIPT_SOURCE 

34from mddb_workflow.utils.structures import Structure 

35from mddb_workflow.utils.file import File 

36from mddb_workflow.utils.type_hints import * 

37 

38CURSOR_UP_ONE = '\x1b[1A' 

39ERASE_LINE = '\x1b[2K' 

40ERASE_3_PREVIOUS_LINES = CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE 

41 

42def energies ( 

43 trajectory_file : File, 

44 # Set a folder to be created in order to store residual output files from this analysis 

45 output_directory : str, 

46 structure : 'Structure', 

47 interactions : list, 

48 charges : list, 

49 snapshots : int, 

50 frames_limit : int = 100, 

51 verbose : bool = False, 

52 debug : bool = False): 

53 """Perform the electrostatic and vdw energies analysis for each pair of interaction agents.""" 

54 

55 # Make sure we have interactions 

56 if not interactions or len(interactions) == 0: 

57 print('No interactions were specified') 

58 return 

59 

60 # Make sure we have charges 

61 if not charges or type(charges) == Exception: 

62 print('Atom charges are not available -> This analysis will be skipped') 

63 return 

64 

65 # Set the main output filepath 

66 output_analysis_filepath = f'{output_directory}/{OUTPUT_ENERGIES_FILENAME}' 

67 

68 # Set the auxiliar files further required as CMIP inputs 

69 cmip_inputs_checkonly_source = File(CMIP_INPUTS_CHECKONLY_SOURCE) 

70 cmip_inputs_source = File(CMIP_INPUTS_SOURCE) 

71 vdw_source = File(CMIP_VDW_SOURCE) 

72 debug_script_source = File(ENERGIES_DEBUG_SCRIPT_SOURCE) 

73 

74 # Set a backup file to store some results on the fly 

75 # This is useful to restore these values in case the analysis is disrupt since it is a long analysis 

76 energies_backup = File(output_directory + '/backup.json') 

77 

78 # Define an additional file generated by CMIP which must be handled (written in the energies folder and deleted) 

79 restart_file = File(output_directory + '/restart') 

80 

81 # Check the number of atoms on each interacting agent 

82 # If there is any agent with more than 80000 atoms CMIP will fail so we must skip this specific energies analysis by now 

83 # DANI: Este valor límite se puede cambiar en CMIP, pero hay que recompilar y eso no es banal en un environment de conda 

84 cmip_atom_limit = 80000 

85 for interaction in interactions: 

86 exceeds = False 

87 for agent in ['1', '2']: 

88 atoms = interaction[f'atom_indices_{agent}'] 

89 atom_count = len(atoms) 

90 if atom_count >= cmip_atom_limit: 

91 exceeds = True 

92 break 

93 if exceeds: 

94 warn(f'{interaction["name"]} is exceeding the CMIP atom count limit of {cmip_atom_limit} and it will be skipped for this analysis') 

95 interaction['exceeds'] = True 

96 

97 # This anlaysis produces many residual output files 

98 # Create a new folder to store all ouput files so they do not overcrowd the main directory 

99 if not exists(output_directory): 

100 mkdir(output_directory) 

101 

102 # Adapt the structure for cmip 

103 energies_structure = structure.copy() 

104 

105 # Rename residues according to if they are terminals 

106 name_terminal_residues(energies_structure) 

107 

108 # Set each atom element in CMIP format 

109 set_cmip_elements(energies_structure) 

110 

111 # Save the structure back to a pdb 

112 energies_structure_file = File(output_directory + '/energies.pdb') 

113 energies_structure.generate_pdb_file(energies_structure_file.path) 

114 

115 # Transform an agent structure to a cmip input pdb, which includes charges and cmip-friendly elements 

116 # If this is to be a host file then leave only atoms involved in the interaction: both host and guest agents 

117 # If this is to be a host file, all guest atoms are set dummy* as well 

118 # If this is to be a guest file then remove all host atoms as well 

119 # Note that the frame structure is not the standard structure, which would already include charges 

120 # For this reason, charges are included in the frame structure before 

121 # Also flag some atoms as 'dummy' by adding a 'X' before the element 

122 # *Dummy atoms are not considered in the calculation but they stand for a region with low dielectric 

123 # If removed, the void left by these atoms would be considered to be filled with solvent, which has a high dielectric 

124 # These dielectric differences have a strong impact on the calculation 

125 # By default we set as dummy host atoms involved in a covalent bond with guest atoms 

126 def pdb2cmip ( 

127 agent_name : str, 

128 host_file : bool, 

129 frame_structure : 'Structure', 

130 host_selection : 'Selection', 

131 guest_selection : 'Selection', 

132 strong_bonds : Optional[list] 

133 ) -> File: 

134 # Get atom indices for host atoms involved in a covalent bond with guest atoms 

135 # If this is the host, they will be further marked as dummy for CMIP to ignore them in the calculations 

136 # If this is the guest, they will be removed 

137 strong_bond_atom_indices = [] 

138 if strong_bonds: 

139 for bond in strong_bonds: 

140 strong_bond_atom_indices += bond 

141 # Set atoms to be flagged as dummy 

142 dummy_atom_indices = set() 

143 # If this is to be the host file then set guest atoms and strong bond atoms dummy 

144 if host_file: 

145 dummy_atom_indices |= set(strong_bond_atom_indices) 

146 dummy_atom_indices |= set(guest_selection.atom_indices) 

147 # Set which atoms must be kept in the output file 

148 # If this is the host file then keep both host and guest atoms 

149 # If this is the guest file then keep only guest atoms 

150 # Always remove strong bond atoms in the guest 

151 selection = host_selection + guest_selection if host_file else guest_selection 

152 strong_bond_selection = frame_structure.select_atom_indices(strong_bond_atom_indices) 

153 guest_strong_bonds = guest_selection & strong_bond_selection 

154 selection -= guest_strong_bonds 

155 # Filter the selected atoms in the structure 

156 selected_structure = frame_structure.filter(selection) 

157 # Set atom charges (non standard attribute), which are used further to write the adapted cmip pdb 

158 # Set also the elements to macth the original structure, since the frame generator messes the elements 

159 for a, atom in enumerate(selected_structure.atoms): 

160 atom_index = selection.atom_indices[a] 

161 charge = charges[atom_index] 

162 setattr(atom, 'charge', charge) 

163 cmip_element = energies_structure.atoms[atom_index].element 

164 atom.element = cmip_element 

165 # Write a special pdb which contains charges as CMIP expects to find them and dummy atoms flagged 

166 output_filepath = f'{output_directory}/{agent_name}_{"host" if host_file else "guest"}.cmip.pdb' 

167 with open(output_filepath, "w") as file: 

168 # Write a line for each atom 

169 for a, atom in enumerate(selected_structure.atoms): 

170 index = str(a+1).rjust(5) 

171 atom_name = atom.name 

172 name = ' ' + atom_name.ljust(3) if len(atom_name) < 4 else atom_name 

173 residue = atom.residue 

174 residue_name = residue.name.ljust(3) 

175 chain = atom.chain 

176 chain_name = chain.name.rjust(1) 

177 residue_number = str(residue.number).rjust(4) 

178 icode = residue.icode.rjust(1) 

179 coords = atom.coords 

180 x_coord, y_coord, z_coord = [ "{:.3f}".format(coord).rjust(8) for coord in coords ] 

181 charge = "{:.4f}".format(atom.charge) # Charge was manually added before, it is not a standard attribute 

182 # In case this atom is making an strong bond between both interacting agents we add an 'X' before the element 

183 # This way CMIP will ignore the atom. Otherwise it would return high non-sense Van der Waals values 

184 real_index = selection.atom_indices[a] 

185 # Set if atom is to be ignored 

186 is_dummy = real_index in dummy_atom_indices 

187 cmip_dummy_flag = 'X' if is_dummy else '' 

188 element = atom.element 

189 atom_line = ('ATOM ' + index + ' ' + name + ' ' + residue_name + ' ' 

190 + chain_name + residue_number + icode + ' ' + x_coord + y_coord + z_coord 

191 + ' ' + str(charge).rjust(7) + ' ' + cmip_dummy_flag + element + '\n') 

192 file.write(atom_line) 

193 

194 return File(output_filepath) 

195 

196 # Run CMIP in 'checkonly' mode (i.e. start and stop) for both agents 

197 # Mine the grid generated by CMIP for both agents and calculate a new grid which would include both 

198 # Finally, modify the provided 'cmip_inputs' with the new grid parameters 

199 def adapt_cmip_grid (agent1_cmip_pdb : File, agent2_cmip_pdb : File, cmip_inputs : File) -> tuple: 

200 

201 # Set a name for the checkonly CMIP outputs 

202 # This name is not important, since the data we want is in the CMIP logs 

203 cmip_checkonly_output = File(output_directory + '/checkonly.energy.pdb') 

204 

205 # Run CMIP in 'checkonly' mode and save the grid dimensions output 

206 # First do it for the agent 1 

207 cmip_logs_agent1 = run([ 

208 "cmip", 

209 "-i", 

210 cmip_inputs_checkonly_source.path, 

211 "-pr", 

212 agent1_cmip_pdb.path, 

213 "-vdw", 

214 vdw_source.path, 

215 "-hs", 

216 agent2_cmip_pdb.path, 

217 "-byat", 

218 cmip_checkonly_output.path, 

219 "-rst", 

220 restart_file.path 

221 ], stdout=PIPE, stderr=PIPE).stdout.decode() 

222 

223 # Mine the grid dimensions from CMIP logs 

224 agent1_center, agent1_density, agent1_units = mine_cmip_output( 

225 cmip_logs_agent1.split("\n")) 

226 

227 # Run CMIP in 'checkonly' mode and save the grid dimensions output 

228 # Now do it for the agent 2 

229 cmip_logs_agent2 = run([ 

230 "cmip", 

231 "-i", 

232 cmip_inputs_checkonly_source.path, 

233 "-pr", 

234 agent2_cmip_pdb.path, 

235 "-vdw", 

236 vdw_source.path, 

237 "-hs", 

238 agent1_cmip_pdb.path, 

239 "-byat", 

240 cmip_checkonly_output.path, 

241 "-rst", 

242 restart_file.path 

243 ], stdout=PIPE, stderr=PIPE).stdout.decode() 

244 

245 # Mine the grid dimensions from CMIP logs 

246 agent2_center, agent2_density, agent2_units = mine_cmip_output( 

247 cmip_logs_agent2.split("\n")) 

248 

249 # Calculate grid dimensions for a new grid which contains both previous grids 

250 new_center, new_density = compute_new_grid( 

251 agent1_center, 

252 agent1_density, 

253 agent1_units, 

254 agent2_center, 

255 agent2_density, 

256 agent2_units) 

257 

258 # Here we must calculate how many grid points the new box would have 

259 # In case the number of points exceeds a safe limit we must reduce it 

260 # Otherwise CMIP could return the following error: 

261 # ERROR when trying to allocate -2092351636 bytes of memory 

262 

263 # Set the limit number of grid points 

264 # It has been found experimentally: 65945880 Grid points -> OK, 68903412 Grid points -> ERROR 

265 grid_points_limit = 65000000 

266 

267 # Set the default grid 'cells' size for all three dimensions 

268 grid_unit_size = 0.5 

269 

270 # Calculate the amount of total points with current parameters 

271 current_grid_points = (new_density[0] + 1) * (new_density[1] + 1) * (new_density[2] + 1) 

272 

273 # In case the current number of grid points exceeds the limit... 

274 # Reduce all dimension densities in proportion and expand the grid units size to compensate 

275 if current_grid_points > grid_points_limit: 

276 print(f'WARNING: Grid points limit is exceeded ({current_grid_points})') 

277 proportion = grid_points_limit / current_grid_points 

278 new_density[0] = math.ceil(new_density[0] * proportion) 

279 new_density[1] = math.ceil(new_density[1] * proportion) 

280 new_density[2] = math.ceil(new_density[2] * proportion) 

281 grid_unit_size = math.ceil(grid_unit_size / proportion * 1000) / 1000 

282 print(f'WARNING: Grid resolution has been reduced -> unit size = {grid_unit_size}') 

283 

284 # Set the new lines to be written in the local CMIP inputs file 

285 grid_inputs = [ 

286 f" cenx={new_center[0]:.1f} \n", 

287 f" ceny={new_center[1]:.1f} \n", 

288 f" cenz={new_center[2]:.1f} \n", 

289 f" dimx={int(new_density[0])} \n", 

290 f" dimy={int(new_density[1])} \n", 

291 f" dimz={int(new_density[2])} \n", 

292 f" intx={grid_unit_size} \n", 

293 f" inty={grid_unit_size} \n", 

294 f" intz={grid_unit_size} \n", 

295 ] 

296 # Add previous lines to the local inputs file 

297 with open(cmip_inputs.path, "r+") as file: 

298 lines = file.readlines() 

299 file.seek(0) 

300 for line in lines: 

301 if line == '&end \n': 

302 for grid_input in grid_inputs: 

303 file.write(grid_input) 

304 file.write(line) 

305 

306 # Delete the 'ckeckonly' file 

307 cmip_checkonly_output.remove() 

308 

309 # Calculate the resulting box origin and size and return both values 

310 # These values are used for display / debug purposes only 

311 new_size = (new_density[0] * grid_unit_size, new_density[1] * grid_unit_size, new_density[2] * grid_unit_size) 

312 new_origin = (new_center[0] - new_size[0] / 2, new_center[1] - new_size[1] / 2, new_center[2] - new_size[2] / 2) 

313 return new_origin, new_size 

314 

315 # Run the CMIP software to get the desired energies 

316 def get_cmip_energies (cmip_inputs : File, guest : File, host : File) -> tuple: 

317 # Set the cmip output filename, which is to be read rigth after it is generated 

318 cmip_output_file = File(output_directory + '/cmip_output.pdb') 

319 # Run cmip 

320 cmip_process = run([ 

321 "cmip", 

322 "-i", 

323 cmip_inputs.path, 

324 "-pr", 

325 guest.path, 

326 "-vdw", 

327 vdw_source.path, 

328 "-hs", 

329 host.path, 

330 "-byat", 

331 cmip_output_file.path, 

332 "-rst", 

333 restart_file.path, 

334 ], stdout=PIPE, stderr=PIPE) 

335 cmip_logs = cmip_process.stdout.decode() 

336 # If the output does note exist at this poin then it means something went wrong with CMIP 

337 if not exists(cmip_output_file.path): 

338 print(cmip_logs) 

339 cmip_error_logs = cmip_process.stderr.decode() 

340 print(cmip_error_logs) 

341 raise RuntimeError('Something went wrong with CMIP!') 

342 # Mine the electrostatic (es) and Van der Walls (vdw) energies for each atom 

343 # Group the results by atom adding their values 

344 atom_energies = [] 

345 with open(cmip_output_file.path, 'r') as file: 

346 lines = list(file) 

347 # If this file is empty it means something went wrong with CMIP 

348 # We print its logs and exit 

349 if len(lines) == 0: 

350 if type(cmip_logs) == str: 

351 print(cmip_logs) 

352 else: 

353 for line in cmip_logs: 

354 print(line) 

355 raise RuntimeError('Something went wrong with CMIP!') 

356 # Mine energies line by line (i.e. atom by atom) 

357 for line in lines: 

358 # WARNING: This works most times but there is an exception 

359 # vdw = float(line[42:53]) 

360 # es = float(line[57:68]) 

361 # both = float(line[72:83]) 

362 # Numbers may become larger than expected when they are close to 0 since they are expressed in E notation 

363 # e.g. 0.1075E-04 

364 # For this reason we must mine these last values relying on whitespaces between them 

365 line_splits = line.split() 

366 vdw = float(line_splits[-3]) 

367 es = float(line_splits[-2]) 

368 both = float(line_splits[-1]) 

369 # Values greater than 100 are represented as 0 

370 # This step is performed to filter 'infinity' values 

371 energies = (vdw, es, both) 

372 if both > 100: 

373 #print('WARNING: We have extremly high values in energies which are beeing discarded') 

374 energies = (0, 0, 0) 

375 # Add current atom energy values to the atom energies list 

376 atom_energies.append(energies) 

377 return atom_energies 

378 

379 # Given a pdb structure, use CMIP to extract energies 

380 # Output energies are returned by atom 

381 def get_frame_energy (frame_structure : 'Structure') -> list[dict]: 

382 

383 # WARNING: At this point structure should be corrected 

384 # WARNING: Repeated atoms will make the analysis fail 

385 

386 # Repeat the whole process for each interaction 

387 data = [] 

388 for interaction in interactions: 

389 

390 # Check if the interaction has been marked as 'exceeds', in which case we skip it 

391 if interaction.get('exceeds', False): 

392 continue 

393 

394 # Get covalent bonds between both agents, if any 

395 # They will be not taken in count during the calculation 

396 # Otherwise we would have a huge energy peak in this atom since they are very close 

397 strong_bonds = interaction.get('strong_bonds', None) 

398 

399 # Get interaction and agent names, just for the logs 

400 interaction_name = interaction['name'] 

401 print(f' Processing {interaction_name}') 

402 agent1_name = interaction['agent_1'].replace(' ', '_').replace('/', '_') 

403 agent2_name = interaction['agent_2'].replace(' ', '_').replace('/', '_') 

404 # Get agent atom selections 

405 agent1_atom_indices = interaction[f'atom_indices_1'] 

406 agent1_selection = frame_structure.select_atom_indices(agent1_atom_indices) 

407 if not agent1_selection: 

408 raise ValueError(f'Empty agent 1 "{agent1_name}" from interaction "{interaction_name}"') 

409 agent2_atom_indices = interaction[f'atom_indices_2'] 

410 agent2_selection = frame_structure.select_atom_indices(agent2_atom_indices) 

411 if not agent2_selection: 

412 raise ValueError(f'Empty agent 2 "{agent2_name}" from interaction "{interaction_name}"') 

413 

414 # Prepare the CMIP friendly input pdb structures for every calculation 

415 # First prepare the host files and the prepare the guest files 

416 # There is only a difference between host and guest files: 

417 # Host files include both agent atoms but the guest atoms are all marked as dummy 

418 # Guest files include only guest atoms while host atoms are removed 

419 agent1_cmip_host = pdb2cmip(agent_name = agent1_name, host_file = True, frame_structure=frame_structure, 

420 host_selection=agent1_selection, guest_selection=agent2_selection, strong_bonds=strong_bonds) 

421 agent2_cmip_host = pdb2cmip(agent_name = agent2_name, host_file = True, frame_structure=frame_structure, 

422 host_selection=agent2_selection, guest_selection=agent1_selection, strong_bonds=strong_bonds) 

423 agent1_cmip_guest = pdb2cmip(agent_name = agent1_name, host_file = False, frame_structure=frame_structure, 

424 host_selection=agent2_selection, guest_selection=agent1_selection, strong_bonds=strong_bonds) 

425 agent2_cmip_guest = pdb2cmip(agent_name = agent2_name, host_file = False, frame_structure=frame_structure, 

426 host_selection=agent1_selection, guest_selection=agent2_selection, strong_bonds=strong_bonds) 

427 

428 # Copy the source cmip inputs file in the local directory 

429 # Inputs will be modified to adapt the cmip grid to both agents together 

430 # Note than modified inputs file is not conserved along frames 

431 # Structures may change along trajectory thus requiring a different grid size 

432 cmip_inputs_filename = cmip_inputs_source.filename 

433 cmip_inputs = File(f'{output_directory}/{cmip_inputs_filename}') 

434 copyfile(cmip_inputs_source.path, cmip_inputs.path) 

435 

436 # Set the CMIP box dimensions and densities to fit both the host and the guest 

437 # Box origin and size are modified in the cmip inputs 

438 # Values returned are only used for display / debug purposes 

439 box_origin, box_size = adapt_cmip_grid(agent1_cmip_guest, agent2_cmip_guest, cmip_inputs) 

440 

441 # If the debug flag is passed then, instead of calculating energies, leave it all ready and stop here 

442 if debug: 

443 # Copy in the energies folder a small python script used to sum output energies 

444 debug_script_filename = debug_script_source.filename 

445 debug_script = File(f'{output_directory}/{debug_script_filename}') 

446 copyfile(debug_script_source.path, debug_script.path) 

447 # Copy in the energies folder the VDM parameters input file 

448 debug_vdw_filename = vdw_source.fiename 

449 debug_vdw_parameters = File(f'{output_directory}/{debug_vdw_filename}') 

450 copyfile(vdw_source.path, debug_vdw_parameters.path) 

451 # Set auxiliar output filenames 

452 debug_output_1 = 'first_output.pdb' 

453 debug_output_2 = 'second_output.pdb' 

454 # Write a README file explaining what to do to manually debug CMIP 

455 debug_readme = File(f'{output_directory}/README') 

456 with open(debug_readme.path, 'w') as file: 

457 file.write( 

458 '# Energies debug\n\n' 

459 '# Run CMIP\n' 

460 f'cmip -i {cmip_inputs_filename} -pr {agent1_cmip_guest.filename} -vdw {debug_vdw_filename} -hs {agent2_cmip_host.filename} -byat {debug_output_1}\n\n' 

461 '# Run CMIP inverting host and guest\n' 

462 f'cmip -i {cmip_inputs_filename} -pr {agent2_cmip_guest.filename} -vdw {debug_vdw_filename} -hs {agent1_cmip_host.filename} -byat {debug_output_2}\n\n' 

463 '# Sum both energies and compare\n' 

464 f'python {debug_script_filename} {debug_output_1} {debug_output_2}\n' 

465 ) 

466 raise ForcedStop(' READY TO DEBUG -> Please go to the corresponding replica "energies" directory and follow the README instructions') 

467 

468 # Run the CMIP software to get the desired energies 

469 print(f' Calculating energies for {agent1_name} as guest and {agent2_name} as host') 

470 agent1_atom_energies = get_cmip_energies(cmip_inputs, agent1_cmip_guest, agent2_cmip_host) 

471 print(f' Calculating energies for {agent2_name} as guest and {agent1_name} as host') 

472 agent2_atom_energies = get_cmip_energies(cmip_inputs, agent2_cmip_guest, agent1_cmip_host) 

473 

474 # Add None in those places where there is a strong bond 

475 strong_bond_atom_indices = set(sum(strong_bonds, [])) 

476 for i, atom_index in enumerate(agent1_atom_indices): 

477 if atom_index in strong_bond_atom_indices: 

478 agent1_atom_energies.insert(i, None) 

479 for i, atom_index in enumerate(agent2_atom_indices): 

480 if atom_index in strong_bond_atom_indices: 

481 agent2_atom_energies.insert(i, None) 

482 

483 # Make sure the output size matches the number of atoms in the interaction 

484 if len(agent1_atom_energies) != len(agent1_atom_indices): 

485 print(f'Number of values: {len(agent1_atom_energies)}') 

486 print(f'Number of atoms: {len(agent1_atom_indices)}') 

487 raise ValueError('Missmatch in the number values and atoms in agent 1') 

488 if len(agent2_atom_energies) != len(agent2_atom_indices): 

489 print(f'Number of values: {len(agent2_atom_energies)}') 

490 print(f'Number of atoms: {len(agent2_atom_indices)}') 

491 raise ValueError('Missmatch in the number values and atoms in agent 2') 

492 

493 # Print total energies at the end for every agent if the verbose flag has been passed 

494 if verbose: 

495 for agent_name, agent_energies in zip([agent1_name, agent2_name], [agent1_atom_energies, agent2_atom_energies]): 

496 total_vdw = total_es = total_both = 0 

497 for atom_energies in agent_energies: 

498 total_vdw += atom_energies[0] 

499 total_es += atom_energies[1] 

500 total_both += atom_energies[2] 

501 print(f' Total energies for {agent_name}: vmd {total_vdw}, es {total_es}, both {total_both}') 

502 

503 # DANI: Usa esto para escribir los resultados de las energías por átomo 

504 # sample = { 

505 # 'agent1': { 'energies': agent1_atom_energies, 'pdb': agent1_cmip }, 

506 # 'agent2': { 'energies': agent2_atom_energies, 'pdb': agent2_cmip }, 

507 # 'box': { 'origin': box_origin, 'size': box_size } 

508 # } 

509 # save_json(sample, 'energies_sample.json') 

510 

511 data.append({ 'agent1': agent1_atom_energies, 'agent2': agent2_atom_energies }) 

512 

513 # Erase the 2 previous log lines 

514 print(ERASE_3_PREVIOUS_LINES) 

515 

516 return data 

517 

518 # Extract the energies for each frame in a reduced trajectory 

519 frames, step, count = get_pdb_frames(energies_structure_file.path, trajectory_file.path, snapshots, frames_limit) 

520 non_exceeding_interactions = [interaction for interaction in interactions if not interaction.get('exceeds', False)] 

521 

522 # Load backup data in case there is a backup file 

523 if energies_backup.exists: 

524 print(' Recovering energies backup') 

525 interactions_data = load_json(energies_backup.path) 

526 else: 

527 interactions_data = [[] for interaction in non_exceeding_interactions] 

528 for frame_number, current_frame_pdb in enumerate(frames): 

529 # If we already have this frame in the backup then skip it 

530 if frame_number < len(interactions_data[0]): 

531 continue 

532 # Run the main analysis over the current frame 

533 # Append the result data for each interaction 

534 current_frame_structure = Structure.from_pdb_file(current_frame_pdb) 

535 frame_energies_data = get_frame_energy(current_frame_structure) 

536 for i, data in enumerate(frame_energies_data): 

537 interactions_data[i].append(data) 

538 # Save a backup just in case the process is interrupted further 

539 save_json(interactions_data, energies_backup.path) 

540 

541 # Cleanup here some CMIP residual files since it is not to be run again 

542 # Remove the restart file since we do not use it and it may be heavy sometimes 

543 if restart_file.exists: 

544 restart_file.remove() 

545 # Remove fortran unit files generated when running CMIP 

546 # Note that we can not define where these files are written but they appear where CMIP is run 

547 fortran_unit_files = glob('fort.*') 

548 for filepath in fortran_unit_files: 

549 remove(filepath) 

550 

551 # Now calculated atom average values through all frames for each pair of interaction agents 

552 output_summary = [] 

553 for i, interaction in enumerate(non_exceeding_interactions): 

554 # Check if the interaction as been marked as 'exceeds', in which case we skip it 

555 if interaction.get('exceeds', False): continue 

556 # Get the numerated output filepath for this specific interaction 

557 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i) 

558 # Add the root of the output analysis filename to the run data 

559 analysis_name = get_analysis_name(numbered_output_analysis_filepath) 

560 # Get the interaction name 

561 name = interaction['name'] 

562 # Add this interaction to the final summary 

563 output_summary.append({ 

564 'name': name, 

565 'analysis': analysis_name 

566 }) 

567 # If the output file already exists then skip this iteration 

568 if exists(numbered_output_analysis_filepath): continue 

569 # Get the main data 

570 data = interactions_data[i] 

571 # Format data 

572 agent1_output = format_data([ frame['agent1'] for frame in data ]) 

573 agent2_output = format_data([ frame['agent2'] for frame in data ]) 

574 # Format the results data and append it to the output data 

575 output = { 

576 'name': name, 

577 'agent1': agent1_output, 

578 'agent2': agent2_output, 

579 'version': '1.1.0' 

580 } 

581 # Export the current interaction analysis in json format 

582 save_json(output, numbered_output_analysis_filepath) 

583 

584 # Finally, export the summary in json format 

585 save_json(output_summary, output_analysis_filepath) 

586 

587 # Remove the backup to avoid accidentally reusing it when the output file is deleted 

588 energies_backup.remove() 

589 

590 # Finally remove the reduced topology 

591 energies_structure_file.remove() 

592 

593# Given a topology (e.g. pdb, prmtop), extract the atom elements in a CMIP friendly format 

594# Hydrogens bonded to carbons remain as 'H' 

595# Hydrogens bonded to oxygen are renamed as 'HO' 

596# Hydrogens bonded to nitrogen or sulfur are renamed as 'HN' 

597# Some heavy atom elements may be also modified (e.g. 'CL' -> 'Cl') 

598def set_cmip_elements (structure : 'Structure'): 

599 # Iterate over each atom to fix their element according to CMIP standards 

600 for a, atom in enumerate(structure.atoms): 

601 element = atom.element 

602 # Adapt hydrogens element to CMIP requirements 

603 if element == 'H': 

604 # We must find the element of the heavy atom this hydrogen is bonded to 

605 atom_bonds = atom.get_bonds() 

606 # There should be always only 1 bond 

607 if len(atom_bonds) != 1: 

608 print(f'Atom {atom.name} ({atom.index}) has {len(atom_bonds)} bonds') 

609 raise ValueError('An hydrogen should always have one and only one bond') 

610 bonded_atom_index = atom_bonds[0] 

611 bonded_atom_element = structure.atoms[bonded_atom_index].element 

612 # Hydrogens bonded to carbons remain as 'H' 

613 if bonded_atom_element == 'C': 

614 pass 

615 # Hydrogens bonded to oxygen are renamed as 'HO' 

616 elif bonded_atom_element == 'O': 

617 element = 'HO' 

618 # Hydrogens bonded to nitrogen or sulfur are renamed as 'HN' 

619 elif bonded_atom_element == 'N' or bonded_atom_element == 'S': 

620 element = 'HN' 

621 else: 

622 raise RuntimeError(f'Hydrogen bonded to not supported heavy atom: {bonded_atom_element}') 

623 atom.element = element 

624 

625# Set residue names with no endings 

626protein_mid_residue_names = { name for name in PROTEIN_RESIDUE_NAME_LETTERS if name[-1] not in {'N','C'} } 

627nucleic_mid_residue_names = { name for name in NUCLEIC_RESIDUE_NAME_LETTERS if name[-1] not in {'5','3'} } 

628 

629# Change residue names in a structure to meet the CMIP requirements 

630# Change terminal residue names by adding an 'N' or 'C' 

631def name_terminal_residues (structure : 'Structure'): 

632 # Iterate chains 

633 for chain in structure.chains: 

634 # Check if the first residue is tagged as a terminal residue 

635 # If not, rename it 

636 first_residue = chain.residues[0] 

637 # In case it is a protein 

638 if first_residue.name in protein_mid_residue_names: 

639 first_residue.name += 'N' 

640 # In case it is RNA 

641 elif first_residue.name in nucleic_mid_residue_names: 

642 first_residue.name += '5' 

643 # Check if the last residue is tagged as 'C' terminal 

644 # If not, rename it 

645 last_residue = chain.residues[-1] 

646 # In case it is a protein 

647 if last_residue.name in protein_mid_residue_names: 

648 last_residue.name += 'C' 

649 # In case it is RNA 

650 elif last_residue.name in nucleic_mid_residue_names: 

651 last_residue.name += '3' 

652 

653def mine_cmip_output (logs): 

654 center, density, units = (), (), () 

655 grid_density_exp = r"^\s*Grid density:\s+(\d+)\s+(\d+)\s+(\d+)" 

656 grid_center_exp = r"^\s*Grid center:([- ]+\d+.\d+)([- ]+\d+.\d+)([- ]+\d+.\d+)" 

657 grid_units_exp = r"^\s*Grid units:\s+(\d+.\d+)\s+(\d+.\d+)\s+(\d+.\d+)" 

658 for line in logs: 

659 grid_center_groups = re.match(grid_center_exp, line) 

660 grid_density_groups = re.match(grid_density_exp, line) 

661 grid_units_groups = re.match(grid_units_exp, line) 

662 if grid_density_groups: 

663 density = tuple(float(grid_density_groups.group(i)) 

664 for i in (1, 2, 3)) 

665 if grid_center_groups: 

666 center = tuple(float(grid_center_groups.group(i)) 

667 for i in (1, 2, 3)) 

668 if grid_units_groups: 

669 units = tuple(float(grid_units_groups.group(i)) 

670 for i in (1, 2, 3)) 

671 # If data mining fails there must be something wrong with the CMIP output 

672 if center == () or density == () or units == (): 

673 for line in logs: 

674 print(line) 

675 raise RuntimeError('CMIP output mining failed') 

676 return center, density, units 

677 

678# This function is used to create new grid parameters 

679# The new grid is expected to cover both input grids: agent 1 grid and agent 2 grid 

680def compute_new_grid ( 

681 agent1_center, 

682 agent1_density, 

683 agent1_units, 

684 agent2_center, 

685 agent2_density, 

686 agent2_units, 

687 extra_density=1): 

688 new_center = [] # [(i + j) / 2 for i, j in zip(agent1_center, agent2_center)] 

689 new_density = [] 

690 

691 for k in range(3): 

692 min_prot = agent1_center[k] - agent1_density[k] * agent1_units[k] 

693 min_lig = agent2_center[k] - agent2_density[k] * agent2_units[k] 

694 min_new = min(min_prot, min_lig) 

695 max_prot = agent1_center[k] + agent1_density[k] * agent1_units[k] 

696 max_lig = agent2_center[k] + agent2_density[k] * agent2_units[k] 

697 max_new = max(max_prot, max_lig) 

698 

699 dnew = int(abs(max_new - min_new) + extra_density) 

700 cnew = min_new + (dnew / 2) 

701 new_density.append(dnew) 

702 new_center.append(cnew) 

703 return new_center, new_density 

704 

705# Format data grouping atom energies by average ES/VDW 

706# Calculate values for the whole set of frames analyzed 

707# The canclulate it also for the 20% first frames and the 20% last frames separatedly 

708def format_data (data : list) -> dict: 

709 

710 # First, reorder data by atoms and energies 

711 atom_count = len(data[0]) 

712 

713 atom_vdw_values = [[] for n in range(atom_count)] 

714 atom_es_values = [[] for n in range(atom_count)] 

715 atom_both_values = [[] for n in range(atom_count)] 

716 for frame in data: 

717 for a, values in enumerate(frame): 

718 if values == None: continue 

719 atom_vdw_values[a].append(values[0]) 

720 atom_es_values[a].append(values[1]) 

721 atom_both_values[a].append(values[2]) 

722 

723 # Calculate the atom averages from each energy 

724 atom_vdw_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values ] 

725 atom_es_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values] 

726 atom_both_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values] 

727 

728 # Calculate the atom averages from each energy at the beginig and end of the trajectory 

729 # We take the initial 20% and the final 20% of frames to calculate each respectively 

730 p20_frames = len(data) 

731 p20 = round(p20_frames*0.2) 

732 if p20 == 0: p20 = 1 

733 

734 # Initials 

735 atom_vdw_values_initial = [[] for n in range(atom_count)] 

736 atom_es_values_initial = [[] for n in range(atom_count)] 

737 atom_both_values_initial = [[] for n in range(atom_count)] 

738 for frame in data[:p20]: 

739 for a, values in enumerate(frame): 

740 if values == None: continue 

741 atom_vdw_values_initial[a].append(values[0]) 

742 atom_es_values_initial[a].append(values[1]) 

743 atom_both_values_initial[a].append(values[2]) 

744 

745 atom_vdw_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values_initial] 

746 atom_es_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values_initial] 

747 atom_both_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values_initial] 

748 

749 # Finals 

750 atom_vdw_values_final = [[] for n in range(atom_count)] 

751 atom_es_values_final = [[] for n in range(atom_count)] 

752 atom_both_values_final = [[] for n in range(atom_count)] 

753 for frame in data[-p20:]: 

754 for a, values in enumerate(frame): 

755 if values == None: continue 

756 atom_vdw_values_final[a].append(values[0]) 

757 atom_es_values_final[a].append(values[1]) 

758 atom_both_values_final[a].append(values[2]) 

759 

760 atom_vdw_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values_final] 

761 atom_es_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values_final] 

762 atom_both_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values_final] 

763 

764 # Format the results data and append it to the output data 

765 output = { 

766 'vdw': atom_vdw_avg, 

767 'es': atom_es_avg, 

768 'both': atom_both_avg, 

769 'ivdw': atom_vdw_avg_initial, 

770 'ies': atom_es_avg_initial, 

771 'iboth': atom_both_avg_initial, 

772 'fvdw': atom_vdw_avg_final, 

773 'fes': atom_es_avg_final, 

774 'fboth': atom_both_avg_final, 

775 } 

776 

777 return output