Coverage for model_workflow/analyses/energies.py: 84%

345 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1# Energies 

2# 

3# The energies analysis is carried by ACPYPE and a locally developed tool: CMIP. 

4# ACPYPE is a tool based in Python to use Antechamber to generate topologies for chemical compounds and to interface with others python applications. 

5# CMIP stands for Classical Molecular Interaction Potential and it is usefull to predict electrostatic and Van der Waals potentials. 

6# 

7# ACPYPE: 

8# SOUSA DA SILVA, A. W. & VRANKEN, W. F. ACPYPE - AnteChamber PYthon Parser interfacE. BMC Research Notes 5 (2012), 367 doi: 10.1186/1756-0500-5-367 http://www.biomedcentral.com/1756-0500/5/367 

9# BATISTA, P. R.; WILTER, A.; DURHAM, E. H. A. B. & PASCUTTI, P. G. Molecular Dynamics Simulations Applied to the Study of Subtypes of HIV-1 Protease. Cell Biochemistry and Biophysics 44 (2006), 395-404. doi: 10.1385/CBB:44:3:395 

10# 

11# Antechamber: 

12# WANG, J., WANG, W., KOLLMAN, P. A., and CASE, D. A. Automatic atom type and bond type perception in molecular mechanical calculations. Journal of Molecular Graphics and Modelling 25, 2 (2006), 247–260. doi: 10.1016/j.jmgm.2005.12.005 

13# WANG, J., WOLF, R. M., CALDWELL, J. W., KOLLMAN, P. A., and CASE, D. A. Development and testing of a General Amber Force Field. Journal of Computational Chemistry 25, 9 (2004), 1157–1174. doi: 10.1002/jcc.20035 

14# 

15# CMIP: 

16# Gelpí, J.L., Kalko, S.G., Barril, X., Cirera, J., de la Cruz, X., Luque, F.J. and Orozco, M. (2001), Classical molecular interaction potentials: Improved setup procedure in molecular dynamics simulations of proteins. Proteins, 45: 428-437. doi:10.1002/prot.1159 

17 

18# Imports libraries 

19from os import mkdir, remove 

20from os.path import exists 

21from shutil import copyfile 

22from pathlib import Path 

23from glob import glob 

24import re 

25import math 

26from subprocess import run, PIPE 

27 

28from model_workflow.tools.get_pdb_frames import get_pdb_frames 

29from model_workflow.utils.auxiliar import load_json, save_json, warn, numerate_filename, get_analysis_name 

30from model_workflow.utils.constants import OUTPUT_ENERGIES_FILENAME 

31from model_workflow.utils.constants import PROTEIN_RESIDUE_NAME_LETTERS, NUCLEIC_RESIDUE_NAME_LETTERS 

32from model_workflow.utils.structures import Structure 

33from model_workflow.utils.file import File 

34from model_workflow.utils.type_hints import * 

35 

36CURSOR_UP_ONE = '\x1b[1A' 

37ERASE_LINE = '\x1b[2K' 

38ERASE_3_PREVIOUS_LINES = CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE 

39 

40# Set some auxiliar file names 

41CMIP_INPUTS_FILE = 'cmip.in' 

42CMIP_CHECK_INPUTS_FILE = 'cmip_check.in' 

43VDW_PARAMETERS = 'vdwprm' 

44DEBUG_ENERGIES_SUM_SCRIPT = 'get_energies_sum.py' 

45 

46# Set the path to the resources folder where we store auxiliar files required for this analysis 

47resources = str(Path(__file__).parent.parent / "resources") 

48 

49def energies ( 

50 trajectory_file : File, 

51 # Set a folder to be created in order to store residual output files from this analysis 

52 output_directory : str, 

53 structure : 'Structure', 

54 interactions : list, 

55 charges : list, 

56 snapshots : int, 

57 frames_limit : int = 100, 

58 verbose : bool = False, 

59 debug : bool = False): 

60 """Perform the electrostatic and vdw energies analysis for each pair of interaction agents.""" 

61 

62 # Make sure we have interactions 

63 if not interactions or len(interactions) == 0: 

64 print('No interactions were specified') 

65 return 

66 

67 # Make sure we have charges 

68 if not charges or type(charges) == Exception: 

69 print('Atom charges are not available -> This analysis will be skipped') 

70 return 

71 

72 # Set the main output filepath 

73 output_analysis_filepath = f'{output_directory}/{OUTPUT_ENERGIES_FILENAME}' 

74 

75 # Set the auxiliar files further required as CMIP inputs 

76 cmip_inputs_checkonly_source = File(f'{resources}/{CMIP_CHECK_INPUTS_FILE}') 

77 cmip_inputs_source = File(f'{resources}/{CMIP_INPUTS_FILE}') 

78 vdw_source = File(f'{resources}/{VDW_PARAMETERS}') 

79 debug_script_source = File(f'{resources}/{DEBUG_ENERGIES_SUM_SCRIPT}') 

80 

81 # Set a backup file to store some results on the fly 

82 # This is useful to restore these values in case the analysis is disrupt since it is a long analysis 

83 energies_backup = File(output_directory + '/backup.json') 

84 

85 # Define an additional file generated by CMIP which must be handled (written in the energies folder and deleted) 

86 restart_file = File(output_directory + '/restart') 

87 

88 # Check the number of atoms on each interacting agent 

89 # If there is any agent with more than 80000 atoms CMIP will fail so we must skip this specific energies analysis by now 

90 # DANI: Este valor límite se puede cambiar en CMIP, pero hay que recompilar y eso no es banal en un environment de conda 

91 cmip_atom_limit = 80000 

92 for interaction in interactions: 

93 exceeds = False 

94 for agent in ['1', '2']: 

95 atoms = interaction[f'atom_indices_{agent}'] 

96 atom_count = len(atoms) 

97 if atom_count >= cmip_atom_limit: 

98 exceeds = True 

99 break 

100 if exceeds: 

101 warn(f'{interaction["name"]} is exceeding the CMIP atom count limit of {cmip_atom_limit} and it will be skipped for this analysis') 

102 interaction['exceeds'] = True 

103 

104 # This anlaysis produces many residual output files 

105 # Create a new folder to store all ouput files so they do not overcrowd the main directory 

106 if not exists(output_directory): 

107 mkdir(output_directory) 

108 

109 # Adapt the structure for cmip 

110 energies_structure = structure.copy() 

111 

112 # Rename residues according to if they are terminals 

113 name_terminal_residues(energies_structure) 

114 

115 # Set each atom element in CMIP format 

116 set_cmip_elements(energies_structure) 

117 

118 # Save the structure back to a pdb 

119 energies_structure_file = File(output_directory + '/energies.pdb') 

120 energies_structure.generate_pdb_file(energies_structure_file.path) 

121 

122 # Transform an agent structure to a cmip input pdb, which includes charges and cmip-friendly elements 

123 # If this is to be a host file then leave only atoms involved in the interaction: both host and guest agents 

124 # If this is to be a host file, all guest atoms are set dummy* as well 

125 # If this is to be a guest file then remove all host atoms as well 

126 # Note that the frame structure is not the standard structure, which would already include charges 

127 # For this reason, charges are included in the frame structure before 

128 # Also flag some atoms as 'dummy' by adding a 'X' before the element 

129 # *Dummy atoms are not considered in the calculation but they stand for a region with low dielectric 

130 # If removed, the void left by these atoms would be considered to be filled with solvent, which has a high dielectric 

131 # These dielectric differences have a strong impact on the calculation 

132 # By default we set as dummy host atoms involved in a covalent bond with guest atoms 

133 def pdb2cmip ( 

134 agent_name : str, 

135 host_file : bool, 

136 frame_structure : 'Structure', 

137 host_selection : 'Selection', 

138 guest_selection : 'Selection', 

139 strong_bonds : Optional[list] 

140 ) -> File: 

141 # Get atom indices for host atoms involved in a covalent bond with guest atoms 

142 # If this is the host, they will be further marked as dummy for CMIP to ignore them in the calculations 

143 # If this is the guest, they will be removed 

144 strong_bond_atom_indices = [] 

145 if strong_bonds: 

146 for bond in strong_bonds: 

147 strong_bond_atom_indices += bond 

148 # Set atoms to be flagged as dummy 

149 dummy_atom_indices = set() 

150 # If this is to be the host file then set guest atoms and strong bond atoms dummy 

151 if host_file: 

152 dummy_atom_indices |= set(strong_bond_atom_indices) 

153 dummy_atom_indices |= set(guest_selection.atom_indices) 

154 # Set which atoms must be kept in the output file 

155 # If this is the host file then keep both host and guest atoms 

156 # If this is the guest file then keep only guest atoms 

157 # Always remove strong bond atoms in the guest 

158 selection = host_selection + guest_selection if host_file else guest_selection 

159 strong_bond_selection = frame_structure.select_atom_indices(strong_bond_atom_indices) 

160 guest_strong_bonds = guest_selection & strong_bond_selection 

161 selection -= guest_strong_bonds 

162 # Filter the selected atoms in the structure 

163 selected_structure = frame_structure.filter(selection) 

164 # Set atom charges (non standard attribute), which are used further to write the adapted cmip pdb 

165 # Set also the elements to macth the original structure, since the frame generator messes the elements 

166 for a, atom in enumerate(selected_structure.atoms): 

167 atom_index = selection.atom_indices[a] 

168 charge = charges[atom_index] 

169 setattr(atom, 'charge', charge) 

170 cmip_element = energies_structure.atoms[atom_index].element 

171 atom.element = cmip_element 

172 # Write a special pdb which contains charges as CMIP expects to find them and dummy atoms flagged 

173 output_filepath = f'{output_directory}/{agent_name}_{"host" if host_file else "guest"}.cmip.pdb' 

174 with open(output_filepath, "w") as file: 

175 # Write a line for each atom 

176 for a, atom in enumerate(selected_structure.atoms): 

177 index = str(a+1).rjust(5) 

178 atom_name = atom.name 

179 name = ' ' + atom_name.ljust(3) if len(atom_name) < 4 else atom_name 

180 residue = atom.residue 

181 residue_name = residue.name.ljust(3) 

182 chain = atom.chain 

183 chain_name = chain.name.rjust(1) 

184 residue_number = str(residue.number).rjust(4) 

185 icode = residue.icode.rjust(1) 

186 coords = atom.coords 

187 x_coord, y_coord, z_coord = [ "{:.3f}".format(coord).rjust(8) for coord in coords ] 

188 charge = "{:.4f}".format(atom.charge) # Charge was manually added before, it is not a standard attribute 

189 # In case this atom is making an strong bond between both interacting agents we add an 'X' before the element 

190 # This way CMIP will ignore the atom. Otherwise it would return high non-sense Van der Waals values 

191 real_index = selection.atom_indices[a] 

192 # Set if atom is to be ignored 

193 is_dummy = real_index in dummy_atom_indices 

194 cmip_dummy_flag = 'X' if is_dummy else '' 

195 element = atom.element 

196 atom_line = ('ATOM ' + index + ' ' + name + ' ' + residue_name + ' ' 

197 + chain_name + residue_number + icode + ' ' + x_coord + y_coord + z_coord 

198 + ' ' + str(charge).rjust(7) + ' ' + cmip_dummy_flag + element + '\n') 

199 file.write(atom_line) 

200 

201 return File(output_filepath) 

202 

203 # Run CMIP in 'checkonly' mode (i.e. start and stop) for both agents 

204 # Mine the grid generated by CMIP for both agents and calculate a new grid which would include both 

205 # Finally, modify the provided 'cmip_inputs' with the new grid parameters 

206 def adapt_cmip_grid (agent1_cmip_pdb : File, agent2_cmip_pdb : File, cmip_inputs : File) -> tuple: 

207 

208 # Set a name for the checkonly CMIP outputs 

209 # This name is not important, since the data we want is in the CMIP logs 

210 cmip_checkonly_output = File(output_directory + '/checkonly.energy.pdb') 

211 

212 # Run CMIP in 'checkonly' mode and save the grid dimensions output 

213 # First do it for the agent 1 

214 cmip_logs_agent1 = run([ 

215 "cmip", 

216 "-i", 

217 cmip_inputs_checkonly_source.path, 

218 "-pr", 

219 agent1_cmip_pdb.path, 

220 "-vdw", 

221 vdw_source.path, 

222 "-hs", 

223 agent2_cmip_pdb.path, 

224 "-byat", 

225 cmip_checkonly_output.path, 

226 "-rst", 

227 restart_file.path 

228 ], stdout=PIPE, stderr=PIPE).stdout.decode() 

229 

230 # Mine the grid dimensions from CMIP logs 

231 agent1_center, agent1_density, agent1_units = mine_cmip_output( 

232 cmip_logs_agent1.split("\n")) 

233 

234 # Run CMIP in 'checkonly' mode and save the grid dimensions output 

235 # Now do it for the agent 2 

236 cmip_logs_agent2 = run([ 

237 "cmip", 

238 "-i", 

239 cmip_inputs_checkonly_source.path, 

240 "-pr", 

241 agent2_cmip_pdb.path, 

242 "-vdw", 

243 vdw_source.path, 

244 "-hs", 

245 agent1_cmip_pdb.path, 

246 "-byat", 

247 cmip_checkonly_output.path, 

248 "-rst", 

249 restart_file.path 

250 ], stdout=PIPE, stderr=PIPE).stdout.decode() 

251 

252 # Mine the grid dimensions from CMIP logs 

253 agent2_center, agent2_density, agent2_units = mine_cmip_output( 

254 cmip_logs_agent2.split("\n")) 

255 

256 # Calculate grid dimensions for a new grid which contains both previous grids 

257 new_center, new_density = compute_new_grid( 

258 agent1_center, 

259 agent1_density, 

260 agent1_units, 

261 agent2_center, 

262 agent2_density, 

263 agent2_units) 

264 

265 # Here we must calculate how many grid points the new box would have 

266 # In case the number of points exceeds a safe limit we must reduce it 

267 # Otherwise CMIP could return the following error: 

268 # ERROR when trying to allocate -2092351636 bytes of memory 

269 

270 # Set the limit number of grid points 

271 # It has been found experimentally: 65945880 Grid points -> OK, 68903412 Grid points -> ERROR 

272 grid_points_limit = 65000000 

273 

274 # Set the default grid 'cells' size for all three dimensions 

275 grid_unit_size = 0.5 

276 

277 # Calculate the amount of total points with current parameters 

278 current_grid_points = (new_density[0] + 1) * (new_density[1] + 1) * (new_density[2] + 1) 

279 

280 # In case the current number of grid points exceeds the limit... 

281 # Reduce all dimension densities in proportion and expand the grid units size to compensate 

282 if current_grid_points > grid_points_limit: 

283 print(f'WARNING: Grid points limit is exceeded ({current_grid_points})') 

284 proportion = grid_points_limit / current_grid_points 

285 new_density[0] = math.ceil(new_density[0] * proportion) 

286 new_density[1] = math.ceil(new_density[1] * proportion) 

287 new_density[2] = math.ceil(new_density[2] * proportion) 

288 grid_unit_size = math.ceil(grid_unit_size / proportion * 1000) / 1000 

289 print(f'WARNING: Grid resolution has been reduced -> unit size = {grid_unit_size}') 

290 

291 # Set the new lines to be written in the local CMIP inputs file 

292 grid_inputs = [ 

293 f" cenx={new_center[0]:.1f} \n", 

294 f" ceny={new_center[1]:.1f} \n", 

295 f" cenz={new_center[2]:.1f} \n", 

296 f" dimx={int(new_density[0])} \n", 

297 f" dimy={int(new_density[1])} \n", 

298 f" dimz={int(new_density[2])} \n", 

299 f" intx={grid_unit_size} \n", 

300 f" inty={grid_unit_size} \n", 

301 f" intz={grid_unit_size} \n", 

302 ] 

303 # Add previous lines to the local inputs file 

304 with open(cmip_inputs.path, "r+") as file: 

305 lines = file.readlines() 

306 file.seek(0) 

307 for line in lines: 

308 if line == '&end \n': 

309 for grid_input in grid_inputs: 

310 file.write(grid_input) 

311 file.write(line) 

312 

313 # Delete the 'ckeckonly' file 

314 cmip_checkonly_output.remove() 

315 

316 # Calculate the resulting box origin and size and return both values 

317 # These values are used for display / debug purposes only 

318 new_size = (new_density[0] * grid_unit_size, new_density[1] * grid_unit_size, new_density[2] * grid_unit_size) 

319 new_origin = (new_center[0] - new_size[0] / 2, new_center[1] - new_size[1] / 2, new_center[2] - new_size[2] / 2) 

320 return new_origin, new_size 

321 

322 # Run the CMIP software to get the desired energies 

323 def get_cmip_energies (cmip_inputs : File, guest : File, host : File) -> tuple: 

324 # Set the cmip output filename, which is to be read rigth after it is generated 

325 cmip_output_file = File(output_directory + '/cmip_output.pdb') 

326 # Run cmip 

327 cmip_logs = run([ 

328 "cmip", 

329 "-i", 

330 cmip_inputs.path, 

331 "-pr", 

332 guest.path, 

333 "-vdw", 

334 vdw_source.path, 

335 "-hs", 

336 host.path, 

337 "-byat", 

338 cmip_output_file.path, 

339 "-rst", 

340 restart_file.path, 

341 ], stdout=PIPE, stderr=PIPE).stdout.decode() 

342 # Mine the electrostatic (es) and Van der Walls (vdw) energies for each atom 

343 # Group the results by atom adding their values 

344 atom_energies = [] 

345 with open(cmip_output_file.path, 'r') as file: 

346 lines = list(file) 

347 # If this file is empty it means something went wrong with CMIP 

348 # We print its logs and exit 

349 if len(lines) == 0: 

350 if type(cmip_logs) == str: 

351 print(cmip_logs) 

352 else: 

353 for line in cmip_logs: 

354 print(line) 

355 raise SystemExit('ERROR: Something went wrong with CMIP!') 

356 # Mine energies line by line (i.e. atom by atom) 

357 for line in lines: 

358 # WARNING: This works most times but there is an exception 

359 # vdw = float(line[42:53]) 

360 # es = float(line[57:68]) 

361 # both = float(line[72:83]) 

362 # Numbers may become larger than expected when they are close to 0 since they are expressed in E notation 

363 # e.g. 0.1075E-04 

364 # For this reason we must mine these last values relying on whitespaces between them 

365 line_splits = line.split() 

366 vdw = float(line_splits[-3]) 

367 es = float(line_splits[-2]) 

368 both = float(line_splits[-1]) 

369 # Values greater than 100 are represented as 0 

370 # This step is performed to filter 'infinity' values 

371 energies = (vdw, es, both) 

372 if both > 100: 

373 #print('WARNING: We have extremly high values in energies which are beeing discarded') 

374 energies = (0, 0, 0) 

375 # Add current atom energy values to the atom energies list 

376 atom_energies.append(energies) 

377 return atom_energies 

378 

379 # Given a pdb structure, use CMIP to extract energies 

380 # Output energies are returned by atom 

381 def get_frame_energy (frame_structure : 'Structure') -> List[dict]: 

382 

383 # WARNING: At this point structure should be corrected 

384 # WARNING: Repeated atoms will make the analysis fail 

385 

386 # Repeat the whole process for each interaction 

387 data = [] 

388 for interaction in interactions: 

389 

390 # Check if the interaction has been marked as 'exceeds', in which case we skip it 

391 if interaction.get('exceeds', False): 

392 continue 

393 

394 # Get covalent bonds between both agents, if any 

395 # They will be not taken in count during the calculation 

396 # Otherwise we would have a huge energy peak in this atom since they are very close 

397 strong_bonds = interaction.get('strong_bonds', None) 

398 

399 # Get interaction and agent names, just for the logs 

400 interaction_name = interaction['name'] 

401 print(f' Processing {interaction_name}') 

402 agent1_name = interaction['agent_1'].replace(' ', '_').replace('/', '_') 

403 agent2_name = interaction['agent_2'].replace(' ', '_').replace('/', '_') 

404 # Get agent atom selections 

405 agent1_atom_indices = interaction[f'atom_indices_1'] 

406 agent1_selection = frame_structure.select_atom_indices(agent1_atom_indices) 

407 if not agent1_selection: 

408 raise ValueError(f'Empty agent 1 "{agent1_name}" from interaction "{interaction_name}"') 

409 agent2_atom_indices = interaction[f'atom_indices_2'] 

410 agent2_selection = frame_structure.select_atom_indices(agent2_atom_indices) 

411 if not agent2_selection: 

412 raise ValueError(f'Empty agent 2 "{agent2_name}" from interaction "{interaction_name}"') 

413 

414 # Prepare the CMIP friendly input pdb structures for every calculation 

415 # First prepare the host files and the prepare the guest files 

416 # There is only a difference between host and guest files: 

417 # Host files include both agent atoms but the guest atoms are all marked as dummy 

418 # Guest files include only guest atoms while host atoms are removed 

419 agent1_cmip_host = pdb2cmip(agent_name = agent1_name, host_file = True, frame_structure=frame_structure, 

420 host_selection=agent1_selection, guest_selection=agent2_selection, strong_bonds=strong_bonds) 

421 agent2_cmip_host = pdb2cmip(agent_name = agent2_name, host_file = True, frame_structure=frame_structure, 

422 host_selection=agent2_selection, guest_selection=agent1_selection, strong_bonds=strong_bonds) 

423 agent1_cmip_guest = pdb2cmip(agent_name = agent1_name, host_file = False, frame_structure=frame_structure, 

424 host_selection=agent2_selection, guest_selection=agent1_selection, strong_bonds=strong_bonds) 

425 agent2_cmip_guest = pdb2cmip(agent_name = agent2_name, host_file = False, frame_structure=frame_structure, 

426 host_selection=agent1_selection, guest_selection=agent2_selection, strong_bonds=strong_bonds) 

427 

428 # Copy the source cmip inputs file in the local directory 

429 # Inputs will be modified to adapt the cmip grid to both agents together 

430 # Note than modified inputs file is not conserved along frames 

431 # Structures may change along trajectory thus requiring a different grid size 

432 cmip_inputs = File(f'{output_directory}/{CMIP_INPUTS_FILE}') 

433 copyfile(cmip_inputs_source.path, cmip_inputs.path) 

434 

435 # Set the CMIP box dimensions and densities to fit both the host and the guest 

436 # Box origin and size are modified in the cmip inputs 

437 # Values returned are only used for display / debug purposes 

438 box_origin, box_size = adapt_cmip_grid(agent1_cmip_guest, agent2_cmip_guest, cmip_inputs) 

439 

440 # If the debug flag is passed then, instead of calculating energies, leave it all ready and stop here 

441 if debug: 

442 # Copy in the energies folder a small python script used to sum output energies 

443 debug_script = File(f'{output_directory}/{DEBUG_ENERGIES_SUM_SCRIPT}') 

444 copyfile(debug_script_source.path, debug_script.path) 

445 # Copy in the energies folder the VDM parameters input file 

446 debug_vdw_parameters = File(f'{output_directory}/{VDW_PARAMETERS}') 

447 copyfile(vdw_source.path, debug_vdw_parameters.path) 

448 # Set auxiliar output filenames 

449 debug_output_1 = 'first_output.pdb' 

450 debug_output_2 = 'second_output.pdb' 

451 # Write a README file explaining what to do to manually debug CMIP 

452 debug_readme = File(f'{output_directory}/README') 

453 with open(debug_readme.path, 'w') as file: 

454 file.write( 

455 '# Energies debug\n\n' 

456 '# Run CMIP\n' 

457 f'cmip -i {CMIP_INPUTS_FILE} -pr {agent1_cmip_guest.filename} -vdw {VDW_PARAMETERS} -hs {agent2_cmip_host.filename} -byat {debug_output_1}\n\n' 

458 '# Run CMIP inverting host and guest\n' 

459 f'cmip -i {CMIP_INPUTS_FILE} -pr {agent2_cmip_guest.filename} -vdw {VDW_PARAMETERS} -hs {agent1_cmip_host.filename} -byat {debug_output_2}\n\n' 

460 '# Sum both energies and compare\n' 

461 f'python {DEBUG_ENERGIES_SUM_SCRIPT} {debug_output_1} {debug_output_2}\n' 

462 ) 

463 raise SystemExit(' READY TO DEBUG -> Please go to the corresponding replica "energies" directory and follow the README instructions') 

464 

465 # Run the CMIP software to get the desired energies 

466 print(f' Calculating energies for {agent1_name} as guest and {agent2_name} as host') 

467 agent1_atom_energies = get_cmip_energies(cmip_inputs, agent1_cmip_guest, agent2_cmip_host) 

468 print(f' Calculating energies for {agent2_name} as guest and {agent1_name} as host') 

469 agent2_atom_energies = get_cmip_energies(cmip_inputs, agent2_cmip_guest, agent1_cmip_host) 

470 

471 # Print total energies at the end for every agent if the verbose flag has been passed 

472 if verbose: 

473 for agent_name, agent_energies in zip([agent1_name, agent2_name], [agent1_atom_energies, agent2_atom_energies]): 

474 total_vdw = total_es = total_both = 0 

475 for atom_energies in agent_energies: 

476 total_vdw += atom_energies[0] 

477 total_es += atom_energies[1] 

478 total_both += atom_energies[2] 

479 print(f' Total energies for {agent_name}: vmd {total_vdw}, es {total_es}, both {total_both}') 

480 

481 # DANI: Usa esto para escribir los resultados de las energías por átomo 

482 # sample = { 

483 # 'agent1': { 'energies': agent1_atom_energies, 'pdb': agent1_cmip }, 

484 # 'agent2': { 'energies': agent2_atom_energies, 'pdb': agent2_cmip }, 

485 # 'box': { 'origin': box_origin, 'size': box_size }  

486 # } 

487 # save_json(sample, 'energies_sample.json') 

488 

489 data.append({ 'agent1': agent1_atom_energies, 'agent2': agent2_atom_energies }) 

490 

491 # Erase the 2 previous log lines 

492 print(ERASE_3_PREVIOUS_LINES) 

493 

494 return data 

495 

496 # Extract the energies for each frame in a reduced trajectory 

497 frames, step, count = get_pdb_frames(energies_structure_file.path, trajectory_file.path, snapshots, frames_limit) 

498 non_exceeding_interactions = [interaction for interaction in interactions if not interaction.get('exceeds', False)] 

499 

500 # Load backup data in case there is a backup file 

501 if energies_backup.exists: 

502 print(' Recovering energies backup') 

503 interactions_data = load_json(energies_backup.path) 

504 else: 

505 interactions_data = [[] for interaction in non_exceeding_interactions] 

506 for frame_number, current_frame_pdb in enumerate(frames): 

507 # If we already have this frame in the backup then skip it 

508 if frame_number < len(interactions_data[0]): 

509 continue 

510 # Run the main analysis over the current frame 

511 # Append the result data for each interaction 

512 current_frame_structure = Structure.from_pdb_file(current_frame_pdb) 

513 frame_energies_data = get_frame_energy(current_frame_structure) 

514 for i, data in enumerate(frame_energies_data): 

515 interactions_data[i].append(data) 

516 # Save a backup just in case the process is interrupted further 

517 save_json(interactions_data, energies_backup.path) 

518 

519 # Cleanup here some CMIP residual files since it is not to be run again 

520 # Remove the restart file since we do not use it and it may be heavy sometimes 

521 if restart_file.exists: 

522 restart_file.remove() 

523 # Remove fortran unit files generated when running CMIP 

524 # Note that we can not define where these files are written but they appear where CMIP is run 

525 fortran_unit_files = glob('fort.*') 

526 for filepath in fortran_unit_files: 

527 remove(filepath) 

528 

529 # Now calculated atom average values through all frames for each pair of interaction agents 

530 output_summary = [] 

531 for i, interaction in enumerate(non_exceeding_interactions): 

532 # Check if the interaction as been marked as 'exceeds', in which case we skip it 

533 if interaction.get('exceeds', False): continue 

534 # Get the numerated output filepath for this specific interaction 

535 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i) 

536 # Add the root of the output analysis filename to the run data 

537 analysis_name = get_analysis_name(numbered_output_analysis_filepath) 

538 # Get the interaction name 

539 name = interaction['name'] 

540 # Add this interaction to the final summary 

541 output_summary.append({ 

542 'name': name, 

543 'analysis': analysis_name 

544 }) 

545 # If the output file already exists then skip this iteration 

546 if exists(numbered_output_analysis_filepath): continue 

547 # Get the main data 

548 data = interactions_data[i] 

549 # Format data 

550 agent1_output = format_data([ frame['agent1'] for frame in data ]) 

551 agent2_output = format_data([ frame['agent2'] for frame in data ]) 

552 # Format the results data and append it to the output data 

553 output = { 

554 'name': name, 

555 'agent1': agent1_output, 

556 'agent2': agent2_output, 

557 'version': '1.0.0' 

558 } 

559 # Export the current interaction analysis in json format 

560 save_json(output, numbered_output_analysis_filepath) 

561 

562 # Finally, export the summary in json format 

563 save_json(output_summary, output_analysis_filepath) 

564 

565 # Remove the backup to avoid accidentally reusing it when the output file is deleted 

566 energies_backup.remove() 

567 

568 # Finally remove the reduced topology 

569 energies_structure_file.remove() 

570 

571# Given a topology (e.g. pdb, prmtop), extract the atom elements in a CMIP friendly format 

572# Hydrogens bonded to carbons remain as 'H' 

573# Hydrogens bonded to oxygen are renamed as 'HO' 

574# Hydrogens bonded to nitrogen or sulfur are renamed as 'HN' 

575# Some heavy atom elements may be also modified (e.g. 'CL' -> 'Cl') 

576def set_cmip_elements (structure : 'Structure'): 

577 # Iterate over each atom to fix their element according to CMIP standards 

578 for a, atom in enumerate(structure.atoms): 

579 element = atom.element 

580 # Adapt hydrogens element to CMIP requirements 

581 if element == 'H': 

582 # We must find the element of the heavy atom this hydrogen is bonded to 

583 atom_bonds = atom.get_bonds() 

584 # There should be always only 1 bond 

585 if len(atom_bonds) != 1: 

586 print('Atom ' + atom.name + ' (' + str(atom.index) + ') has ' + str(len(atom_bonds)) + ' bonds') 

587 raise ValueError('An hydrogen should always have one and only one bond') 

588 bonded_atom_index = atom_bonds[0] 

589 bonded_atom_element = structure.atoms[bonded_atom_index].element 

590 # Hydrogens bonded to carbons remain as 'H' 

591 if bonded_atom_element == 'C': 

592 pass 

593 # Hydrogens bonded to oxygen are renamed as 'HO' 

594 elif bonded_atom_element == 'O': 

595 element = 'HO' 

596 # Hydrogens bonded to nitrogen or sulfur are renamed as 'HN' 

597 elif bonded_atom_element == 'N' or bonded_atom_element == 'S': 

598 element = 'HN' 

599 else: 

600 raise SystemExit( 

601 'ERROR: Hydrogen bonded to not supported heavy atom: ' + bonded_atom_element) 

602 atom.element = element 

603 

604# Set residue names with no endings 

605protein_mid_residue_names = { name for name in PROTEIN_RESIDUE_NAME_LETTERS if name[-1] not in {'N','C'} } 

606nucleic_mid_residue_names = { name for name in NUCLEIC_RESIDUE_NAME_LETTERS if name[-1] not in {'5','3'} } 

607 

608# Change residue names in a structure to meet the CMIP requirements 

609# Change terminal residue names by adding an 'N' or 'C' 

610def name_terminal_residues (structure : 'Structure'): 

611 # Iterate chains 

612 for chain in structure.chains: 

613 # Check if the first residue is tagged as a terminal residue 

614 # If not, rename it 

615 first_residue = chain.residues[0] 

616 # In case it is a protein 

617 if first_residue.name in protein_mid_residue_names: 

618 first_residue.name += 'N' 

619 # In case it is RNA 

620 elif first_residue.name in nucleic_mid_residue_names: 

621 first_residue.name += '5' 

622 # Check if the last residue is tagged as 'C' terminal 

623 # If not, rename it 

624 last_residue = chain.residues[-1] 

625 # In case it is a protein 

626 if last_residue.name in protein_mid_residue_names: 

627 last_residue.name += 'C' 

628 # In case it is RNA 

629 elif last_residue.name in nucleic_mid_residue_names: 

630 last_residue.name += '3' 

631 

632def mine_cmip_output (logs): 

633 center, density, units = (), (), () 

634 grid_density_exp = r"^\s*Grid density:\s+(\d+)\s+(\d+)\s+(\d+)" 

635 grid_center_exp = r"^\s*Grid center:([- ]+\d+.\d+)([- ]+\d+.\d+)([- ]+\d+.\d+)" 

636 grid_units_exp = r"^\s*Grid units:\s+(\d+.\d+)\s+(\d+.\d+)\s+(\d+.\d+)" 

637 for line in logs: 

638 grid_center_groups = re.match(grid_center_exp, line) 

639 grid_density_groups = re.match(grid_density_exp, line) 

640 grid_units_groups = re.match(grid_units_exp, line) 

641 if grid_density_groups: 

642 density = tuple(float(grid_density_groups.group(i)) 

643 for i in (1, 2, 3)) 

644 if grid_center_groups: 

645 center = tuple(float(grid_center_groups.group(i)) 

646 for i in (1, 2, 3)) 

647 if grid_units_groups: 

648 units = tuple(float(grid_units_groups.group(i)) 

649 for i in (1, 2, 3)) 

650 # If data mining fails there must be something wrong with the CMIP output 

651 if center == () or density == () or units == (): 

652 for line in logs: 

653 print(line) 

654 print('WARNING: CMIP output mining failed') 

655 raise SystemExit('ERROR: Something was wrong with CMIP') 

656 return center, density, units 

657 

658# This function is used to create new grid parameters 

659# The new grid is expected to cover both input grids: agent 1 grid and agent 2 grid 

660def compute_new_grid ( 

661 agent1_center, 

662 agent1_density, 

663 agent1_units, 

664 agent2_center, 

665 agent2_density, 

666 agent2_units, 

667 extra_density=1): 

668 new_center = [] # [(i + j) / 2 for i, j in zip(agent1_center, agent2_center)] 

669 new_density = [] 

670 

671 for k in range(3): 

672 min_prot = agent1_center[k] - agent1_density[k] * agent1_units[k] 

673 min_lig = agent2_center[k] - agent2_density[k] * agent2_units[k] 

674 min_new = min(min_prot, min_lig) 

675 max_prot = agent1_center[k] + agent1_density[k] * agent1_units[k] 

676 max_lig = agent2_center[k] + agent2_density[k] * agent2_units[k] 

677 max_new = max(max_prot, max_lig) 

678 

679 dnew = int(abs(max_new - min_new) + extra_density) 

680 cnew = min_new + (dnew / 2) 

681 new_density.append(dnew) 

682 new_center.append(cnew) 

683 return new_center, new_density 

684 

685# Format data grouping atom energies by average ES/VDW 

686# Calculate values for the whole set of frames analyzed 

687# The canclulate it also for the 20% first frames and the 20% last frames separatedly 

688def format_data (data : list) -> dict: 

689 

690 # First, reorder data by atoms and energies 

691 atom_count = len(data[0]) 

692 

693 atom_vdw_values = [[] for n in range(atom_count)] 

694 atom_es_values = [[] for n in range(atom_count)] 

695 atom_both_values = [[] for n in range(atom_count)] 

696 for frame in data: 

697 for a, values in enumerate(frame): 

698 atom_vdw_values[a].append(values[0]) 

699 atom_es_values[a].append(values[1]) 

700 atom_both_values[a].append(values[2]) 

701 

702 # Calculate the atom averages from each energy 

703 atom_vdw_avg = [sum(v) / len(v) for v in atom_vdw_values] 

704 atom_es_avg = [sum(v) / len(v) for v in atom_es_values] 

705 atom_both_avg = [sum(v) / len(v) for v in atom_both_values] 

706 

707 # Calculate the atom averages from each energy at the beginig and end of the trajectory 

708 # We take the initial 20% and the final 20% of frames to calculate each respectively 

709 p20_frames = len(data) 

710 p20 = round(p20_frames*0.2) 

711 if p20 == 0: p20 = 1 

712 

713 # Initials 

714 atom_vdw_values_initial = [[] for n in range(atom_count)] 

715 atom_es_values_initial = [[] for n in range(atom_count)] 

716 atom_both_values_initial = [[] for n in range(atom_count)] 

717 for frame in data[:p20]: 

718 for a, values in enumerate(frame): 

719 atom_vdw_values_initial[a].append(values[0]) 

720 atom_es_values_initial[a].append(values[1]) 

721 atom_both_values_initial[a].append(values[2]) 

722 

723 atom_vdw_avg_initial = [sum(v) / len(v) for v in atom_vdw_values_initial] 

724 atom_es_avg_initial = [sum(v) / len(v) for v in atom_es_values_initial] 

725 atom_both_avg_initial = [sum(v) / len(v) for v in atom_both_values_initial] 

726 

727 # Finals 

728 atom_vdw_values_final = [[] for n in range(atom_count)] 

729 atom_es_values_final = [[] for n in range(atom_count)] 

730 atom_both_values_final = [[] for n in range(atom_count)] 

731 for frame in data[-p20:]: 

732 for a, values in enumerate(frame): 

733 atom_vdw_values_final[a].append(values[0]) 

734 atom_es_values_final[a].append(values[1]) 

735 atom_both_values_final[a].append(values[2]) 

736 

737 atom_vdw_avg_final = [sum(v) / len(v) for v in atom_vdw_values_final] 

738 atom_es_avg_final = [sum(v) / len(v) for v in atom_es_values_final] 

739 atom_both_avg_final = [sum(v) / len(v) for v in atom_both_values_final] 

740 

741 # Format the results data and append it to the output data 

742 output = { 

743 'vdw': atom_vdw_avg, 

744 'es': atom_es_avg, 

745 'both': atom_both_avg, 

746 'ivdw': atom_vdw_avg_initial, 

747 'ies': atom_es_avg_initial, 

748 'iboth': atom_both_avg_initial, 

749 'fvdw': atom_vdw_avg_final, 

750 'fes': atom_es_avg_final, 

751 'fboth': atom_both_avg_final, 

752 } 

753 

754 return output