Coverage for mddb_workflow/analyses/energies.py: 8%

1# Energies

3# The energies analysis is carried by ACPYPE and a locally developed tool: CMIP.

4# ACPYPE is a tool based in Python to use Antechamber to generate topologies for chemical compounds and to interface with others python applications.

5# CMIP stands for Classical Molecular Interaction Potential and it is usefull to predict electrostatic and Van der Waals potentials.

7# ACPYPE:

8# SOUSA DA SILVA, A. W. & VRANKEN, W. F. ACPYPE - AnteChamber PYthon Parser interfacE. BMC Research Notes 5 (2012), 367 doi: 10.1186/1756-0500-5-367 http://www.biomedcentral.com/1756-0500/5/367

9# BATISTA, P. R.; WILTER, A.; DURHAM, E. H. A. B. & PASCUTTI, P. G. Molecular Dynamics Simulations Applied to the Study of Subtypes of HIV-1 Protease. Cell Biochemistry and Biophysics 44 (2006), 395-404. doi: 10.1385/CBB:44:3:395

10#

11# Antechamber:

12# WANG, J., WANG, W., KOLLMAN, P. A., and CASE, D. A. Automatic atom type and bond type perception in molecular mechanical calculations. Journal of Molecular Graphics and Modelling 25, 2 (2006), 247–260. doi: 10.1016/j.jmgm.2005.12.005

13# WANG, J., WOLF, R. M., CALDWELL, J. W., KOLLMAN, P. A., and CASE, D. A. Development and testing of a General Amber Force Field. Journal of Computational Chemistry 25, 9 (2004), 1157–1174. doi: 10.1002/jcc.20035

14#

15# CMIP:

16# Gelpí, J.L., Kalko, S.G., Barril, X., Cirera, J., de la Cruz, X., Luque, F.J. and Orozco, M. (2001), Classical molecular interaction potentials: Improved setup procedure in molecular dynamics simulations of proteins. Proteins, 45: 428-437. doi:10.1002/prot.1159

18# Imports libraries

19from os import mkdir, remove

20from os.path import exists

21from shutil import copyfile

22from glob import glob

23import re

24import math

25from subprocess import run, PIPE

27from mddb_workflow.tools.get_pdb_frames import get_pdb_frames

28from mddb_workflow.utils.auxiliar import load_json, save_json, warn, numerate_filename, get_analysis_name

29from mddb_workflow.utils.auxiliar import ForcedStop

30from mddb_workflow.utils.constants import OUTPUT_ENERGIES_FILENAME

31from mddb_workflow.utils.constants import PROTEIN_RESIDUE_NAME_LETTERS, NUCLEIC_RESIDUE_NAME_LETTERS

32from mddb_workflow.utils.constants import CMIP_INPUTS_CHECKONLY_SOURCE, CMIP_INPUTS_SOURCE

33from mddb_workflow.utils.constants import CMIP_VDW_SOURCE, ENERGIES_DEBUG_SCRIPT_SOURCE

34from mddb_workflow.utils.structures import Structure

35from mddb_workflow.utils.file import File

36from mddb_workflow.utils.type_hints import *

38CURSOR_UP_ONE = '\x1b[1A'

39ERASE_LINE = '\x1b[2K'

40ERASE_3_PREVIOUS_LINES = CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE

42def energies (

43 trajectory_file : File,

44 # Set a folder to be created in order to store residual output files from this analysis

45 output_directory : str,

46 structure : 'Structure',

47 interactions : list,

48 charges : list,

49 snapshots : int,

50 frames_limit : int = 100,

51 verbose : bool = False,

52 debug : bool = False):

53 """Perform the electrostatic and vdw energies analysis for each pair of interaction agents."""

55 # Make sure we have interactions

56 if not interactions or len(interactions) == 0:

57 print('No interactions were specified')

58 return

60 # Make sure we have charges

61 if not charges or type(charges) == Exception:

62 print('Atom charges are not available -> This analysis will be skipped')

63 return

65 # Set the main output filepath

66 output_analysis_filepath = f'{output_directory}/{OUTPUT_ENERGIES_FILENAME}'

68 # Set the auxiliar files further required as CMIP inputs

69 cmip_inputs_checkonly_source = File(CMIP_INPUTS_CHECKONLY_SOURCE)

70 cmip_inputs_source = File(CMIP_INPUTS_SOURCE)

71 vdw_source = File(CMIP_VDW_SOURCE)

72 debug_script_source = File(ENERGIES_DEBUG_SCRIPT_SOURCE)

74 # Set a backup file to store some results on the fly

75 # This is useful to restore these values in case the analysis is disrupt since it is a long analysis

76 energies_backup = File(output_directory + '/backup.json')

78 # Define an additional file generated by CMIP which must be handled (written in the energies folder and deleted)

79 restart_file = File(output_directory + '/restart')

81 # Check the number of atoms on each interacting agent

82 # If there is any agent with more than 80000 atoms CMIP will fail so we must skip this specific energies analysis by now

83 # DANI: Este valor límite se puede cambiar en CMIP, pero hay que recompilar y eso no es banal en un environment de conda

84 cmip_atom_limit = 80000

85 for interaction in interactions:

86 exceeds = False

87 for agent in ['1', '2']:

88 atoms = interaction[f'atom_indices_{agent}']

89 atom_count = len(atoms)

90 if atom_count >= cmip_atom_limit:

91 exceeds = True

92 break

93 if exceeds:

94 warn(f'{interaction["name"]} is exceeding the CMIP atom count limit of {cmip_atom_limit} and it will be skipped for this analysis')

95 interaction['exceeds'] = True

97 # This anlaysis produces many residual output files

98 # Create a new folder to store all ouput files so they do not overcrowd the main directory

99 if not exists(output_directory):

100 mkdir(output_directory)

101

102 # Adapt the structure for cmip

103 energies_structure = structure.copy()

104

105 # Rename residues according to if they are terminals

106 name_terminal_residues(energies_structure)

107

108 # Set each atom element in CMIP format

109 set_cmip_elements(energies_structure)

110

111 # Save the structure back to a pdb

112 energies_structure_file = File(output_directory + '/energies.pdb')

113 energies_structure.generate_pdb_file(energies_structure_file.path)

114

115 # Transform an agent structure to a cmip input pdb, which includes charges and cmip-friendly elements

116 # If this is to be a host file then leave only atoms involved in the interaction: both host and guest agents

117 # If this is to be a host file, all guest atoms are set dummy* as well

118 # If this is to be a guest file then remove all host atoms as well

119 # Note that the frame structure is not the standard structure, which would already include charges

120 # For this reason, charges are included in the frame structure before

121 # Also flag some atoms as 'dummy' by adding a 'X' before the element

122 # *Dummy atoms are not considered in the calculation but they stand for a region with low dielectric

123 # If removed, the void left by these atoms would be considered to be filled with solvent, which has a high dielectric

124 # These dielectric differences have a strong impact on the calculation

125 # By default we set as dummy host atoms involved in a covalent bond with guest atoms

126 def pdb2cmip (

127 agent_name : str,

128 host_file : bool,

129 frame_structure : 'Structure',

130 host_selection : 'Selection',

131 guest_selection : 'Selection',

132 strong_bonds : Optional[list]

133 ) -> File:

134 # Get atom indices for host atoms involved in a covalent bond with guest atoms

135 # If this is the host, they will be further marked as dummy for CMIP to ignore them in the calculations

136 # If this is the guest, they will be removed

137 strong_bond_atom_indices = []

138 if strong_bonds:

139 for bond in strong_bonds:

140 strong_bond_atom_indices += bond

141 # Set atoms to be flagged as dummy

142 dummy_atom_indices = set()

143 # If this is to be the host file then set guest atoms and strong bond atoms dummy

144 if host_file:

145 dummy_atom_indices |= set(strong_bond_atom_indices)

146 dummy_atom_indices |= set(guest_selection.atom_indices)

147 # Set which atoms must be kept in the output file

148 # If this is the host file then keep both host and guest atoms

149 # If this is the guest file then keep only guest atoms

150 # Always remove strong bond atoms in the guest

151 selection = host_selection + guest_selection if host_file else guest_selection

152 strong_bond_selection = frame_structure.select_atom_indices(strong_bond_atom_indices)

153 guest_strong_bonds = guest_selection & strong_bond_selection

154 selection -= guest_strong_bonds

155 # Filter the selected atoms in the structure

156 selected_structure = frame_structure.filter(selection)

157 # Set atom charges (non standard attribute), which are used further to write the adapted cmip pdb

158 # Set also the elements to macth the original structure, since the frame generator messes the elements

159 for a, atom in enumerate(selected_structure.atoms):

160 atom_index = selection.atom_indices[a]

161 charge = charges[atom_index]

162 setattr(atom, 'charge', charge)

163 cmip_element = energies_structure.atoms[atom_index].element

164 atom.element = cmip_element

165 # Write a special pdb which contains charges as CMIP expects to find them and dummy atoms flagged

166 output_filepath = f'{output_directory}/{agent_name}_{"host" if host_file else "guest"}.cmip.pdb'

167 with open(output_filepath, "w") as file:

168 # Write a line for each atom

169 for a, atom in enumerate(selected_structure.atoms):

170 index = str(a+1).rjust(5)

171 atom_name = atom.name

172 name = ' ' + atom_name.ljust(3) if len(atom_name) < 4 else atom_name

173 residue = atom.residue

174 residue_name = residue.name.ljust(3)

175 chain = atom.chain

176 chain_name = chain.name.rjust(1)

177 residue_number = str(residue.number).rjust(4)

178 icode = residue.icode.rjust(1)

179 coords = atom.coords

180 x_coord, y_coord, z_coord = [ "{:.3f}".format(coord).rjust(8) for coord in coords ]

181 charge = "{:.4f}".format(atom.charge) # Charge was manually added before, it is not a standard attribute

182 # In case this atom is making an strong bond between both interacting agents we add an 'X' before the element

183 # This way CMIP will ignore the atom. Otherwise it would return high non-sense Van der Waals values

184 real_index = selection.atom_indices[a]

185 # Set if atom is to be ignored

186 is_dummy = real_index in dummy_atom_indices

187 cmip_dummy_flag = 'X' if is_dummy else ''

188 element = atom.element

189 atom_line = ('ATOM ' + index + ' ' + name + ' ' + residue_name + ' '

190 + chain_name + residue_number + icode + ' ' + x_coord + y_coord + z_coord

191 + ' ' + str(charge).rjust(7) + ' ' + cmip_dummy_flag + element + '\n')

192 file.write(atom_line)

193

194 return File(output_filepath)

195

196 # Run CMIP in 'checkonly' mode (i.e. start and stop) for both agents

197 # Mine the grid generated by CMIP for both agents and calculate a new grid which would include both

198 # Finally, modify the provided 'cmip_inputs' with the new grid parameters

199 def adapt_cmip_grid (agent1_cmip_pdb : File, agent2_cmip_pdb : File, cmip_inputs : File) -> tuple:

200

201 # Set a name for the checkonly CMIP outputs

202 # This name is not important, since the data we want is in the CMIP logs

203 cmip_checkonly_output = File(output_directory + '/checkonly.energy.pdb')

204

205 # Run CMIP in 'checkonly' mode and save the grid dimensions output

206 # First do it for the agent 1

207 cmip_logs_agent1 = run([

208 "cmip",

209 "-i",

210 cmip_inputs_checkonly_source.path,

211 "-pr",

212 agent1_cmip_pdb.path,

213 "-vdw",

214 vdw_source.path,

215 "-hs",

216 agent2_cmip_pdb.path,

217 "-byat",

218 cmip_checkonly_output.path,

219 "-rst",

220 restart_file.path

221 ], stdout=PIPE, stderr=PIPE).stdout.decode()

222

223 # Mine the grid dimensions from CMIP logs

224 agent1_center, agent1_density, agent1_units = mine_cmip_output(

225 cmip_logs_agent1.split("\n"))

226

227 # Run CMIP in 'checkonly' mode and save the grid dimensions output

228 # Now do it for the agent 2

229 cmip_logs_agent2 = run([

230 "cmip",

231 "-i",

232 cmip_inputs_checkonly_source.path,

233 "-pr",

234 agent2_cmip_pdb.path,

235 "-vdw",

236 vdw_source.path,

237 "-hs",

238 agent1_cmip_pdb.path,

239 "-byat",

240 cmip_checkonly_output.path,

241 "-rst",

242 restart_file.path

243 ], stdout=PIPE, stderr=PIPE).stdout.decode()

244

245 # Mine the grid dimensions from CMIP logs

246 agent2_center, agent2_density, agent2_units = mine_cmip_output(

247 cmip_logs_agent2.split("\n"))

248

249 # Calculate grid dimensions for a new grid which contains both previous grids

250 new_center, new_density = compute_new_grid(

251 agent1_center,

252 agent1_density,

253 agent1_units,

254 agent2_center,

255 agent2_density,

256 agent2_units)

257

258 # Here we must calculate how many grid points the new box would have

259 # In case the number of points exceeds a safe limit we must reduce it

260 # Otherwise CMIP could return the following error:

261 # ERROR when trying to allocate -2092351636 bytes of memory

262

263 # Set the limit number of grid points

264 # It has been found experimentally: 65945880 Grid points -> OK, 68903412 Grid points -> ERROR

265 grid_points_limit = 65000000

266

267 # Set the default grid 'cells' size for all three dimensions

268 grid_unit_size = 0.5

269

270 # Calculate the amount of total points with current parameters

271 current_grid_points = (new_density[0] + 1) * (new_density[1] + 1) * (new_density[2] + 1)

272

273 # In case the current number of grid points exceeds the limit...

274 # Reduce all dimension densities in proportion and expand the grid units size to compensate

275 if current_grid_points > grid_points_limit:

276 print(f'WARNING: Grid points limit is exceeded ({current_grid_points})')

277 proportion = grid_points_limit / current_grid_points

278 new_density[0] = math.ceil(new_density[0] * proportion)

279 new_density[1] = math.ceil(new_density[1] * proportion)

280 new_density[2] = math.ceil(new_density[2] * proportion)

281 grid_unit_size = math.ceil(grid_unit_size / proportion * 1000) / 1000

282 print(f'WARNING: Grid resolution has been reduced -> unit size = {grid_unit_size}')

283

284 # Set the new lines to be written in the local CMIP inputs file

285 grid_inputs = [

286 f" cenx={new_center[0]:.1f} \n",

287 f" ceny={new_center[1]:.1f} \n",

288 f" cenz={new_center[2]:.1f} \n",

289 f" dimx={int(new_density[0])} \n",

290 f" dimy={int(new_density[1])} \n",

291 f" dimz={int(new_density[2])} \n",

292 f" intx={grid_unit_size} \n",

293 f" inty={grid_unit_size} \n",

294 f" intz={grid_unit_size} \n",

295 ]

296 # Add previous lines to the local inputs file

297 with open(cmip_inputs.path, "r+") as file:

298 lines = file.readlines()

299 file.seek(0)

300 for line in lines:

301 if line == '&end \n':

302 for grid_input in grid_inputs:

303 file.write(grid_input)

304 file.write(line)

305

306 # Delete the 'ckeckonly' file

307 cmip_checkonly_output.remove()

308

309 # Calculate the resulting box origin and size and return both values

310 # These values are used for display / debug purposes only

311 new_size = (new_density[0] * grid_unit_size, new_density[1] * grid_unit_size, new_density[2] * grid_unit_size)

312 new_origin = (new_center[0] - new_size[0] / 2, new_center[1] - new_size[1] / 2, new_center[2] - new_size[2] / 2)

313 return new_origin, new_size

314

315 # Run the CMIP software to get the desired energies

316 def get_cmip_energies (cmip_inputs : File, guest : File, host : File) -> tuple:

317 # Set the cmip output filename, which is to be read rigth after it is generated

318 cmip_output_file = File(output_directory + '/cmip_output.pdb')

319 # Run cmip

320 cmip_process = run([

321 "cmip",

322 "-i",

323 cmip_inputs.path,

324 "-pr",

325 guest.path,

326 "-vdw",

327 vdw_source.path,

328 "-hs",

329 host.path,

330 "-byat",

331 cmip_output_file.path,

332 "-rst",

333 restart_file.path,

334 ], stdout=PIPE, stderr=PIPE)

335 cmip_logs = cmip_process.stdout.decode()

336 # If the output does note exist at this poin then it means something went wrong with CMIP

337 if not exists(cmip_output_file.path):

338 print(cmip_logs)

339 cmip_error_logs = cmip_process.stderr.decode()

340 print(cmip_error_logs)

341 raise RuntimeError('Something went wrong with CMIP!')

342 # Mine the electrostatic (es) and Van der Walls (vdw) energies for each atom

343 # Group the results by atom adding their values

344 atom_energies = []

345 with open(cmip_output_file.path, 'r') as file:

346 lines = list(file)

347 # If this file is empty it means something went wrong with CMIP

348 # We print its logs and exit

349 if len(lines) == 0:

350 if type(cmip_logs) == str:

351 print(cmip_logs)

352 else:

353 for line in cmip_logs:

354 print(line)

355 raise RuntimeError('Something went wrong with CMIP!')

356 # Mine energies line by line (i.e. atom by atom)

357 for line in lines:

358 # WARNING: This works most times but there is an exception

359 # vdw = float(line[42:53])

360 # es = float(line[57:68])

361 # both = float(line[72:83])

362 # Numbers may become larger than expected when they are close to 0 since they are expressed in E notation

363 # e.g. 0.1075E-04

364 # For this reason we must mine these last values relying on whitespaces between them

365 line_splits = line.split()

366 vdw = float(line_splits[-3])

367 es = float(line_splits[-2])

368 both = float(line_splits[-1])

369 # Values greater than 100 are represented as 0

370 # This step is performed to filter 'infinity' values

371 energies = (vdw, es, both)

372 if both > 100:

373 #print('WARNING: We have extremly high values in energies which are beeing discarded')

374 energies = (0, 0, 0)

375 # Add current atom energy values to the atom energies list

376 atom_energies.append(energies)

377 return atom_energies

378

379 # Given a pdb structure, use CMIP to extract energies

380 # Output energies are returned by atom

381 def get_frame_energy (frame_structure : 'Structure') -> list[dict]:

382

383 # WARNING: At this point structure should be corrected

384 # WARNING: Repeated atoms will make the analysis fail

385

386 # Repeat the whole process for each interaction

387 data = []

388 for interaction in interactions:

389

390 # Check if the interaction has been marked as 'exceeds', in which case we skip it

391 if interaction.get('exceeds', False):

392 continue

393

394 # Get covalent bonds between both agents, if any

395 # They will be not taken in count during the calculation

396 # Otherwise we would have a huge energy peak in this atom since they are very close

397 strong_bonds = interaction.get('strong_bonds', None)

398

399 # Get interaction and agent names, just for the logs

400 interaction_name = interaction['name']

401 print(f' Processing {interaction_name}')

402 agent1_name = interaction['agent_1'].replace(' ', '_').replace('/', '_')

403 agent2_name = interaction['agent_2'].replace(' ', '_').replace('/', '_')

404 # Get agent atom selections

405 agent1_atom_indices = interaction[f'atom_indices_1']

406 agent1_selection = frame_structure.select_atom_indices(agent1_atom_indices)

407 if not agent1_selection:

408 raise ValueError(f'Empty agent 1 "{agent1_name}" from interaction "{interaction_name}"')

409 agent2_atom_indices = interaction[f'atom_indices_2']

410 agent2_selection = frame_structure.select_atom_indices(agent2_atom_indices)

411 if not agent2_selection:

412 raise ValueError(f'Empty agent 2 "{agent2_name}" from interaction "{interaction_name}"')

413

414 # Prepare the CMIP friendly input pdb structures for every calculation

415 # First prepare the host files and the prepare the guest files

416 # There is only a difference between host and guest files:

417 # Host files include both agent atoms but the guest atoms are all marked as dummy

418 # Guest files include only guest atoms while host atoms are removed

419 agent1_cmip_host = pdb2cmip(agent_name = agent1_name, host_file = True, frame_structure=frame_structure,

420 host_selection=agent1_selection, guest_selection=agent2_selection, strong_bonds=strong_bonds)

421 agent2_cmip_host = pdb2cmip(agent_name = agent2_name, host_file = True, frame_structure=frame_structure,

422 host_selection=agent2_selection, guest_selection=agent1_selection, strong_bonds=strong_bonds)

423 agent1_cmip_guest = pdb2cmip(agent_name = agent1_name, host_file = False, frame_structure=frame_structure,

424 host_selection=agent2_selection, guest_selection=agent1_selection, strong_bonds=strong_bonds)

425 agent2_cmip_guest = pdb2cmip(agent_name = agent2_name, host_file = False, frame_structure=frame_structure,

426 host_selection=agent1_selection, guest_selection=agent2_selection, strong_bonds=strong_bonds)

427

428 # Copy the source cmip inputs file in the local directory

429 # Inputs will be modified to adapt the cmip grid to both agents together

430 # Note than modified inputs file is not conserved along frames

431 # Structures may change along trajectory thus requiring a different grid size

432 cmip_inputs_filename = cmip_inputs_source.filename

433 cmip_inputs = File(f'{output_directory}/{cmip_inputs_filename}')

434 copyfile(cmip_inputs_source.path, cmip_inputs.path)

435

436 # Set the CMIP box dimensions and densities to fit both the host and the guest

437 # Box origin and size are modified in the cmip inputs

438 # Values returned are only used for display / debug purposes

439 box_origin, box_size = adapt_cmip_grid(agent1_cmip_guest, agent2_cmip_guest, cmip_inputs)

440

441 # If the debug flag is passed then, instead of calculating energies, leave it all ready and stop here

442 if debug:

443 # Copy in the energies folder a small python script used to sum output energies

444 debug_script_filename = debug_script_source.filename

445 debug_script = File(f'{output_directory}/{debug_script_filename}')

446 copyfile(debug_script_source.path, debug_script.path)

447 # Copy in the energies folder the VDM parameters input file

448 debug_vdw_filename = vdw_source.fiename

449 debug_vdw_parameters = File(f'{output_directory}/{debug_vdw_filename}')

450 copyfile(vdw_source.path, debug_vdw_parameters.path)

451 # Set auxiliar output filenames

452 debug_output_1 = 'first_output.pdb'

453 debug_output_2 = 'second_output.pdb'

454 # Write a README file explaining what to do to manually debug CMIP

455 debug_readme = File(f'{output_directory}/README')

456 with open(debug_readme.path, 'w') as file:

457 file.write(

458 '# Energies debug\n\n'

459 '# Run CMIP\n'

460 f'cmip -i {cmip_inputs_filename} -pr {agent1_cmip_guest.filename} -vdw {debug_vdw_filename} -hs {agent2_cmip_host.filename} -byat {debug_output_1}\n\n'

461 '# Run CMIP inverting host and guest\n'

462 f'cmip -i {cmip_inputs_filename} -pr {agent2_cmip_guest.filename} -vdw {debug_vdw_filename} -hs {agent1_cmip_host.filename} -byat {debug_output_2}\n\n'

463 '# Sum both energies and compare\n'

464 f'python {debug_script_filename} {debug_output_1} {debug_output_2}\n'

465 )

466 raise ForcedStop(' READY TO DEBUG -> Please go to the corresponding replica "energies" directory and follow the README instructions')

467

468 # Run the CMIP software to get the desired energies

469 print(f' Calculating energies for {agent1_name} as guest and {agent2_name} as host')

470 agent1_atom_energies = get_cmip_energies(cmip_inputs, agent1_cmip_guest, agent2_cmip_host)

471 print(f' Calculating energies for {agent2_name} as guest and {agent1_name} as host')

472 agent2_atom_energies = get_cmip_energies(cmip_inputs, agent2_cmip_guest, agent1_cmip_host)

473

474 # Add None in those places where there is a strong bond

475 strong_bond_atom_indices = set(sum(strong_bonds, []))

476 for i, atom_index in enumerate(agent1_atom_indices):

477 if atom_index in strong_bond_atom_indices:

478 agent1_atom_energies.insert(i, None)

479 for i, atom_index in enumerate(agent2_atom_indices):

480 if atom_index in strong_bond_atom_indices:

481 agent2_atom_energies.insert(i, None)

482

483 # Make sure the output size matches the number of atoms in the interaction

484 if len(agent1_atom_energies) != len(agent1_atom_indices):

485 print(f'Number of values: {len(agent1_atom_energies)}')

486 print(f'Number of atoms: {len(agent1_atom_indices)}')

487 raise ValueError('Missmatch in the number values and atoms in agent 1')

488 if len(agent2_atom_energies) != len(agent2_atom_indices):

489 print(f'Number of values: {len(agent2_atom_energies)}')

490 print(f'Number of atoms: {len(agent2_atom_indices)}')

491 raise ValueError('Missmatch in the number values and atoms in agent 2')

492

493 # Print total energies at the end for every agent if the verbose flag has been passed

494 if verbose:

495 for agent_name, agent_energies in zip([agent1_name, agent2_name], [agent1_atom_energies, agent2_atom_energies]):

496 total_vdw = total_es = total_both = 0

497 for atom_energies in agent_energies:

498 total_vdw += atom_energies[0]

499 total_es += atom_energies[1]

500 total_both += atom_energies[2]

501 print(f' Total energies for {agent_name}: vmd {total_vdw}, es {total_es}, both {total_both}')

502

503 # DANI: Usa esto para escribir los resultados de las energías por átomo

504 # sample = {

505 # 'agent1': { 'energies': agent1_atom_energies, 'pdb': agent1_cmip },

506 # 'agent2': { 'energies': agent2_atom_energies, 'pdb': agent2_cmip },

507 # 'box': { 'origin': box_origin, 'size': box_size }

508 # }

509 # save_json(sample, 'energies_sample.json')

510

511 data.append({ 'agent1': agent1_atom_energies, 'agent2': agent2_atom_energies })

512

513 # Erase the 2 previous log lines

514 print(ERASE_3_PREVIOUS_LINES)

515

516 return data

517

518 # Extract the energies for each frame in a reduced trajectory

519 frames, step, count = get_pdb_frames(energies_structure_file.path, trajectory_file.path, snapshots, frames_limit)

520 non_exceeding_interactions = [interaction for interaction in interactions if not interaction.get('exceeds', False)]

521

522 # Load backup data in case there is a backup file

523 if energies_backup.exists:

524 print(' Recovering energies backup')

525 interactions_data = load_json(energies_backup.path)

526 else:

527 interactions_data = [[] for interaction in non_exceeding_interactions]

528 for frame_number, current_frame_pdb in enumerate(frames):

529 # If we already have this frame in the backup then skip it

530 if frame_number < len(interactions_data[0]):

531 continue

532 # Run the main analysis over the current frame

533 # Append the result data for each interaction

534 current_frame_structure = Structure.from_pdb_file(current_frame_pdb)

535 frame_energies_data = get_frame_energy(current_frame_structure)

536 for i, data in enumerate(frame_energies_data):

537 interactions_data[i].append(data)

538 # Save a backup just in case the process is interrupted further

539 save_json(interactions_data, energies_backup.path)

540

541 # Cleanup here some CMIP residual files since it is not to be run again

542 # Remove the restart file since we do not use it and it may be heavy sometimes

543 if restart_file.exists:

544 restart_file.remove()

545 # Remove fortran unit files generated when running CMIP

546 # Note that we can not define where these files are written but they appear where CMIP is run

547 fortran_unit_files = glob('fort.*')

548 for filepath in fortran_unit_files:

549 remove(filepath)

550

551 # Now calculated atom average values through all frames for each pair of interaction agents

552 output_summary = []

553 for i, interaction in enumerate(non_exceeding_interactions):

554 # Check if the interaction as been marked as 'exceeds', in which case we skip it

555 if interaction.get('exceeds', False): continue

556 # Get the numerated output filepath for this specific interaction

557 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i)

558 # Add the root of the output analysis filename to the run data

559 analysis_name = get_analysis_name(numbered_output_analysis_filepath)

560 # Get the interaction name

561 name = interaction['name']

562 # Add this interaction to the final summary

563 output_summary.append({

564 'name': name,

565 'analysis': analysis_name

566 })

567 # If the output file already exists then skip this iteration

568 if exists(numbered_output_analysis_filepath): continue

569 # Get the main data

570 data = interactions_data[i]

571 # Format data

572 agent1_output = format_data([ frame['agent1'] for frame in data ])

573 agent2_output = format_data([ frame['agent2'] for frame in data ])

574 # Format the results data and append it to the output data

575 output = {

576 'name': name,

577 'agent1': agent1_output,

578 'agent2': agent2_output,

579 'version': '1.1.0'

580 }

581 # Export the current interaction analysis in json format

582 save_json(output, numbered_output_analysis_filepath)

583

584 # Finally, export the summary in json format

585 save_json(output_summary, output_analysis_filepath)

586

587 # Remove the backup to avoid accidentally reusing it when the output file is deleted

588 energies_backup.remove()

589

590 # Finally remove the reduced topology

591 energies_structure_file.remove()

592

593# Given a topology (e.g. pdb, prmtop), extract the atom elements in a CMIP friendly format

594# Hydrogens bonded to carbons remain as 'H'

595# Hydrogens bonded to oxygen are renamed as 'HO'

596# Hydrogens bonded to nitrogen or sulfur are renamed as 'HN'

597# Some heavy atom elements may be also modified (e.g. 'CL' -> 'Cl')

598def set_cmip_elements (structure : 'Structure'):

599 # Iterate over each atom to fix their element according to CMIP standards

600 for a, atom in enumerate(structure.atoms):

601 element = atom.element

602 # Adapt hydrogens element to CMIP requirements

603 if element == 'H':

604 # We must find the element of the heavy atom this hydrogen is bonded to

605 atom_bonds = atom.get_bonds()

606 # There should be always only 1 bond

607 if len(atom_bonds) != 1:

608 print(f'Atom {atom.name} ({atom.index}) has {len(atom_bonds)} bonds')

609 raise ValueError('An hydrogen should always have one and only one bond')

610 bonded_atom_index = atom_bonds[0]

611 bonded_atom_element = structure.atoms[bonded_atom_index].element

612 # Hydrogens bonded to carbons remain as 'H'

613 if bonded_atom_element == 'C':

614 pass

615 # Hydrogens bonded to oxygen are renamed as 'HO'

616 elif bonded_atom_element == 'O':

617 element = 'HO'

618 # Hydrogens bonded to nitrogen or sulfur are renamed as 'HN'

619 elif bonded_atom_element == 'N' or bonded_atom_element == 'S':

620 element = 'HN'

621 else:

622 raise RuntimeError(f'Hydrogen bonded to not supported heavy atom: {bonded_atom_element}')

623 atom.element = element

624

625# Set residue names with no endings

626protein_mid_residue_names = { name for name in PROTEIN_RESIDUE_NAME_LETTERS if name[-1] not in {'N','C'} }

627nucleic_mid_residue_names = { name for name in NUCLEIC_RESIDUE_NAME_LETTERS if name[-1] not in {'5','3'} }

628

629# Change residue names in a structure to meet the CMIP requirements

630# Change terminal residue names by adding an 'N' or 'C'

631def name_terminal_residues (structure : 'Structure'):

632 # Iterate chains

633 for chain in structure.chains:

634 # Check if the first residue is tagged as a terminal residue

635 # If not, rename it

636 first_residue = chain.residues[0]

637 # In case it is a protein

638 if first_residue.name in protein_mid_residue_names:

639 first_residue.name += 'N'

640 # In case it is RNA

641 elif first_residue.name in nucleic_mid_residue_names:

642 first_residue.name += '5'

643 # Check if the last residue is tagged as 'C' terminal

644 # If not, rename it

645 last_residue = chain.residues[-1]

646 # In case it is a protein

647 if last_residue.name in protein_mid_residue_names:

648 last_residue.name += 'C'

649 # In case it is RNA

650 elif last_residue.name in nucleic_mid_residue_names:

651 last_residue.name += '3'

652

653def mine_cmip_output (logs):

654 center, density, units = (), (), ()

655 grid_density_exp = r"^\s*Grid density:\s+(\d+)\s+(\d+)\s+(\d+)"

656 grid_center_exp = r"^\s*Grid center:([- ]+\d+.\d+)([- ]+\d+.\d+)([- ]+\d+.\d+)"

657 grid_units_exp = r"^\s*Grid units:\s+(\d+.\d+)\s+(\d+.\d+)\s+(\d+.\d+)"

658 for line in logs:

659 grid_center_groups = re.match(grid_center_exp, line)

660 grid_density_groups = re.match(grid_density_exp, line)

661 grid_units_groups = re.match(grid_units_exp, line)

662 if grid_density_groups:

663 density = tuple(float(grid_density_groups.group(i))

664 for i in (1, 2, 3))

665 if grid_center_groups:

666 center = tuple(float(grid_center_groups.group(i))

667 for i in (1, 2, 3))

668 if grid_units_groups:

669 units = tuple(float(grid_units_groups.group(i))

670 for i in (1, 2, 3))

671 # If data mining fails there must be something wrong with the CMIP output

672 if center == () or density == () or units == ():

673 for line in logs:

674 print(line)

675 raise RuntimeError('CMIP output mining failed')

676 return center, density, units

677

678# This function is used to create new grid parameters

679# The new grid is expected to cover both input grids: agent 1 grid and agent 2 grid

680def compute_new_grid (

681 agent1_center,

682 agent1_density,

683 agent1_units,

684 agent2_center,

685 agent2_density,

686 agent2_units,

687 extra_density=1):

688 new_center = [] # [(i + j) / 2 for i, j in zip(agent1_center, agent2_center)]

689 new_density = []

690

691 for k in range(3):

692 min_prot = agent1_center[k] - agent1_density[k] * agent1_units[k]

693 min_lig = agent2_center[k] - agent2_density[k] * agent2_units[k]

694 min_new = min(min_prot, min_lig)

695 max_prot = agent1_center[k] + agent1_density[k] * agent1_units[k]

696 max_lig = agent2_center[k] + agent2_density[k] * agent2_units[k]

697 max_new = max(max_prot, max_lig)

698

699 dnew = int(abs(max_new - min_new) + extra_density)

700 cnew = min_new + (dnew / 2)

701 new_density.append(dnew)

702 new_center.append(cnew)

703 return new_center, new_density

704

705# Format data grouping atom energies by average ES/VDW

706# Calculate values for the whole set of frames analyzed

707# The canclulate it also for the 20% first frames and the 20% last frames separatedly

708def format_data (data : list) -> dict:

709

710 # First, reorder data by atoms and energies

711 atom_count = len(data[0])

712

713 atom_vdw_values = [[] for n in range(atom_count)]

714 atom_es_values = [[] for n in range(atom_count)]

715 atom_both_values = [[] for n in range(atom_count)]

716 for frame in data:

717 for a, values in enumerate(frame):

718 if values == None: continue

719 atom_vdw_values[a].append(values[0])

720 atom_es_values[a].append(values[1])

721 atom_both_values[a].append(values[2])

722

723 # Calculate the atom averages from each energy

724 atom_vdw_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values ]

725 atom_es_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values]

726 atom_both_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values]

727

728 # Calculate the atom averages from each energy at the beginig and end of the trajectory

729 # We take the initial 20% and the final 20% of frames to calculate each respectively

730 p20_frames = len(data)

731 p20 = round(p20_frames*0.2)

732 if p20 == 0: p20 = 1

733

734 # Initials

735 atom_vdw_values_initial = [[] for n in range(atom_count)]

736 atom_es_values_initial = [[] for n in range(atom_count)]

737 atom_both_values_initial = [[] for n in range(atom_count)]

738 for frame in data[:p20]:

739 for a, values in enumerate(frame):

740 if values == None: continue

741 atom_vdw_values_initial[a].append(values[0])

742 atom_es_values_initial[a].append(values[1])

743 atom_both_values_initial[a].append(values[2])

744

745 atom_vdw_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values_initial]

746 atom_es_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values_initial]

747 atom_both_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values_initial]

748

749 # Finals

750 atom_vdw_values_final = [[] for n in range(atom_count)]

751 atom_es_values_final = [[] for n in range(atom_count)]

752 atom_both_values_final = [[] for n in range(atom_count)]

753 for frame in data[-p20:]:

754 for a, values in enumerate(frame):

755 if values == None: continue

756 atom_vdw_values_final[a].append(values[0])

757 atom_es_values_final[a].append(values[1])

758 atom_both_values_final[a].append(values[2])

759

760 atom_vdw_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values_final]

761 atom_es_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values_final]

762 atom_both_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values_final]

763

764 # Format the results data and append it to the output data

765 output = {

766 'vdw': atom_vdw_avg,

767 'es': atom_es_avg,

768 'both': atom_both_avg,

769 'ivdw': atom_vdw_avg_initial,

770 'ies': atom_es_avg_initial,

771 'iboth': atom_both_avg_initial,

772 'fvdw': atom_vdw_avg_final,

773 'fes': atom_es_avg_final,

774 'fboth': atom_both_avg_final,

775 }

776

777 return output

Coverage for mddb_workflow / analyses / energies.py: 8%

368 statements