Coverage for mddb_workflow/analyses/energies.py: 9%
363 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
1# Energies
2#
3# The energies analysis is carried by ACPYPE and a locally developed tool: CMIP.
4# ACPYPE is a tool based in Python to use Antechamber to generate topologies for chemical compounds and to interface with others python applications.
5# CMIP stands for Classical Molecular Interaction Potential and it is usefull to predict electrostatic and Van der Waals potentials.
6#
7# ACPYPE:
8# SOUSA DA SILVA, A. W. & VRANKEN, W. F. ACPYPE - AnteChamber PYthon Parser interfacE. BMC Research Notes 5 (2012), 367 doi: 10.1186/1756-0500-5-367 http://www.biomedcentral.com/1756-0500/5/367
9# BATISTA, P. R.; WILTER, A.; DURHAM, E. H. A. B. & PASCUTTI, P. G. Molecular Dynamics Simulations Applied to the Study of Subtypes of HIV-1 Protease. Cell Biochemistry and Biophysics 44 (2006), 395-404. doi: 10.1385/CBB:44:3:395
10#
11# Antechamber:
12# WANG, J., WANG, W., KOLLMAN, P. A., and CASE, D. A. Automatic atom type and bond type perception in molecular mechanical calculations. Journal of Molecular Graphics and Modelling 25, 2 (2006), 247–260. doi: 10.1016/j.jmgm.2005.12.005
13# WANG, J., WOLF, R. M., CALDWELL, J. W., KOLLMAN, P. A., and CASE, D. A. Development and testing of a General Amber Force Field. Journal of Computational Chemistry 25, 9 (2004), 1157–1174. doi: 10.1002/jcc.20035
14#
15# CMIP:
16# Gelpí, J.L., Kalko, S.G., Barril, X., Cirera, J., de la Cruz, X., Luque, F.J. and Orozco, M. (2001), Classical molecular interaction potentials: Improved setup procedure in molecular dynamics simulations of proteins. Proteins, 45: 428-437. doi:10.1002/prot.1159
18# Imports libraries
19from os import mkdir, remove
20from os.path import exists
21from shutil import copyfile
22from pathlib import Path
23from glob import glob
24import re
25import math
26from subprocess import run, PIPE
28from mddb_workflow.tools.get_pdb_frames import get_pdb_frames
29from mddb_workflow.utils.auxiliar import load_json, save_json, warn, numerate_filename, get_analysis_name
30from mddb_workflow.utils.constants import OUTPUT_ENERGIES_FILENAME
31from mddb_workflow.utils.constants import PROTEIN_RESIDUE_NAME_LETTERS, NUCLEIC_RESIDUE_NAME_LETTERS
32from mddb_workflow.utils.structures import Structure
33from mddb_workflow.utils.file import File
34from mddb_workflow.utils.type_hints import *
36CURSOR_UP_ONE = '\x1b[1A'
37ERASE_LINE = '\x1b[2K'
38ERASE_3_PREVIOUS_LINES = CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE
40# Set some auxiliar file names
41CMIP_INPUTS_FILE = 'cmip.in'
42CMIP_CHECK_INPUTS_FILE = 'cmip_check.in'
43VDW_PARAMETERS = 'vdwprm'
44DEBUG_ENERGIES_SUM_SCRIPT = 'get_energies_sum.py'
46# Set the path to the resources folder where we store auxiliar files required for this analysis
47resources = str(Path(__file__).parent.parent / "resources")
49def energies (
50 trajectory_file : File,
51 # Set a folder to be created in order to store residual output files from this analysis
52 output_directory : str,
53 structure : 'Structure',
54 interactions : list,
55 charges : list,
56 snapshots : int,
57 frames_limit : int = 100,
58 verbose : bool = False,
59 debug : bool = False):
60 """Perform the electrostatic and vdw energies analysis for each pair of interaction agents."""
62 # Make sure we have interactions
63 if not interactions or len(interactions) == 0:
64 print('No interactions were specified')
65 return
67 # Make sure we have charges
68 if not charges or type(charges) == Exception:
69 print('Atom charges are not available -> This analysis will be skipped')
70 return
72 # Set the main output filepath
73 output_analysis_filepath = f'{output_directory}/{OUTPUT_ENERGIES_FILENAME}'
75 # Set the auxiliar files further required as CMIP inputs
76 cmip_inputs_checkonly_source = File(f'{resources}/{CMIP_CHECK_INPUTS_FILE}')
77 cmip_inputs_source = File(f'{resources}/{CMIP_INPUTS_FILE}')
78 vdw_source = File(f'{resources}/{VDW_PARAMETERS}')
79 debug_script_source = File(f'{resources}/{DEBUG_ENERGIES_SUM_SCRIPT}')
81 # Set a backup file to store some results on the fly
82 # This is useful to restore these values in case the analysis is disrupt since it is a long analysis
83 energies_backup = File(output_directory + '/backup.json')
85 # Define an additional file generated by CMIP which must be handled (written in the energies folder and deleted)
86 restart_file = File(output_directory + '/restart')
88 # Check the number of atoms on each interacting agent
89 # If there is any agent with more than 80000 atoms CMIP will fail so we must skip this specific energies analysis by now
90 # DANI: Este valor límite se puede cambiar en CMIP, pero hay que recompilar y eso no es banal en un environment de conda
91 cmip_atom_limit = 80000
92 for interaction in interactions:
93 exceeds = False
94 for agent in ['1', '2']:
95 atoms = interaction[f'atom_indices_{agent}']
96 atom_count = len(atoms)
97 if atom_count >= cmip_atom_limit:
98 exceeds = True
99 break
100 if exceeds:
101 warn(f'{interaction["name"]} is exceeding the CMIP atom count limit of {cmip_atom_limit} and it will be skipped for this analysis')
102 interaction['exceeds'] = True
104 # This anlaysis produces many residual output files
105 # Create a new folder to store all ouput files so they do not overcrowd the main directory
106 if not exists(output_directory):
107 mkdir(output_directory)
109 # Adapt the structure for cmip
110 energies_structure = structure.copy()
112 # Rename residues according to if they are terminals
113 name_terminal_residues(energies_structure)
115 # Set each atom element in CMIP format
116 set_cmip_elements(energies_structure)
118 # Save the structure back to a pdb
119 energies_structure_file = File(output_directory + '/energies.pdb')
120 energies_structure.generate_pdb_file(energies_structure_file.path)
122 # Transform an agent structure to a cmip input pdb, which includes charges and cmip-friendly elements
123 # If this is to be a host file then leave only atoms involved in the interaction: both host and guest agents
124 # If this is to be a host file, all guest atoms are set dummy* as well
125 # If this is to be a guest file then remove all host atoms as well
126 # Note that the frame structure is not the standard structure, which would already include charges
127 # For this reason, charges are included in the frame structure before
128 # Also flag some atoms as 'dummy' by adding a 'X' before the element
129 # *Dummy atoms are not considered in the calculation but they stand for a region with low dielectric
130 # If removed, the void left by these atoms would be considered to be filled with solvent, which has a high dielectric
131 # These dielectric differences have a strong impact on the calculation
132 # By default we set as dummy host atoms involved in a covalent bond with guest atoms
133 def pdb2cmip (
134 agent_name : str,
135 host_file : bool,
136 frame_structure : 'Structure',
137 host_selection : 'Selection',
138 guest_selection : 'Selection',
139 strong_bonds : Optional[list]
140 ) -> File:
141 # Get atom indices for host atoms involved in a covalent bond with guest atoms
142 # If this is the host, they will be further marked as dummy for CMIP to ignore them in the calculations
143 # If this is the guest, they will be removed
144 strong_bond_atom_indices = []
145 if strong_bonds:
146 for bond in strong_bonds:
147 strong_bond_atom_indices += bond
148 # Set atoms to be flagged as dummy
149 dummy_atom_indices = set()
150 # If this is to be the host file then set guest atoms and strong bond atoms dummy
151 if host_file:
152 dummy_atom_indices |= set(strong_bond_atom_indices)
153 dummy_atom_indices |= set(guest_selection.atom_indices)
154 # Set which atoms must be kept in the output file
155 # If this is the host file then keep both host and guest atoms
156 # If this is the guest file then keep only guest atoms
157 # Always remove strong bond atoms in the guest
158 selection = host_selection + guest_selection if host_file else guest_selection
159 strong_bond_selection = frame_structure.select_atom_indices(strong_bond_atom_indices)
160 guest_strong_bonds = guest_selection & strong_bond_selection
161 selection -= guest_strong_bonds
162 # Filter the selected atoms in the structure
163 selected_structure = frame_structure.filter(selection)
164 # Set atom charges (non standard attribute), which are used further to write the adapted cmip pdb
165 # Set also the elements to macth the original structure, since the frame generator messes the elements
166 for a, atom in enumerate(selected_structure.atoms):
167 atom_index = selection.atom_indices[a]
168 charge = charges[atom_index]
169 setattr(atom, 'charge', charge)
170 cmip_element = energies_structure.atoms[atom_index].element
171 atom.element = cmip_element
172 # Write a special pdb which contains charges as CMIP expects to find them and dummy atoms flagged
173 output_filepath = f'{output_directory}/{agent_name}_{"host" if host_file else "guest"}.cmip.pdb'
174 with open(output_filepath, "w") as file:
175 # Write a line for each atom
176 for a, atom in enumerate(selected_structure.atoms):
177 index = str(a+1).rjust(5)
178 atom_name = atom.name
179 name = ' ' + atom_name.ljust(3) if len(atom_name) < 4 else atom_name
180 residue = atom.residue
181 residue_name = residue.name.ljust(3)
182 chain = atom.chain
183 chain_name = chain.name.rjust(1)
184 residue_number = str(residue.number).rjust(4)
185 icode = residue.icode.rjust(1)
186 coords = atom.coords
187 x_coord, y_coord, z_coord = [ "{:.3f}".format(coord).rjust(8) for coord in coords ]
188 charge = "{:.4f}".format(atom.charge) # Charge was manually added before, it is not a standard attribute
189 # In case this atom is making an strong bond between both interacting agents we add an 'X' before the element
190 # This way CMIP will ignore the atom. Otherwise it would return high non-sense Van der Waals values
191 real_index = selection.atom_indices[a]
192 # Set if atom is to be ignored
193 is_dummy = real_index in dummy_atom_indices
194 cmip_dummy_flag = 'X' if is_dummy else ''
195 element = atom.element
196 atom_line = ('ATOM ' + index + ' ' + name + ' ' + residue_name + ' '
197 + chain_name + residue_number + icode + ' ' + x_coord + y_coord + z_coord
198 + ' ' + str(charge).rjust(7) + ' ' + cmip_dummy_flag + element + '\n')
199 file.write(atom_line)
201 return File(output_filepath)
203 # Run CMIP in 'checkonly' mode (i.e. start and stop) for both agents
204 # Mine the grid generated by CMIP for both agents and calculate a new grid which would include both
205 # Finally, modify the provided 'cmip_inputs' with the new grid parameters
206 def adapt_cmip_grid (agent1_cmip_pdb : File, agent2_cmip_pdb : File, cmip_inputs : File) -> tuple:
208 # Set a name for the checkonly CMIP outputs
209 # This name is not important, since the data we want is in the CMIP logs
210 cmip_checkonly_output = File(output_directory + '/checkonly.energy.pdb')
212 # Run CMIP in 'checkonly' mode and save the grid dimensions output
213 # First do it for the agent 1
214 cmip_logs_agent1 = run([
215 "cmip",
216 "-i",
217 cmip_inputs_checkonly_source.path,
218 "-pr",
219 agent1_cmip_pdb.path,
220 "-vdw",
221 vdw_source.path,
222 "-hs",
223 agent2_cmip_pdb.path,
224 "-byat",
225 cmip_checkonly_output.path,
226 "-rst",
227 restart_file.path
228 ], stdout=PIPE, stderr=PIPE).stdout.decode()
230 # Mine the grid dimensions from CMIP logs
231 agent1_center, agent1_density, agent1_units = mine_cmip_output(
232 cmip_logs_agent1.split("\n"))
234 # Run CMIP in 'checkonly' mode and save the grid dimensions output
235 # Now do it for the agent 2
236 cmip_logs_agent2 = run([
237 "cmip",
238 "-i",
239 cmip_inputs_checkonly_source.path,
240 "-pr",
241 agent2_cmip_pdb.path,
242 "-vdw",
243 vdw_source.path,
244 "-hs",
245 agent1_cmip_pdb.path,
246 "-byat",
247 cmip_checkonly_output.path,
248 "-rst",
249 restart_file.path
250 ], stdout=PIPE, stderr=PIPE).stdout.decode()
252 # Mine the grid dimensions from CMIP logs
253 agent2_center, agent2_density, agent2_units = mine_cmip_output(
254 cmip_logs_agent2.split("\n"))
256 # Calculate grid dimensions for a new grid which contains both previous grids
257 new_center, new_density = compute_new_grid(
258 agent1_center,
259 agent1_density,
260 agent1_units,
261 agent2_center,
262 agent2_density,
263 agent2_units)
265 # Here we must calculate how many grid points the new box would have
266 # In case the number of points exceeds a safe limit we must reduce it
267 # Otherwise CMIP could return the following error:
268 # ERROR when trying to allocate -2092351636 bytes of memory
270 # Set the limit number of grid points
271 # It has been found experimentally: 65945880 Grid points -> OK, 68903412 Grid points -> ERROR
272 grid_points_limit = 65000000
274 # Set the default grid 'cells' size for all three dimensions
275 grid_unit_size = 0.5
277 # Calculate the amount of total points with current parameters
278 current_grid_points = (new_density[0] + 1) * (new_density[1] + 1) * (new_density[2] + 1)
280 # In case the current number of grid points exceeds the limit...
281 # Reduce all dimension densities in proportion and expand the grid units size to compensate
282 if current_grid_points > grid_points_limit:
283 print(f'WARNING: Grid points limit is exceeded ({current_grid_points})')
284 proportion = grid_points_limit / current_grid_points
285 new_density[0] = math.ceil(new_density[0] * proportion)
286 new_density[1] = math.ceil(new_density[1] * proportion)
287 new_density[2] = math.ceil(new_density[2] * proportion)
288 grid_unit_size = math.ceil(grid_unit_size / proportion * 1000) / 1000
289 print(f'WARNING: Grid resolution has been reduced -> unit size = {grid_unit_size}')
291 # Set the new lines to be written in the local CMIP inputs file
292 grid_inputs = [
293 f" cenx={new_center[0]:.1f} \n",
294 f" ceny={new_center[1]:.1f} \n",
295 f" cenz={new_center[2]:.1f} \n",
296 f" dimx={int(new_density[0])} \n",
297 f" dimy={int(new_density[1])} \n",
298 f" dimz={int(new_density[2])} \n",
299 f" intx={grid_unit_size} \n",
300 f" inty={grid_unit_size} \n",
301 f" intz={grid_unit_size} \n",
302 ]
303 # Add previous lines to the local inputs file
304 with open(cmip_inputs.path, "r+") as file:
305 lines = file.readlines()
306 file.seek(0)
307 for line in lines:
308 if line == '&end \n':
309 for grid_input in grid_inputs:
310 file.write(grid_input)
311 file.write(line)
313 # Delete the 'ckeckonly' file
314 cmip_checkonly_output.remove()
316 # Calculate the resulting box origin and size and return both values
317 # These values are used for display / debug purposes only
318 new_size = (new_density[0] * grid_unit_size, new_density[1] * grid_unit_size, new_density[2] * grid_unit_size)
319 new_origin = (new_center[0] - new_size[0] / 2, new_center[1] - new_size[1] / 2, new_center[2] - new_size[2] / 2)
320 return new_origin, new_size
322 # Run the CMIP software to get the desired energies
323 def get_cmip_energies (cmip_inputs : File, guest : File, host : File) -> tuple:
324 # Set the cmip output filename, which is to be read rigth after it is generated
325 cmip_output_file = File(output_directory + '/cmip_output.pdb')
326 # Run cmip
327 cmip_logs = run([
328 "cmip",
329 "-i",
330 cmip_inputs.path,
331 "-pr",
332 guest.path,
333 "-vdw",
334 vdw_source.path,
335 "-hs",
336 host.path,
337 "-byat",
338 cmip_output_file.path,
339 "-rst",
340 restart_file.path,
341 ], stdout=PIPE, stderr=PIPE).stdout.decode()
342 # Mine the electrostatic (es) and Van der Walls (vdw) energies for each atom
343 # Group the results by atom adding their values
344 atom_energies = []
345 with open(cmip_output_file.path, 'r') as file:
346 lines = list(file)
347 # If this file is empty it means something went wrong with CMIP
348 # We print its logs and exit
349 if len(lines) == 0:
350 if type(cmip_logs) == str:
351 print(cmip_logs)
352 else:
353 for line in cmip_logs:
354 print(line)
355 raise SystemExit('ERROR: Something went wrong with CMIP!')
356 # Mine energies line by line (i.e. atom by atom)
357 for line in lines:
358 # WARNING: This works most times but there is an exception
359 # vdw = float(line[42:53])
360 # es = float(line[57:68])
361 # both = float(line[72:83])
362 # Numbers may become larger than expected when they are close to 0 since they are expressed in E notation
363 # e.g. 0.1075E-04
364 # For this reason we must mine these last values relying on whitespaces between them
365 line_splits = line.split()
366 vdw = float(line_splits[-3])
367 es = float(line_splits[-2])
368 both = float(line_splits[-1])
369 # Values greater than 100 are represented as 0
370 # This step is performed to filter 'infinity' values
371 energies = (vdw, es, both)
372 if both > 100:
373 #print('WARNING: We have extremly high values in energies which are beeing discarded')
374 energies = (0, 0, 0)
375 # Add current atom energy values to the atom energies list
376 atom_energies.append(energies)
377 return atom_energies
379 # Given a pdb structure, use CMIP to extract energies
380 # Output energies are returned by atom
381 def get_frame_energy (frame_structure : 'Structure') -> list[dict]:
383 # WARNING: At this point structure should be corrected
384 # WARNING: Repeated atoms will make the analysis fail
386 # Repeat the whole process for each interaction
387 data = []
388 for interaction in interactions:
390 # Check if the interaction has been marked as 'exceeds', in which case we skip it
391 if interaction.get('exceeds', False):
392 continue
394 # Get covalent bonds between both agents, if any
395 # They will be not taken in count during the calculation
396 # Otherwise we would have a huge energy peak in this atom since they are very close
397 strong_bonds = interaction.get('strong_bonds', None)
399 # Get interaction and agent names, just for the logs
400 interaction_name = interaction['name']
401 print(f' Processing {interaction_name}')
402 agent1_name = interaction['agent_1'].replace(' ', '_').replace('/', '_')
403 agent2_name = interaction['agent_2'].replace(' ', '_').replace('/', '_')
404 # Get agent atom selections
405 agent1_atom_indices = interaction[f'atom_indices_1']
406 agent1_selection = frame_structure.select_atom_indices(agent1_atom_indices)
407 if not agent1_selection:
408 raise ValueError(f'Empty agent 1 "{agent1_name}" from interaction "{interaction_name}"')
409 agent2_atom_indices = interaction[f'atom_indices_2']
410 agent2_selection = frame_structure.select_atom_indices(agent2_atom_indices)
411 if not agent2_selection:
412 raise ValueError(f'Empty agent 2 "{agent2_name}" from interaction "{interaction_name}"')
414 # Prepare the CMIP friendly input pdb structures for every calculation
415 # First prepare the host files and the prepare the guest files
416 # There is only a difference between host and guest files:
417 # Host files include both agent atoms but the guest atoms are all marked as dummy
418 # Guest files include only guest atoms while host atoms are removed
419 agent1_cmip_host = pdb2cmip(agent_name = agent1_name, host_file = True, frame_structure=frame_structure,
420 host_selection=agent1_selection, guest_selection=agent2_selection, strong_bonds=strong_bonds)
421 agent2_cmip_host = pdb2cmip(agent_name = agent2_name, host_file = True, frame_structure=frame_structure,
422 host_selection=agent2_selection, guest_selection=agent1_selection, strong_bonds=strong_bonds)
423 agent1_cmip_guest = pdb2cmip(agent_name = agent1_name, host_file = False, frame_structure=frame_structure,
424 host_selection=agent2_selection, guest_selection=agent1_selection, strong_bonds=strong_bonds)
425 agent2_cmip_guest = pdb2cmip(agent_name = agent2_name, host_file = False, frame_structure=frame_structure,
426 host_selection=agent1_selection, guest_selection=agent2_selection, strong_bonds=strong_bonds)
428 # Copy the source cmip inputs file in the local directory
429 # Inputs will be modified to adapt the cmip grid to both agents together
430 # Note than modified inputs file is not conserved along frames
431 # Structures may change along trajectory thus requiring a different grid size
432 cmip_inputs = File(f'{output_directory}/{CMIP_INPUTS_FILE}')
433 copyfile(cmip_inputs_source.path, cmip_inputs.path)
435 # Set the CMIP box dimensions and densities to fit both the host and the guest
436 # Box origin and size are modified in the cmip inputs
437 # Values returned are only used for display / debug purposes
438 box_origin, box_size = adapt_cmip_grid(agent1_cmip_guest, agent2_cmip_guest, cmip_inputs)
440 # If the debug flag is passed then, instead of calculating energies, leave it all ready and stop here
441 if debug:
442 # Copy in the energies folder a small python script used to sum output energies
443 debug_script = File(f'{output_directory}/{DEBUG_ENERGIES_SUM_SCRIPT}')
444 copyfile(debug_script_source.path, debug_script.path)
445 # Copy in the energies folder the VDM parameters input file
446 debug_vdw_parameters = File(f'{output_directory}/{VDW_PARAMETERS}')
447 copyfile(vdw_source.path, debug_vdw_parameters.path)
448 # Set auxiliar output filenames
449 debug_output_1 = 'first_output.pdb'
450 debug_output_2 = 'second_output.pdb'
451 # Write a README file explaining what to do to manually debug CMIP
452 debug_readme = File(f'{output_directory}/README')
453 with open(debug_readme.path, 'w') as file:
454 file.write(
455 '# Energies debug\n\n'
456 '# Run CMIP\n'
457 f'cmip -i {CMIP_INPUTS_FILE} -pr {agent1_cmip_guest.filename} -vdw {VDW_PARAMETERS} -hs {agent2_cmip_host.filename} -byat {debug_output_1}\n\n'
458 '# Run CMIP inverting host and guest\n'
459 f'cmip -i {CMIP_INPUTS_FILE} -pr {agent2_cmip_guest.filename} -vdw {VDW_PARAMETERS} -hs {agent1_cmip_host.filename} -byat {debug_output_2}\n\n'
460 '# Sum both energies and compare\n'
461 f'python {DEBUG_ENERGIES_SUM_SCRIPT} {debug_output_1} {debug_output_2}\n'
462 )
463 raise SystemExit(' READY TO DEBUG -> Please go to the corresponding replica "energies" directory and follow the README instructions')
465 # Run the CMIP software to get the desired energies
466 print(f' Calculating energies for {agent1_name} as guest and {agent2_name} as host')
467 agent1_atom_energies = get_cmip_energies(cmip_inputs, agent1_cmip_guest, agent2_cmip_host)
468 print(f' Calculating energies for {agent2_name} as guest and {agent1_name} as host')
469 agent2_atom_energies = get_cmip_energies(cmip_inputs, agent2_cmip_guest, agent1_cmip_host)
471 # Add None in those places where there is a strong bond
472 strong_bond_atom_indices = set(sum(strong_bonds, []))
473 for i, atom_index in enumerate(agent1_atom_indices):
474 if atom_index in strong_bond_atom_indices:
475 agent1_atom_energies.insert(i, None)
476 for i, atom_index in enumerate(agent2_atom_indices):
477 if atom_index in strong_bond_atom_indices:
478 agent2_atom_energies.insert(i, None)
480 # Make sure the output size matches the number of atoms in the interaction
481 if len(agent1_atom_energies) != len(agent1_atom_indices):
482 print(f'Number of values: {len(agent1_atom_energies)}')
483 print(f'Number of atoms: {len(agent1_atom_indices)}')
484 raise ValueError('Missmatch in the number values and atoms in agent 1')
485 if len(agent2_atom_energies) != len(agent2_atom_indices):
486 print(f'Number of values: {len(agent2_atom_energies)}')
487 print(f'Number of atoms: {len(agent2_atom_indices)}')
488 raise ValueError('Missmatch in the number values and atoms in agent 2')
490 # Print total energies at the end for every agent if the verbose flag has been passed
491 if verbose:
492 for agent_name, agent_energies in zip([agent1_name, agent2_name], [agent1_atom_energies, agent2_atom_energies]):
493 total_vdw = total_es = total_both = 0
494 for atom_energies in agent_energies:
495 total_vdw += atom_energies[0]
496 total_es += atom_energies[1]
497 total_both += atom_energies[2]
498 print(f' Total energies for {agent_name}: vmd {total_vdw}, es {total_es}, both {total_both}')
500 # DANI: Usa esto para escribir los resultados de las energías por átomo
501 # sample = {
502 # 'agent1': { 'energies': agent1_atom_energies, 'pdb': agent1_cmip },
503 # 'agent2': { 'energies': agent2_atom_energies, 'pdb': agent2_cmip },
504 # 'box': { 'origin': box_origin, 'size': box_size }
505 # }
506 # save_json(sample, 'energies_sample.json')
508 data.append({ 'agent1': agent1_atom_energies, 'agent2': agent2_atom_energies })
510 # Erase the 2 previous log lines
511 print(ERASE_3_PREVIOUS_LINES)
513 return data
515 # Extract the energies for each frame in a reduced trajectory
516 frames, step, count = get_pdb_frames(energies_structure_file.path, trajectory_file.path, snapshots, frames_limit)
517 non_exceeding_interactions = [interaction for interaction in interactions if not interaction.get('exceeds', False)]
519 # Load backup data in case there is a backup file
520 if energies_backup.exists:
521 print(' Recovering energies backup')
522 interactions_data = load_json(energies_backup.path)
523 else:
524 interactions_data = [[] for interaction in non_exceeding_interactions]
525 for frame_number, current_frame_pdb in enumerate(frames):
526 # If we already have this frame in the backup then skip it
527 if frame_number < len(interactions_data[0]):
528 continue
529 # Run the main analysis over the current frame
530 # Append the result data for each interaction
531 current_frame_structure = Structure.from_pdb_file(current_frame_pdb)
532 frame_energies_data = get_frame_energy(current_frame_structure)
533 for i, data in enumerate(frame_energies_data):
534 interactions_data[i].append(data)
535 # Save a backup just in case the process is interrupted further
536 save_json(interactions_data, energies_backup.path)
538 # Cleanup here some CMIP residual files since it is not to be run again
539 # Remove the restart file since we do not use it and it may be heavy sometimes
540 if restart_file.exists:
541 restart_file.remove()
542 # Remove fortran unit files generated when running CMIP
543 # Note that we can not define where these files are written but they appear where CMIP is run
544 fortran_unit_files = glob('fort.*')
545 for filepath in fortran_unit_files:
546 remove(filepath)
548 # Now calculated atom average values through all frames for each pair of interaction agents
549 output_summary = []
550 for i, interaction in enumerate(non_exceeding_interactions):
551 # Check if the interaction as been marked as 'exceeds', in which case we skip it
552 if interaction.get('exceeds', False): continue
553 # Get the numerated output filepath for this specific interaction
554 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i)
555 # Add the root of the output analysis filename to the run data
556 analysis_name = get_analysis_name(numbered_output_analysis_filepath)
557 # Get the interaction name
558 name = interaction['name']
559 # Add this interaction to the final summary
560 output_summary.append({
561 'name': name,
562 'analysis': analysis_name
563 })
564 # If the output file already exists then skip this iteration
565 if exists(numbered_output_analysis_filepath): continue
566 # Get the main data
567 data = interactions_data[i]
568 # Format data
569 agent1_output = format_data([ frame['agent1'] for frame in data ])
570 agent2_output = format_data([ frame['agent2'] for frame in data ])
571 # Format the results data and append it to the output data
572 output = {
573 'name': name,
574 'agent1': agent1_output,
575 'agent2': agent2_output,
576 'version': '1.1.0'
577 }
578 # Export the current interaction analysis in json format
579 save_json(output, numbered_output_analysis_filepath)
581 # Finally, export the summary in json format
582 save_json(output_summary, output_analysis_filepath)
584 # Remove the backup to avoid accidentally reusing it when the output file is deleted
585 energies_backup.remove()
587 # Finally remove the reduced topology
588 energies_structure_file.remove()
590# Given a topology (e.g. pdb, prmtop), extract the atom elements in a CMIP friendly format
591# Hydrogens bonded to carbons remain as 'H'
592# Hydrogens bonded to oxygen are renamed as 'HO'
593# Hydrogens bonded to nitrogen or sulfur are renamed as 'HN'
594# Some heavy atom elements may be also modified (e.g. 'CL' -> 'Cl')
595def set_cmip_elements (structure : 'Structure'):
596 # Iterate over each atom to fix their element according to CMIP standards
597 for a, atom in enumerate(structure.atoms):
598 element = atom.element
599 # Adapt hydrogens element to CMIP requirements
600 if element == 'H':
601 # We must find the element of the heavy atom this hydrogen is bonded to
602 atom_bonds = atom.get_bonds()
603 # There should be always only 1 bond
604 if len(atom_bonds) != 1:
605 print(f'Atom {atom.name} ({atom.index}) has {len(atom_bonds)} bonds')
606 raise ValueError('An hydrogen should always have one and only one bond')
607 bonded_atom_index = atom_bonds[0]
608 bonded_atom_element = structure.atoms[bonded_atom_index].element
609 # Hydrogens bonded to carbons remain as 'H'
610 if bonded_atom_element == 'C':
611 pass
612 # Hydrogens bonded to oxygen are renamed as 'HO'
613 elif bonded_atom_element == 'O':
614 element = 'HO'
615 # Hydrogens bonded to nitrogen or sulfur are renamed as 'HN'
616 elif bonded_atom_element == 'N' or bonded_atom_element == 'S':
617 element = 'HN'
618 else:
619 raise SystemExit(
620 'ERROR: Hydrogen bonded to not supported heavy atom: ' + bonded_atom_element)
621 atom.element = element
623# Set residue names with no endings
624protein_mid_residue_names = { name for name in PROTEIN_RESIDUE_NAME_LETTERS if name[-1] not in {'N','C'} }
625nucleic_mid_residue_names = { name for name in NUCLEIC_RESIDUE_NAME_LETTERS if name[-1] not in {'5','3'} }
627# Change residue names in a structure to meet the CMIP requirements
628# Change terminal residue names by adding an 'N' or 'C'
629def name_terminal_residues (structure : 'Structure'):
630 # Iterate chains
631 for chain in structure.chains:
632 # Check if the first residue is tagged as a terminal residue
633 # If not, rename it
634 first_residue = chain.residues[0]
635 # In case it is a protein
636 if first_residue.name in protein_mid_residue_names:
637 first_residue.name += 'N'
638 # In case it is RNA
639 elif first_residue.name in nucleic_mid_residue_names:
640 first_residue.name += '5'
641 # Check if the last residue is tagged as 'C' terminal
642 # If not, rename it
643 last_residue = chain.residues[-1]
644 # In case it is a protein
645 if last_residue.name in protein_mid_residue_names:
646 last_residue.name += 'C'
647 # In case it is RNA
648 elif last_residue.name in nucleic_mid_residue_names:
649 last_residue.name += '3'
651def mine_cmip_output (logs):
652 center, density, units = (), (), ()
653 grid_density_exp = r"^\s*Grid density:\s+(\d+)\s+(\d+)\s+(\d+)"
654 grid_center_exp = r"^\s*Grid center:([- ]+\d+.\d+)([- ]+\d+.\d+)([- ]+\d+.\d+)"
655 grid_units_exp = r"^\s*Grid units:\s+(\d+.\d+)\s+(\d+.\d+)\s+(\d+.\d+)"
656 for line in logs:
657 grid_center_groups = re.match(grid_center_exp, line)
658 grid_density_groups = re.match(grid_density_exp, line)
659 grid_units_groups = re.match(grid_units_exp, line)
660 if grid_density_groups:
661 density = tuple(float(grid_density_groups.group(i))
662 for i in (1, 2, 3))
663 if grid_center_groups:
664 center = tuple(float(grid_center_groups.group(i))
665 for i in (1, 2, 3))
666 if grid_units_groups:
667 units = tuple(float(grid_units_groups.group(i))
668 for i in (1, 2, 3))
669 # If data mining fails there must be something wrong with the CMIP output
670 if center == () or density == () or units == ():
671 for line in logs:
672 print(line)
673 print('WARNING: CMIP output mining failed')
674 raise SystemExit('ERROR: Something was wrong with CMIP')
675 return center, density, units
677# This function is used to create new grid parameters
678# The new grid is expected to cover both input grids: agent 1 grid and agent 2 grid
679def compute_new_grid (
680 agent1_center,
681 agent1_density,
682 agent1_units,
683 agent2_center,
684 agent2_density,
685 agent2_units,
686 extra_density=1):
687 new_center = [] # [(i + j) / 2 for i, j in zip(agent1_center, agent2_center)]
688 new_density = []
690 for k in range(3):
691 min_prot = agent1_center[k] - agent1_density[k] * agent1_units[k]
692 min_lig = agent2_center[k] - agent2_density[k] * agent2_units[k]
693 min_new = min(min_prot, min_lig)
694 max_prot = agent1_center[k] + agent1_density[k] * agent1_units[k]
695 max_lig = agent2_center[k] + agent2_density[k] * agent2_units[k]
696 max_new = max(max_prot, max_lig)
698 dnew = int(abs(max_new - min_new) + extra_density)
699 cnew = min_new + (dnew / 2)
700 new_density.append(dnew)
701 new_center.append(cnew)
702 return new_center, new_density
704# Format data grouping atom energies by average ES/VDW
705# Calculate values for the whole set of frames analyzed
706# The canclulate it also for the 20% first frames and the 20% last frames separatedly
707def format_data (data : list) -> dict:
709 # First, reorder data by atoms and energies
710 atom_count = len(data[0])
712 atom_vdw_values = [[] for n in range(atom_count)]
713 atom_es_values = [[] for n in range(atom_count)]
714 atom_both_values = [[] for n in range(atom_count)]
715 for frame in data:
716 for a, values in enumerate(frame):
717 if values == None: continue
718 atom_vdw_values[a].append(values[0])
719 atom_es_values[a].append(values[1])
720 atom_both_values[a].append(values[2])
722 # Calculate the atom averages from each energy
723 atom_vdw_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values ]
724 atom_es_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values]
725 atom_both_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values]
727 # Calculate the atom averages from each energy at the beginig and end of the trajectory
728 # We take the initial 20% and the final 20% of frames to calculate each respectively
729 p20_frames = len(data)
730 p20 = round(p20_frames*0.2)
731 if p20 == 0: p20 = 1
733 # Initials
734 atom_vdw_values_initial = [[] for n in range(atom_count)]
735 atom_es_values_initial = [[] for n in range(atom_count)]
736 atom_both_values_initial = [[] for n in range(atom_count)]
737 for frame in data[:p20]:
738 for a, values in enumerate(frame):
739 if values == None: continue
740 atom_vdw_values_initial[a].append(values[0])
741 atom_es_values_initial[a].append(values[1])
742 atom_both_values_initial[a].append(values[2])
744 atom_vdw_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values_initial]
745 atom_es_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values_initial]
746 atom_both_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values_initial]
748 # Finals
749 atom_vdw_values_final = [[] for n in range(atom_count)]
750 atom_es_values_final = [[] for n in range(atom_count)]
751 atom_both_values_final = [[] for n in range(atom_count)]
752 for frame in data[-p20:]:
753 for a, values in enumerate(frame):
754 if values == None: continue
755 atom_vdw_values_final[a].append(values[0])
756 atom_es_values_final[a].append(values[1])
757 atom_both_values_final[a].append(values[2])
759 atom_vdw_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values_final]
760 atom_es_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values_final]
761 atom_both_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values_final]
763 # Format the results data and append it to the output data
764 output = {
765 'vdw': atom_vdw_avg,
766 'es': atom_es_avg,
767 'both': atom_both_avg,
768 'ivdw': atom_vdw_avg_initial,
769 'ies': atom_es_avg_initial,
770 'iboth': atom_both_avg_initial,
771 'fvdw': atom_vdw_avg_final,
772 'fes': atom_es_avg_final,
773 'fboth': atom_both_avg_final,
774 }
776 return output