Coverage for mddb_workflow / analyses / energies.py: 8%
368 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
1# Energies
2#
3# The energies analysis is carried by ACPYPE and a locally developed tool: CMIP.
4# ACPYPE is a tool based in Python to use Antechamber to generate topologies for chemical compounds and to interface with others python applications.
5# CMIP stands for Classical Molecular Interaction Potential and it is usefull to predict electrostatic and Van der Waals potentials.
6#
7# ACPYPE:
8# SOUSA DA SILVA, A. W. & VRANKEN, W. F. ACPYPE - AnteChamber PYthon Parser interfacE. BMC Research Notes 5 (2012), 367 doi: 10.1186/1756-0500-5-367 http://www.biomedcentral.com/1756-0500/5/367
9# BATISTA, P. R.; WILTER, A.; DURHAM, E. H. A. B. & PASCUTTI, P. G. Molecular Dynamics Simulations Applied to the Study of Subtypes of HIV-1 Protease. Cell Biochemistry and Biophysics 44 (2006), 395-404. doi: 10.1385/CBB:44:3:395
10#
11# Antechamber:
12# WANG, J., WANG, W., KOLLMAN, P. A., and CASE, D. A. Automatic atom type and bond type perception in molecular mechanical calculations. Journal of Molecular Graphics and Modelling 25, 2 (2006), 247–260. doi: 10.1016/j.jmgm.2005.12.005
13# WANG, J., WOLF, R. M., CALDWELL, J. W., KOLLMAN, P. A., and CASE, D. A. Development and testing of a General Amber Force Field. Journal of Computational Chemistry 25, 9 (2004), 1157–1174. doi: 10.1002/jcc.20035
14#
15# CMIP:
16# Gelpí, J.L., Kalko, S.G., Barril, X., Cirera, J., de la Cruz, X., Luque, F.J. and Orozco, M. (2001), Classical molecular interaction potentials: Improved setup procedure in molecular dynamics simulations of proteins. Proteins, 45: 428-437. doi:10.1002/prot.1159
18# Imports libraries
19from os import mkdir, remove
20from os.path import exists
21from shutil import copyfile
22from glob import glob
23import re
24import math
25from subprocess import run, PIPE
27from mddb_workflow.tools.get_pdb_frames import get_pdb_frames
28from mddb_workflow.utils.auxiliar import load_json, save_json, warn, numerate_filename, get_analysis_name
29from mddb_workflow.utils.auxiliar import ForcedStop
30from mddb_workflow.utils.constants import OUTPUT_ENERGIES_FILENAME
31from mddb_workflow.utils.constants import PROTEIN_RESIDUE_NAME_LETTERS, NUCLEIC_RESIDUE_NAME_LETTERS
32from mddb_workflow.utils.constants import CMIP_INPUTS_CHECKONLY_SOURCE, CMIP_INPUTS_SOURCE
33from mddb_workflow.utils.constants import CMIP_VDW_SOURCE, ENERGIES_DEBUG_SCRIPT_SOURCE
34from mddb_workflow.utils.structures import Structure
35from mddb_workflow.utils.file import File
36from mddb_workflow.utils.type_hints import *
38CURSOR_UP_ONE = '\x1b[1A'
39ERASE_LINE = '\x1b[2K'
40ERASE_3_PREVIOUS_LINES = CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE
42def energies (
43 trajectory_file : File,
44 # Set a folder to be created in order to store residual output files from this analysis
45 output_directory : str,
46 structure : 'Structure',
47 interactions : list,
48 charges : list,
49 snapshots : int,
50 frames_limit : int = 100,
51 verbose : bool = False,
52 debug : bool = False):
53 """Perform the electrostatic and vdw energies analysis for each pair of interaction agents."""
55 # Make sure we have interactions
56 if not interactions or len(interactions) == 0:
57 print('No interactions were specified')
58 return
60 # Make sure we have charges
61 if not charges or type(charges) == Exception:
62 print('Atom charges are not available -> This analysis will be skipped')
63 return
65 # Set the main output filepath
66 output_analysis_filepath = f'{output_directory}/{OUTPUT_ENERGIES_FILENAME}'
68 # Set the auxiliar files further required as CMIP inputs
69 cmip_inputs_checkonly_source = File(CMIP_INPUTS_CHECKONLY_SOURCE)
70 cmip_inputs_source = File(CMIP_INPUTS_SOURCE)
71 vdw_source = File(CMIP_VDW_SOURCE)
72 debug_script_source = File(ENERGIES_DEBUG_SCRIPT_SOURCE)
74 # Set a backup file to store some results on the fly
75 # This is useful to restore these values in case the analysis is disrupt since it is a long analysis
76 energies_backup = File(output_directory + '/backup.json')
78 # Define an additional file generated by CMIP which must be handled (written in the energies folder and deleted)
79 restart_file = File(output_directory + '/restart')
81 # Check the number of atoms on each interacting agent
82 # If there is any agent with more than 80000 atoms CMIP will fail so we must skip this specific energies analysis by now
83 # DANI: Este valor límite se puede cambiar en CMIP, pero hay que recompilar y eso no es banal en un environment de conda
84 cmip_atom_limit = 80000
85 for interaction in interactions:
86 exceeds = False
87 for agent in ['1', '2']:
88 atoms = interaction[f'atom_indices_{agent}']
89 atom_count = len(atoms)
90 if atom_count >= cmip_atom_limit:
91 exceeds = True
92 break
93 if exceeds:
94 warn(f'{interaction["name"]} is exceeding the CMIP atom count limit of {cmip_atom_limit} and it will be skipped for this analysis')
95 interaction['exceeds'] = True
97 # This anlaysis produces many residual output files
98 # Create a new folder to store all ouput files so they do not overcrowd the main directory
99 if not exists(output_directory):
100 mkdir(output_directory)
102 # Adapt the structure for cmip
103 energies_structure = structure.copy()
105 # Rename residues according to if they are terminals
106 name_terminal_residues(energies_structure)
108 # Set each atom element in CMIP format
109 set_cmip_elements(energies_structure)
111 # Save the structure back to a pdb
112 energies_structure_file = File(output_directory + '/energies.pdb')
113 energies_structure.generate_pdb_file(energies_structure_file.path)
115 # Transform an agent structure to a cmip input pdb, which includes charges and cmip-friendly elements
116 # If this is to be a host file then leave only atoms involved in the interaction: both host and guest agents
117 # If this is to be a host file, all guest atoms are set dummy* as well
118 # If this is to be a guest file then remove all host atoms as well
119 # Note that the frame structure is not the standard structure, which would already include charges
120 # For this reason, charges are included in the frame structure before
121 # Also flag some atoms as 'dummy' by adding a 'X' before the element
122 # *Dummy atoms are not considered in the calculation but they stand for a region with low dielectric
123 # If removed, the void left by these atoms would be considered to be filled with solvent, which has a high dielectric
124 # These dielectric differences have a strong impact on the calculation
125 # By default we set as dummy host atoms involved in a covalent bond with guest atoms
126 def pdb2cmip (
127 agent_name : str,
128 host_file : bool,
129 frame_structure : 'Structure',
130 host_selection : 'Selection',
131 guest_selection : 'Selection',
132 strong_bonds : Optional[list]
133 ) -> File:
134 # Get atom indices for host atoms involved in a covalent bond with guest atoms
135 # If this is the host, they will be further marked as dummy for CMIP to ignore them in the calculations
136 # If this is the guest, they will be removed
137 strong_bond_atom_indices = []
138 if strong_bonds:
139 for bond in strong_bonds:
140 strong_bond_atom_indices += bond
141 # Set atoms to be flagged as dummy
142 dummy_atom_indices = set()
143 # If this is to be the host file then set guest atoms and strong bond atoms dummy
144 if host_file:
145 dummy_atom_indices |= set(strong_bond_atom_indices)
146 dummy_atom_indices |= set(guest_selection.atom_indices)
147 # Set which atoms must be kept in the output file
148 # If this is the host file then keep both host and guest atoms
149 # If this is the guest file then keep only guest atoms
150 # Always remove strong bond atoms in the guest
151 selection = host_selection + guest_selection if host_file else guest_selection
152 strong_bond_selection = frame_structure.select_atom_indices(strong_bond_atom_indices)
153 guest_strong_bonds = guest_selection & strong_bond_selection
154 selection -= guest_strong_bonds
155 # Filter the selected atoms in the structure
156 selected_structure = frame_structure.filter(selection)
157 # Set atom charges (non standard attribute), which are used further to write the adapted cmip pdb
158 # Set also the elements to macth the original structure, since the frame generator messes the elements
159 for a, atom in enumerate(selected_structure.atoms):
160 atom_index = selection.atom_indices[a]
161 charge = charges[atom_index]
162 setattr(atom, 'charge', charge)
163 cmip_element = energies_structure.atoms[atom_index].element
164 atom.element = cmip_element
165 # Write a special pdb which contains charges as CMIP expects to find them and dummy atoms flagged
166 output_filepath = f'{output_directory}/{agent_name}_{"host" if host_file else "guest"}.cmip.pdb'
167 with open(output_filepath, "w") as file:
168 # Write a line for each atom
169 for a, atom in enumerate(selected_structure.atoms):
170 index = str(a+1).rjust(5)
171 atom_name = atom.name
172 name = ' ' + atom_name.ljust(3) if len(atom_name) < 4 else atom_name
173 residue = atom.residue
174 residue_name = residue.name.ljust(3)
175 chain = atom.chain
176 chain_name = chain.name.rjust(1)
177 residue_number = str(residue.number).rjust(4)
178 icode = residue.icode.rjust(1)
179 coords = atom.coords
180 x_coord, y_coord, z_coord = [ "{:.3f}".format(coord).rjust(8) for coord in coords ]
181 charge = "{:.4f}".format(atom.charge) # Charge was manually added before, it is not a standard attribute
182 # In case this atom is making an strong bond between both interacting agents we add an 'X' before the element
183 # This way CMIP will ignore the atom. Otherwise it would return high non-sense Van der Waals values
184 real_index = selection.atom_indices[a]
185 # Set if atom is to be ignored
186 is_dummy = real_index in dummy_atom_indices
187 cmip_dummy_flag = 'X' if is_dummy else ''
188 element = atom.element
189 atom_line = ('ATOM ' + index + ' ' + name + ' ' + residue_name + ' '
190 + chain_name + residue_number + icode + ' ' + x_coord + y_coord + z_coord
191 + ' ' + str(charge).rjust(7) + ' ' + cmip_dummy_flag + element + '\n')
192 file.write(atom_line)
194 return File(output_filepath)
196 # Run CMIP in 'checkonly' mode (i.e. start and stop) for both agents
197 # Mine the grid generated by CMIP for both agents and calculate a new grid which would include both
198 # Finally, modify the provided 'cmip_inputs' with the new grid parameters
199 def adapt_cmip_grid (agent1_cmip_pdb : File, agent2_cmip_pdb : File, cmip_inputs : File) -> tuple:
201 # Set a name for the checkonly CMIP outputs
202 # This name is not important, since the data we want is in the CMIP logs
203 cmip_checkonly_output = File(output_directory + '/checkonly.energy.pdb')
205 # Run CMIP in 'checkonly' mode and save the grid dimensions output
206 # First do it for the agent 1
207 cmip_logs_agent1 = run([
208 "cmip",
209 "-i",
210 cmip_inputs_checkonly_source.path,
211 "-pr",
212 agent1_cmip_pdb.path,
213 "-vdw",
214 vdw_source.path,
215 "-hs",
216 agent2_cmip_pdb.path,
217 "-byat",
218 cmip_checkonly_output.path,
219 "-rst",
220 restart_file.path
221 ], stdout=PIPE, stderr=PIPE).stdout.decode()
223 # Mine the grid dimensions from CMIP logs
224 agent1_center, agent1_density, agent1_units = mine_cmip_output(
225 cmip_logs_agent1.split("\n"))
227 # Run CMIP in 'checkonly' mode and save the grid dimensions output
228 # Now do it for the agent 2
229 cmip_logs_agent2 = run([
230 "cmip",
231 "-i",
232 cmip_inputs_checkonly_source.path,
233 "-pr",
234 agent2_cmip_pdb.path,
235 "-vdw",
236 vdw_source.path,
237 "-hs",
238 agent1_cmip_pdb.path,
239 "-byat",
240 cmip_checkonly_output.path,
241 "-rst",
242 restart_file.path
243 ], stdout=PIPE, stderr=PIPE).stdout.decode()
245 # Mine the grid dimensions from CMIP logs
246 agent2_center, agent2_density, agent2_units = mine_cmip_output(
247 cmip_logs_agent2.split("\n"))
249 # Calculate grid dimensions for a new grid which contains both previous grids
250 new_center, new_density = compute_new_grid(
251 agent1_center,
252 agent1_density,
253 agent1_units,
254 agent2_center,
255 agent2_density,
256 agent2_units)
258 # Here we must calculate how many grid points the new box would have
259 # In case the number of points exceeds a safe limit we must reduce it
260 # Otherwise CMIP could return the following error:
261 # ERROR when trying to allocate -2092351636 bytes of memory
263 # Set the limit number of grid points
264 # It has been found experimentally: 65945880 Grid points -> OK, 68903412 Grid points -> ERROR
265 grid_points_limit = 65000000
267 # Set the default grid 'cells' size for all three dimensions
268 grid_unit_size = 0.5
270 # Calculate the amount of total points with current parameters
271 current_grid_points = (new_density[0] + 1) * (new_density[1] + 1) * (new_density[2] + 1)
273 # In case the current number of grid points exceeds the limit...
274 # Reduce all dimension densities in proportion and expand the grid units size to compensate
275 if current_grid_points > grid_points_limit:
276 print(f'WARNING: Grid points limit is exceeded ({current_grid_points})')
277 proportion = grid_points_limit / current_grid_points
278 new_density[0] = math.ceil(new_density[0] * proportion)
279 new_density[1] = math.ceil(new_density[1] * proportion)
280 new_density[2] = math.ceil(new_density[2] * proportion)
281 grid_unit_size = math.ceil(grid_unit_size / proportion * 1000) / 1000
282 print(f'WARNING: Grid resolution has been reduced -> unit size = {grid_unit_size}')
284 # Set the new lines to be written in the local CMIP inputs file
285 grid_inputs = [
286 f" cenx={new_center[0]:.1f} \n",
287 f" ceny={new_center[1]:.1f} \n",
288 f" cenz={new_center[2]:.1f} \n",
289 f" dimx={int(new_density[0])} \n",
290 f" dimy={int(new_density[1])} \n",
291 f" dimz={int(new_density[2])} \n",
292 f" intx={grid_unit_size} \n",
293 f" inty={grid_unit_size} \n",
294 f" intz={grid_unit_size} \n",
295 ]
296 # Add previous lines to the local inputs file
297 with open(cmip_inputs.path, "r+") as file:
298 lines = file.readlines()
299 file.seek(0)
300 for line in lines:
301 if line == '&end \n':
302 for grid_input in grid_inputs:
303 file.write(grid_input)
304 file.write(line)
306 # Delete the 'ckeckonly' file
307 cmip_checkonly_output.remove()
309 # Calculate the resulting box origin and size and return both values
310 # These values are used for display / debug purposes only
311 new_size = (new_density[0] * grid_unit_size, new_density[1] * grid_unit_size, new_density[2] * grid_unit_size)
312 new_origin = (new_center[0] - new_size[0] / 2, new_center[1] - new_size[1] / 2, new_center[2] - new_size[2] / 2)
313 return new_origin, new_size
315 # Run the CMIP software to get the desired energies
316 def get_cmip_energies (cmip_inputs : File, guest : File, host : File) -> tuple:
317 # Set the cmip output filename, which is to be read rigth after it is generated
318 cmip_output_file = File(output_directory + '/cmip_output.pdb')
319 # Run cmip
320 cmip_process = run([
321 "cmip",
322 "-i",
323 cmip_inputs.path,
324 "-pr",
325 guest.path,
326 "-vdw",
327 vdw_source.path,
328 "-hs",
329 host.path,
330 "-byat",
331 cmip_output_file.path,
332 "-rst",
333 restart_file.path,
334 ], stdout=PIPE, stderr=PIPE)
335 cmip_logs = cmip_process.stdout.decode()
336 # If the output does note exist at this poin then it means something went wrong with CMIP
337 if not exists(cmip_output_file.path):
338 print(cmip_logs)
339 cmip_error_logs = cmip_process.stderr.decode()
340 print(cmip_error_logs)
341 raise RuntimeError('Something went wrong with CMIP!')
342 # Mine the electrostatic (es) and Van der Walls (vdw) energies for each atom
343 # Group the results by atom adding their values
344 atom_energies = []
345 with open(cmip_output_file.path, 'r') as file:
346 lines = list(file)
347 # If this file is empty it means something went wrong with CMIP
348 # We print its logs and exit
349 if len(lines) == 0:
350 if type(cmip_logs) == str:
351 print(cmip_logs)
352 else:
353 for line in cmip_logs:
354 print(line)
355 raise RuntimeError('Something went wrong with CMIP!')
356 # Mine energies line by line (i.e. atom by atom)
357 for line in lines:
358 # WARNING: This works most times but there is an exception
359 # vdw = float(line[42:53])
360 # es = float(line[57:68])
361 # both = float(line[72:83])
362 # Numbers may become larger than expected when they are close to 0 since they are expressed in E notation
363 # e.g. 0.1075E-04
364 # For this reason we must mine these last values relying on whitespaces between them
365 line_splits = line.split()
366 vdw = float(line_splits[-3])
367 es = float(line_splits[-2])
368 both = float(line_splits[-1])
369 # Values greater than 100 are represented as 0
370 # This step is performed to filter 'infinity' values
371 energies = (vdw, es, both)
372 if both > 100:
373 #print('WARNING: We have extremly high values in energies which are beeing discarded')
374 energies = (0, 0, 0)
375 # Add current atom energy values to the atom energies list
376 atom_energies.append(energies)
377 return atom_energies
379 # Given a pdb structure, use CMIP to extract energies
380 # Output energies are returned by atom
381 def get_frame_energy (frame_structure : 'Structure') -> list[dict]:
383 # WARNING: At this point structure should be corrected
384 # WARNING: Repeated atoms will make the analysis fail
386 # Repeat the whole process for each interaction
387 data = []
388 for interaction in interactions:
390 # Check if the interaction has been marked as 'exceeds', in which case we skip it
391 if interaction.get('exceeds', False):
392 continue
394 # Get covalent bonds between both agents, if any
395 # They will be not taken in count during the calculation
396 # Otherwise we would have a huge energy peak in this atom since they are very close
397 strong_bonds = interaction.get('strong_bonds', None)
399 # Get interaction and agent names, just for the logs
400 interaction_name = interaction['name']
401 print(f' Processing {interaction_name}')
402 agent1_name = interaction['agent_1'].replace(' ', '_').replace('/', '_')
403 agent2_name = interaction['agent_2'].replace(' ', '_').replace('/', '_')
404 # Get agent atom selections
405 agent1_atom_indices = interaction[f'atom_indices_1']
406 agent1_selection = frame_structure.select_atom_indices(agent1_atom_indices)
407 if not agent1_selection:
408 raise ValueError(f'Empty agent 1 "{agent1_name}" from interaction "{interaction_name}"')
409 agent2_atom_indices = interaction[f'atom_indices_2']
410 agent2_selection = frame_structure.select_atom_indices(agent2_atom_indices)
411 if not agent2_selection:
412 raise ValueError(f'Empty agent 2 "{agent2_name}" from interaction "{interaction_name}"')
414 # Prepare the CMIP friendly input pdb structures for every calculation
415 # First prepare the host files and the prepare the guest files
416 # There is only a difference between host and guest files:
417 # Host files include both agent atoms but the guest atoms are all marked as dummy
418 # Guest files include only guest atoms while host atoms are removed
419 agent1_cmip_host = pdb2cmip(agent_name = agent1_name, host_file = True, frame_structure=frame_structure,
420 host_selection=agent1_selection, guest_selection=agent2_selection, strong_bonds=strong_bonds)
421 agent2_cmip_host = pdb2cmip(agent_name = agent2_name, host_file = True, frame_structure=frame_structure,
422 host_selection=agent2_selection, guest_selection=agent1_selection, strong_bonds=strong_bonds)
423 agent1_cmip_guest = pdb2cmip(agent_name = agent1_name, host_file = False, frame_structure=frame_structure,
424 host_selection=agent2_selection, guest_selection=agent1_selection, strong_bonds=strong_bonds)
425 agent2_cmip_guest = pdb2cmip(agent_name = agent2_name, host_file = False, frame_structure=frame_structure,
426 host_selection=agent1_selection, guest_selection=agent2_selection, strong_bonds=strong_bonds)
428 # Copy the source cmip inputs file in the local directory
429 # Inputs will be modified to adapt the cmip grid to both agents together
430 # Note than modified inputs file is not conserved along frames
431 # Structures may change along trajectory thus requiring a different grid size
432 cmip_inputs_filename = cmip_inputs_source.filename
433 cmip_inputs = File(f'{output_directory}/{cmip_inputs_filename}')
434 copyfile(cmip_inputs_source.path, cmip_inputs.path)
436 # Set the CMIP box dimensions and densities to fit both the host and the guest
437 # Box origin and size are modified in the cmip inputs
438 # Values returned are only used for display / debug purposes
439 box_origin, box_size = adapt_cmip_grid(agent1_cmip_guest, agent2_cmip_guest, cmip_inputs)
441 # If the debug flag is passed then, instead of calculating energies, leave it all ready and stop here
442 if debug:
443 # Copy in the energies folder a small python script used to sum output energies
444 debug_script_filename = debug_script_source.filename
445 debug_script = File(f'{output_directory}/{debug_script_filename}')
446 copyfile(debug_script_source.path, debug_script.path)
447 # Copy in the energies folder the VDM parameters input file
448 debug_vdw_filename = vdw_source.fiename
449 debug_vdw_parameters = File(f'{output_directory}/{debug_vdw_filename}')
450 copyfile(vdw_source.path, debug_vdw_parameters.path)
451 # Set auxiliar output filenames
452 debug_output_1 = 'first_output.pdb'
453 debug_output_2 = 'second_output.pdb'
454 # Write a README file explaining what to do to manually debug CMIP
455 debug_readme = File(f'{output_directory}/README')
456 with open(debug_readme.path, 'w') as file:
457 file.write(
458 '# Energies debug\n\n'
459 '# Run CMIP\n'
460 f'cmip -i {cmip_inputs_filename} -pr {agent1_cmip_guest.filename} -vdw {debug_vdw_filename} -hs {agent2_cmip_host.filename} -byat {debug_output_1}\n\n'
461 '# Run CMIP inverting host and guest\n'
462 f'cmip -i {cmip_inputs_filename} -pr {agent2_cmip_guest.filename} -vdw {debug_vdw_filename} -hs {agent1_cmip_host.filename} -byat {debug_output_2}\n\n'
463 '# Sum both energies and compare\n'
464 f'python {debug_script_filename} {debug_output_1} {debug_output_2}\n'
465 )
466 raise ForcedStop(' READY TO DEBUG -> Please go to the corresponding replica "energies" directory and follow the README instructions')
468 # Run the CMIP software to get the desired energies
469 print(f' Calculating energies for {agent1_name} as guest and {agent2_name} as host')
470 agent1_atom_energies = get_cmip_energies(cmip_inputs, agent1_cmip_guest, agent2_cmip_host)
471 print(f' Calculating energies for {agent2_name} as guest and {agent1_name} as host')
472 agent2_atom_energies = get_cmip_energies(cmip_inputs, agent2_cmip_guest, agent1_cmip_host)
474 # Add None in those places where there is a strong bond
475 strong_bond_atom_indices = set(sum(strong_bonds, []))
476 for i, atom_index in enumerate(agent1_atom_indices):
477 if atom_index in strong_bond_atom_indices:
478 agent1_atom_energies.insert(i, None)
479 for i, atom_index in enumerate(agent2_atom_indices):
480 if atom_index in strong_bond_atom_indices:
481 agent2_atom_energies.insert(i, None)
483 # Make sure the output size matches the number of atoms in the interaction
484 if len(agent1_atom_energies) != len(agent1_atom_indices):
485 print(f'Number of values: {len(agent1_atom_energies)}')
486 print(f'Number of atoms: {len(agent1_atom_indices)}')
487 raise ValueError('Missmatch in the number values and atoms in agent 1')
488 if len(agent2_atom_energies) != len(agent2_atom_indices):
489 print(f'Number of values: {len(agent2_atom_energies)}')
490 print(f'Number of atoms: {len(agent2_atom_indices)}')
491 raise ValueError('Missmatch in the number values and atoms in agent 2')
493 # Print total energies at the end for every agent if the verbose flag has been passed
494 if verbose:
495 for agent_name, agent_energies in zip([agent1_name, agent2_name], [agent1_atom_energies, agent2_atom_energies]):
496 total_vdw = total_es = total_both = 0
497 for atom_energies in agent_energies:
498 total_vdw += atom_energies[0]
499 total_es += atom_energies[1]
500 total_both += atom_energies[2]
501 print(f' Total energies for {agent_name}: vmd {total_vdw}, es {total_es}, both {total_both}')
503 # DANI: Usa esto para escribir los resultados de las energías por átomo
504 # sample = {
505 # 'agent1': { 'energies': agent1_atom_energies, 'pdb': agent1_cmip },
506 # 'agent2': { 'energies': agent2_atom_energies, 'pdb': agent2_cmip },
507 # 'box': { 'origin': box_origin, 'size': box_size }
508 # }
509 # save_json(sample, 'energies_sample.json')
511 data.append({ 'agent1': agent1_atom_energies, 'agent2': agent2_atom_energies })
513 # Erase the 2 previous log lines
514 print(ERASE_3_PREVIOUS_LINES)
516 return data
518 # Extract the energies for each frame in a reduced trajectory
519 frames, step, count = get_pdb_frames(energies_structure_file.path, trajectory_file.path, snapshots, frames_limit)
520 non_exceeding_interactions = [interaction for interaction in interactions if not interaction.get('exceeds', False)]
522 # Load backup data in case there is a backup file
523 if energies_backup.exists:
524 print(' Recovering energies backup')
525 interactions_data = load_json(energies_backup.path)
526 else:
527 interactions_data = [[] for interaction in non_exceeding_interactions]
528 for frame_number, current_frame_pdb in enumerate(frames):
529 # If we already have this frame in the backup then skip it
530 if frame_number < len(interactions_data[0]):
531 continue
532 # Run the main analysis over the current frame
533 # Append the result data for each interaction
534 current_frame_structure = Structure.from_pdb_file(current_frame_pdb)
535 frame_energies_data = get_frame_energy(current_frame_structure)
536 for i, data in enumerate(frame_energies_data):
537 interactions_data[i].append(data)
538 # Save a backup just in case the process is interrupted further
539 save_json(interactions_data, energies_backup.path)
541 # Cleanup here some CMIP residual files since it is not to be run again
542 # Remove the restart file since we do not use it and it may be heavy sometimes
543 if restart_file.exists:
544 restart_file.remove()
545 # Remove fortran unit files generated when running CMIP
546 # Note that we can not define where these files are written but they appear where CMIP is run
547 fortran_unit_files = glob('fort.*')
548 for filepath in fortran_unit_files:
549 remove(filepath)
551 # Now calculated atom average values through all frames for each pair of interaction agents
552 output_summary = []
553 for i, interaction in enumerate(non_exceeding_interactions):
554 # Check if the interaction as been marked as 'exceeds', in which case we skip it
555 if interaction.get('exceeds', False): continue
556 # Get the numerated output filepath for this specific interaction
557 numbered_output_analysis_filepath = numerate_filename(output_analysis_filepath, i)
558 # Add the root of the output analysis filename to the run data
559 analysis_name = get_analysis_name(numbered_output_analysis_filepath)
560 # Get the interaction name
561 name = interaction['name']
562 # Add this interaction to the final summary
563 output_summary.append({
564 'name': name,
565 'analysis': analysis_name
566 })
567 # If the output file already exists then skip this iteration
568 if exists(numbered_output_analysis_filepath): continue
569 # Get the main data
570 data = interactions_data[i]
571 # Format data
572 agent1_output = format_data([ frame['agent1'] for frame in data ])
573 agent2_output = format_data([ frame['agent2'] for frame in data ])
574 # Format the results data and append it to the output data
575 output = {
576 'name': name,
577 'agent1': agent1_output,
578 'agent2': agent2_output,
579 'version': '1.1.0'
580 }
581 # Export the current interaction analysis in json format
582 save_json(output, numbered_output_analysis_filepath)
584 # Finally, export the summary in json format
585 save_json(output_summary, output_analysis_filepath)
587 # Remove the backup to avoid accidentally reusing it when the output file is deleted
588 energies_backup.remove()
590 # Finally remove the reduced topology
591 energies_structure_file.remove()
593# Given a topology (e.g. pdb, prmtop), extract the atom elements in a CMIP friendly format
594# Hydrogens bonded to carbons remain as 'H'
595# Hydrogens bonded to oxygen are renamed as 'HO'
596# Hydrogens bonded to nitrogen or sulfur are renamed as 'HN'
597# Some heavy atom elements may be also modified (e.g. 'CL' -> 'Cl')
598def set_cmip_elements (structure : 'Structure'):
599 # Iterate over each atom to fix their element according to CMIP standards
600 for a, atom in enumerate(structure.atoms):
601 element = atom.element
602 # Adapt hydrogens element to CMIP requirements
603 if element == 'H':
604 # We must find the element of the heavy atom this hydrogen is bonded to
605 atom_bonds = atom.get_bonds()
606 # There should be always only 1 bond
607 if len(atom_bonds) != 1:
608 print(f'Atom {atom.name} ({atom.index}) has {len(atom_bonds)} bonds')
609 raise ValueError('An hydrogen should always have one and only one bond')
610 bonded_atom_index = atom_bonds[0]
611 bonded_atom_element = structure.atoms[bonded_atom_index].element
612 # Hydrogens bonded to carbons remain as 'H'
613 if bonded_atom_element == 'C':
614 pass
615 # Hydrogens bonded to oxygen are renamed as 'HO'
616 elif bonded_atom_element == 'O':
617 element = 'HO'
618 # Hydrogens bonded to nitrogen or sulfur are renamed as 'HN'
619 elif bonded_atom_element == 'N' or bonded_atom_element == 'S':
620 element = 'HN'
621 else:
622 raise RuntimeError(f'Hydrogen bonded to not supported heavy atom: {bonded_atom_element}')
623 atom.element = element
625# Set residue names with no endings
626protein_mid_residue_names = { name for name in PROTEIN_RESIDUE_NAME_LETTERS if name[-1] not in {'N','C'} }
627nucleic_mid_residue_names = { name for name in NUCLEIC_RESIDUE_NAME_LETTERS if name[-1] not in {'5','3'} }
629# Change residue names in a structure to meet the CMIP requirements
630# Change terminal residue names by adding an 'N' or 'C'
631def name_terminal_residues (structure : 'Structure'):
632 # Iterate chains
633 for chain in structure.chains:
634 # Check if the first residue is tagged as a terminal residue
635 # If not, rename it
636 first_residue = chain.residues[0]
637 # In case it is a protein
638 if first_residue.name in protein_mid_residue_names:
639 first_residue.name += 'N'
640 # In case it is RNA
641 elif first_residue.name in nucleic_mid_residue_names:
642 first_residue.name += '5'
643 # Check if the last residue is tagged as 'C' terminal
644 # If not, rename it
645 last_residue = chain.residues[-1]
646 # In case it is a protein
647 if last_residue.name in protein_mid_residue_names:
648 last_residue.name += 'C'
649 # In case it is RNA
650 elif last_residue.name in nucleic_mid_residue_names:
651 last_residue.name += '3'
653def mine_cmip_output (logs):
654 center, density, units = (), (), ()
655 grid_density_exp = r"^\s*Grid density:\s+(\d+)\s+(\d+)\s+(\d+)"
656 grid_center_exp = r"^\s*Grid center:([- ]+\d+.\d+)([- ]+\d+.\d+)([- ]+\d+.\d+)"
657 grid_units_exp = r"^\s*Grid units:\s+(\d+.\d+)\s+(\d+.\d+)\s+(\d+.\d+)"
658 for line in logs:
659 grid_center_groups = re.match(grid_center_exp, line)
660 grid_density_groups = re.match(grid_density_exp, line)
661 grid_units_groups = re.match(grid_units_exp, line)
662 if grid_density_groups:
663 density = tuple(float(grid_density_groups.group(i))
664 for i in (1, 2, 3))
665 if grid_center_groups:
666 center = tuple(float(grid_center_groups.group(i))
667 for i in (1, 2, 3))
668 if grid_units_groups:
669 units = tuple(float(grid_units_groups.group(i))
670 for i in (1, 2, 3))
671 # If data mining fails there must be something wrong with the CMIP output
672 if center == () or density == () or units == ():
673 for line in logs:
674 print(line)
675 raise RuntimeError('CMIP output mining failed')
676 return center, density, units
678# This function is used to create new grid parameters
679# The new grid is expected to cover both input grids: agent 1 grid and agent 2 grid
680def compute_new_grid (
681 agent1_center,
682 agent1_density,
683 agent1_units,
684 agent2_center,
685 agent2_density,
686 agent2_units,
687 extra_density=1):
688 new_center = [] # [(i + j) / 2 for i, j in zip(agent1_center, agent2_center)]
689 new_density = []
691 for k in range(3):
692 min_prot = agent1_center[k] - agent1_density[k] * agent1_units[k]
693 min_lig = agent2_center[k] - agent2_density[k] * agent2_units[k]
694 min_new = min(min_prot, min_lig)
695 max_prot = agent1_center[k] + agent1_density[k] * agent1_units[k]
696 max_lig = agent2_center[k] + agent2_density[k] * agent2_units[k]
697 max_new = max(max_prot, max_lig)
699 dnew = int(abs(max_new - min_new) + extra_density)
700 cnew = min_new + (dnew / 2)
701 new_density.append(dnew)
702 new_center.append(cnew)
703 return new_center, new_density
705# Format data grouping atom energies by average ES/VDW
706# Calculate values for the whole set of frames analyzed
707# The canclulate it also for the 20% first frames and the 20% last frames separatedly
708def format_data (data : list) -> dict:
710 # First, reorder data by atoms and energies
711 atom_count = len(data[0])
713 atom_vdw_values = [[] for n in range(atom_count)]
714 atom_es_values = [[] for n in range(atom_count)]
715 atom_both_values = [[] for n in range(atom_count)]
716 for frame in data:
717 for a, values in enumerate(frame):
718 if values == None: continue
719 atom_vdw_values[a].append(values[0])
720 atom_es_values[a].append(values[1])
721 atom_both_values[a].append(values[2])
723 # Calculate the atom averages from each energy
724 atom_vdw_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values ]
725 atom_es_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values]
726 atom_both_avg = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values]
728 # Calculate the atom averages from each energy at the beginig and end of the trajectory
729 # We take the initial 20% and the final 20% of frames to calculate each respectively
730 p20_frames = len(data)
731 p20 = round(p20_frames*0.2)
732 if p20 == 0: p20 = 1
734 # Initials
735 atom_vdw_values_initial = [[] for n in range(atom_count)]
736 atom_es_values_initial = [[] for n in range(atom_count)]
737 atom_both_values_initial = [[] for n in range(atom_count)]
738 for frame in data[:p20]:
739 for a, values in enumerate(frame):
740 if values == None: continue
741 atom_vdw_values_initial[a].append(values[0])
742 atom_es_values_initial[a].append(values[1])
743 atom_both_values_initial[a].append(values[2])
745 atom_vdw_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values_initial]
746 atom_es_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values_initial]
747 atom_both_avg_initial = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values_initial]
749 # Finals
750 atom_vdw_values_final = [[] for n in range(atom_count)]
751 atom_es_values_final = [[] for n in range(atom_count)]
752 atom_both_values_final = [[] for n in range(atom_count)]
753 for frame in data[-p20:]:
754 for a, values in enumerate(frame):
755 if values == None: continue
756 atom_vdw_values_final[a].append(values[0])
757 atom_es_values_final[a].append(values[1])
758 atom_both_values_final[a].append(values[2])
760 atom_vdw_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_vdw_values_final]
761 atom_es_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_es_values_final]
762 atom_both_avg_final = [sum(v) / len(v) if len(v) > 0 else None for v in atom_both_values_final]
764 # Format the results data and append it to the output data
765 output = {
766 'vdw': atom_vdw_avg,
767 'es': atom_es_avg,
768 'both': atom_both_avg,
769 'ivdw': atom_vdw_avg_initial,
770 'ies': atom_es_avg_initial,
771 'iboth': atom_both_avg_initial,
772 'fvdw': atom_vdw_avg_final,
773 'fes': atom_es_avg_final,
774 'fboth': atom_both_avg_final,
775 }
777 return output