Coverage for model_workflow/utils/topologies.py: 96%
112 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1import re
3from model_workflow.utils.gmx_spells import get_tpr_content
4from model_workflow.utils.type_hints import *
6# Set some constants
7AMBER_FLAG_PATTERN = r'%FLAG ([A-Z_]*)\s*'
8AMBER_FORMAT_PATTERN = r'%FORMAT\(([0-9A-Za-z.]*)\)\s*'
9AMBER_FLAG_FORMAT_TYPES = {
10 '20a4': str,
11 '1a80': str,
12 '10I8': int,
13 '20I4': int,
14 '1I8': int,
15 '3I8': int,
16 '5E16.8': float,
17}
19# Unified class to read topologies of multiple formats
20# This class relies in other readers and reads what others do not reach
21class Topology:
22 def __init__ (self, file : 'File'):
23 self.file = file
24 self.format = file.format
25 # Set internal variables
26 self._raw_content = None
27 self._flag_summary = None
29 # Read topology file content
30 def read_raw_content (self):
31 # If we already read the content then return it
32 if self._raw_content: return self._raw_content
33 # TPR topologies are not readable as they are, since they are in binary format
34 if self.format == 'tpr':
35 output, error = get_tpr_content(self.path)
36 self._raw_content = output
37 return self._raw_content
38 # The rest of formats are usually readable ASCII
39 with open(self.file.path, 'r') as file:
40 self._raw_content = file.readlines()
41 return self._raw_content
42 raw_content = property(read_raw_content, None, None, "Topology file content (read only)")
44 # Amber topologies only
45 # Set a summary with all the available falgs and their lines
46 def get_flag_summary (self) -> dict:
47 # If we already made the summary then return it
48 if self._flag_summary: return self._flag_summary
49 # Otherwise build the summary
50 summary = {}
51 last_flag_lines = {}
52 # Iterate lines in the topology raw content until we find a target flag
53 line_number = 0
54 for line in self.raw_content:
55 match = re.search(AMBER_FLAG_PATTERN, line)
56 if match:
57 # Close the previous flag lines dict (first time is redundant)
58 last_flag_lines['to'] = line_number-1
59 # Get the new flag
60 flag = match[1]
61 # Get flag format
62 flag_format_line = self.raw_content[line_number+1]
63 format_match = re.search(AMBER_FORMAT_PATTERN, flag_format_line)
64 if not format_match: raise ValueError(f'Flag {flag} has no format line')
65 format = format_match[1]
66 format_type = AMBER_FLAG_FORMAT_TYPES[format]
67 # Set the new flag lines dict
68 last_flag_lines = { 'type': format_type, 'from': line_number }
69 summary[flag] = last_flag_lines
70 line_number += 1
71 # Finish the last summary lines
72 last_flag_lines['to'] = line_number
73 self._flag_summary = summary
74 return self._flag_summary
75 flag_summary = property(get_flag_summary, None, None, "Amber topologies only. Flags summary and lines (read only)")
77 # Amber topologies only
78 # Mine values under a specific flag
79 # Available flags: TITLE, POINTERS, ATOM_NAME, CHARGE, ATOMIC_NUMBER, MASS, ATOM_TYPE_INDEX,
80 # NUMBER_EXCLUDED_ATOMS, NONBONDED_PARM_INDEX, RESIDUE_LABEL, RESIDUE_POINTER, BOND_FORCE_CONSTANT,
81 # BOND_EQUIL_VALUE, ANGLE_FORCE_CONSTANT, ANGLE_EQUIL_VALUE, DIHEDRAL_FORCE_CONSTANT,
82 # DIHEDRAL_PERIODICITY, DIHEDRAL_PHASE, SCEE_SCALE_FACTOR, SCNB_SCALE_FACTOR, SOLTY,
83 # LENNARD_JONES_ACOEF, LENNARD_JONES_BCOEF, BONDS_INC_HYDROGEN, BONDS_WITHOUT_HYDROGEN,
84 # ANGLES_INC_HYDROGEN, ANGLES_WITHOUT_HYDROGEN, DIHEDRALS_INC_HYDROGEN, DIHEDRALS_WITHOUT_HYDROGEN,
85 # EXCLUDED_ATOMS_LIST, HBOND_ACOEF, HBOND_BCOEF, HBCUT, AMBER_ATOM_TYPE, TREE_CHAIN_CLASSIFICATION,
86 # JOIN_ARRAY, IROTAT, RADIUS_SET, RADII, SCREEN, IPOL
87 def mine_amber_flag (self, flag : str) -> list:
88 # Find the lines to be read
89 summary = self.flag_summary[flag]
90 # Get the type of data in the flag content
91 flag_type = summary['type']
92 # Read the lines
93 # Skip the first 2 lines since they are the flag header and the format header
94 start_line = summary['from'] + 2
95 end_line = summary['to'] + 1
96 lines = self.raw_content[start_line:end_line]
97 # Parse and add all values within the flag in a list
98 values = []
99 for line in lines:
100 # Remove spaces and breaklines, then split the line by spaces
101 string_values = line[0:-1].strip().split()
102 # If flag contents are strings then return string values as they are
103 if flag_type == str: values += string_values
104 # Otherwise parse strings to numeric values if required
105 # Note that flag_type may be int or float
106 else: values += [ flag_type(value) for value in string_values ]
107 return values
109 # Get dihedrals data in a standarized format
110 def get_dihedrals_data (self) -> List[dict]:
111 if self.format == 'prmtop': return self._get_dihedrals_data_amber()
112 raise ValueError(f'Reading dihedrals data is not supported in {self.format} topologies')
114 # Amber topologies only
115 # Get dihedrals data in a standarized format
116 # https://ambermd.org/FileFormats.php
117 def _get_dihedrals_data_amber (self) -> List[dict]:
118 # Get dihedral-related values from the topology
119 including_hydrogens = self.mine_amber_flag('DIHEDRALS_INC_HYDROGEN')
120 excluding_hydrogens = self.mine_amber_flag('DIHEDRALS_WITHOUT_HYDROGEN')
121 force_constants = self.mine_amber_flag('DIHEDRAL_FORCE_CONSTANT')
122 preiodicities = self.mine_amber_flag('DIHEDRAL_PERIODICITY')
123 phases = self.mine_amber_flag('DIHEDRAL_PHASE')
124 ee_scale_factors = self.mine_amber_flag('SCEE_SCALE_FACTOR')
125 vdw_scale_factors = self.mine_amber_flag('SCNB_SCALE_FACTOR')
126 charges = self.mine_amber_flag('CHARGE')
127 atom_types = self.mine_amber_flag('ATOM_TYPE_INDEX')
128 ntypes = len(set(atom_types))
129 nonbonded_parameter_indices = self.mine_amber_flag('NONBONDED_PARM_INDEX')
130 lj_acoefs = self.mine_amber_flag('LENNARD_JONES_ACOEF')
131 lj_bcoefs = self.mine_amber_flag('LENNARD_JONES_BCOEF')
132 # Start processing dihedrals by finding their implicated atoms and "type"
133 # These values are listed in groups of 5 integer numbers (4 atoms + 1 type)
134 dihedrals_data = {}
135 all_dihedrals = including_hydrogens + excluding_hydrogens
136 dihedral_count = int(len(all_dihedrals) / 5)
137 for dih in range(dihedral_count):
138 start = dih * 5
139 dihedral_numbers = all_dihedrals[start:start+5]
140 # However indices are a bit "encripted" for performance reasons
141 # This is documented somewhere, but you must divide by 3 to get the actual index
142 # Some docs say yo also havo to add 1, but this is only to have the 1-based numeration
143 # If you want the 0-based index then is just dividing by 3
144 # Also the third index may be negative for some dihedrals but this is not literal
145 # The negative number means the dihedral was ignored for end interaction calculations
146 # This means its 1-4 atom electrostatic and van der waals energies are ignored
147 # We make a tuple out of atom indices so we can use them as keys to group dihedrals
148 # Note that some dihedrals have multiple terms
149 encrypted_atom_indices = dihedral_numbers[0:4]
150 atom_indices = tuple([ int(abs(index)/3) for index in encrypted_atom_indices ])
151 ignored_1_4_energies = dihedral_numbers[2] < 0
152 dihedral_type = dihedral_numbers[4]
153 # Get the correspondong dihedral constants
154 # Dihedral type numbering starts at 1, not at 0
155 dihedral_index = dihedral_type - 1
156 force_constant = force_constants[dihedral_index]
157 periodicity = preiodicities[dihedral_index]
158 if periodicity < 0:
159 raise ValueError('Negative preiodicity is documented but not yet supported')
160 phase = phases[dihedral_index]
161 # Get the current dihedral data
162 dihedral_data = dihedrals_data.get(atom_indices, None)
163 if dihedral_data == None:
164 # Create a new object if this is the first time to access this specific dihedral
165 dihedral_data = { 'atom_indices': atom_indices, 'terms': [] }
166 # Get end atom indices
167 atom1_index = atom_indices[0]
168 atom4_index = atom_indices[3]
169 # Mine atom charges for the end atoms
170 dihedral_data['atom1_charge'] = charges[atom1_index]
171 dihedral_data['atom4_charge'] = charges[atom4_index]
172 # To get the next two values we need to find a complex index
173 # We substract 1 to atom type inidces to get 0-based indices
174 atom1_type_index = atom_types[atom1_index] - 1
175 atom4_type_index = atom_types[atom4_index] - 1
176 # Thus the parameter index becomes also 0-based
177 nonbonded_parameter_index_index = (ntypes * atom1_type_index) + atom4_type_index
178 nonbonded_parameter_index = nonbonded_parameter_indices[nonbonded_parameter_index_index]
179 # And this is the index we use to get the parameters we really want
180 dihedral_data['lj_acoef'] = lj_acoefs[nonbonded_parameter_index]
181 dihedral_data['lj_bcoef'] = lj_bcoefs[nonbonded_parameter_index]
182 # Save the new dihedral dict in the overall dict
183 dihedrals_data[atom_indices] = dihedral_data
184 # Add current dihedral term
185 dihedral_data['terms'].append({
186 #'index': dihedral_index,
187 'force': force_constant,
188 'period': periodicity,
189 'phase': phase,
190 })
191 # If this dihedral term is not the one used to calculate non covalent energies the continue
192 if ignored_1_4_energies: continue
193 # Mine non covalent energy factors
194 # These values are noted for the whole dihedral and not for each term
195 # This is because multiterm dihedrals do not have more than one term with end interactions
196 ee_scale_factor = ee_scale_factors[dihedral_index]
197 vdw_scale_factor = vdw_scale_factors[dihedral_index]
198 dihedral_data['ee_scaling'] = ee_scale_factor
199 dihedral_data['vdw_scaling'] = vdw_scale_factor
200 # Get end atom indices
201 atom1_index = atom_indices[0]
202 atom4_index = atom_indices[3]
204 return list(dihedrals_data.values())