Coverage for model_workflow/utils/topologies.py: 96%

112 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1import re 

2 

3from model_workflow.utils.gmx_spells import get_tpr_content 

4from model_workflow.utils.type_hints import * 

5 

6# Set some constants 

7AMBER_FLAG_PATTERN = r'%FLAG ([A-Z_]*)\s*' 

8AMBER_FORMAT_PATTERN = r'%FORMAT\(([0-9A-Za-z.]*)\)\s*' 

9AMBER_FLAG_FORMAT_TYPES = { 

10 '20a4': str, 

11 '1a80': str, 

12 '10I8': int, 

13 '20I4': int, 

14 '1I8': int, 

15 '3I8': int, 

16 '5E16.8': float, 

17} 

18 

19# Unified class to read topologies of multiple formats 

20# This class relies in other readers and reads what others do not reach 

21class Topology: 

22 def __init__ (self, file : 'File'): 

23 self.file = file 

24 self.format = file.format 

25 # Set internal variables 

26 self._raw_content = None 

27 self._flag_summary = None 

28 

29 # Read topology file content 

30 def read_raw_content (self): 

31 # If we already read the content then return it 

32 if self._raw_content: return self._raw_content 

33 # TPR topologies are not readable as they are, since they are in binary format 

34 if self.format == 'tpr': 

35 output, error = get_tpr_content(self.path) 

36 self._raw_content = output 

37 return self._raw_content 

38 # The rest of formats are usually readable ASCII 

39 with open(self.file.path, 'r') as file: 

40 self._raw_content = file.readlines() 

41 return self._raw_content 

42 raw_content = property(read_raw_content, None, None, "Topology file content (read only)") 

43 

44 # Amber topologies only 

45 # Set a summary with all the available falgs and their lines 

46 def get_flag_summary (self) -> dict: 

47 # If we already made the summary then return it 

48 if self._flag_summary: return self._flag_summary 

49 # Otherwise build the summary 

50 summary = {} 

51 last_flag_lines = {} 

52 # Iterate lines in the topology raw content until we find a target flag 

53 line_number = 0 

54 for line in self.raw_content: 

55 match = re.search(AMBER_FLAG_PATTERN, line) 

56 if match: 

57 # Close the previous flag lines dict (first time is redundant) 

58 last_flag_lines['to'] = line_number-1 

59 # Get the new flag 

60 flag = match[1] 

61 # Get flag format 

62 flag_format_line = self.raw_content[line_number+1] 

63 format_match = re.search(AMBER_FORMAT_PATTERN, flag_format_line) 

64 if not format_match: raise ValueError(f'Flag {flag} has no format line') 

65 format = format_match[1] 

66 format_type = AMBER_FLAG_FORMAT_TYPES[format] 

67 # Set the new flag lines dict 

68 last_flag_lines = { 'type': format_type, 'from': line_number } 

69 summary[flag] = last_flag_lines 

70 line_number += 1 

71 # Finish the last summary lines 

72 last_flag_lines['to'] = line_number 

73 self._flag_summary = summary 

74 return self._flag_summary 

75 flag_summary = property(get_flag_summary, None, None, "Amber topologies only. Flags summary and lines (read only)") 

76 

77 # Amber topologies only 

78 # Mine values under a specific flag 

79 # Available flags: TITLE, POINTERS, ATOM_NAME, CHARGE, ATOMIC_NUMBER, MASS, ATOM_TYPE_INDEX, 

80 # NUMBER_EXCLUDED_ATOMS, NONBONDED_PARM_INDEX, RESIDUE_LABEL, RESIDUE_POINTER, BOND_FORCE_CONSTANT,  

81 # BOND_EQUIL_VALUE, ANGLE_FORCE_CONSTANT, ANGLE_EQUIL_VALUE, DIHEDRAL_FORCE_CONSTANT, 

82 # DIHEDRAL_PERIODICITY, DIHEDRAL_PHASE, SCEE_SCALE_FACTOR, SCNB_SCALE_FACTOR, SOLTY, 

83 # LENNARD_JONES_ACOEF, LENNARD_JONES_BCOEF, BONDS_INC_HYDROGEN, BONDS_WITHOUT_HYDROGEN, 

84 # ANGLES_INC_HYDROGEN, ANGLES_WITHOUT_HYDROGEN, DIHEDRALS_INC_HYDROGEN, DIHEDRALS_WITHOUT_HYDROGEN, 

85 # EXCLUDED_ATOMS_LIST, HBOND_ACOEF, HBOND_BCOEF, HBCUT, AMBER_ATOM_TYPE, TREE_CHAIN_CLASSIFICATION, 

86 # JOIN_ARRAY, IROTAT, RADIUS_SET, RADII, SCREEN, IPOL  

87 def mine_amber_flag (self, flag : str) -> list: 

88 # Find the lines to be read 

89 summary = self.flag_summary[flag] 

90 # Get the type of data in the flag content 

91 flag_type = summary['type'] 

92 # Read the lines 

93 # Skip the first 2 lines since they are the flag header and the format header 

94 start_line = summary['from'] + 2 

95 end_line = summary['to'] + 1 

96 lines = self.raw_content[start_line:end_line] 

97 # Parse and add all values within the flag in a list 

98 values = [] 

99 for line in lines: 

100 # Remove spaces and breaklines, then split the line by spaces 

101 string_values = line[0:-1].strip().split() 

102 # If flag contents are strings then return string values as they are 

103 if flag_type == str: values += string_values 

104 # Otherwise parse strings to numeric values if required 

105 # Note that flag_type may be int or float 

106 else: values += [ flag_type(value) for value in string_values ] 

107 return values 

108 

109 # Get dihedrals data in a standarized format 

110 def get_dihedrals_data (self) -> List[dict]: 

111 if self.format == 'prmtop': return self._get_dihedrals_data_amber() 

112 raise ValueError(f'Reading dihedrals data is not supported in {self.format} topologies') 

113 

114 # Amber topologies only 

115 # Get dihedrals data in a standarized format 

116 # https://ambermd.org/FileFormats.php 

117 def _get_dihedrals_data_amber (self) -> List[dict]: 

118 # Get dihedral-related values from the topology 

119 including_hydrogens = self.mine_amber_flag('DIHEDRALS_INC_HYDROGEN') 

120 excluding_hydrogens = self.mine_amber_flag('DIHEDRALS_WITHOUT_HYDROGEN') 

121 force_constants = self.mine_amber_flag('DIHEDRAL_FORCE_CONSTANT') 

122 preiodicities = self.mine_amber_flag('DIHEDRAL_PERIODICITY') 

123 phases = self.mine_amber_flag('DIHEDRAL_PHASE') 

124 ee_scale_factors = self.mine_amber_flag('SCEE_SCALE_FACTOR') 

125 vdw_scale_factors = self.mine_amber_flag('SCNB_SCALE_FACTOR') 

126 charges = self.mine_amber_flag('CHARGE') 

127 atom_types = self.mine_amber_flag('ATOM_TYPE_INDEX') 

128 ntypes = len(set(atom_types)) 

129 nonbonded_parameter_indices = self.mine_amber_flag('NONBONDED_PARM_INDEX') 

130 lj_acoefs = self.mine_amber_flag('LENNARD_JONES_ACOEF') 

131 lj_bcoefs = self.mine_amber_flag('LENNARD_JONES_BCOEF') 

132 # Start processing dihedrals by finding their implicated atoms and "type" 

133 # These values are listed in groups of 5 integer numbers (4 atoms + 1 type) 

134 dihedrals_data = {} 

135 all_dihedrals = including_hydrogens + excluding_hydrogens 

136 dihedral_count = int(len(all_dihedrals) / 5) 

137 for dih in range(dihedral_count): 

138 start = dih * 5 

139 dihedral_numbers = all_dihedrals[start:start+5] 

140 # However indices are a bit "encripted" for performance reasons 

141 # This is documented somewhere, but you must divide by 3 to get the actual index 

142 # Some docs say yo also havo to add 1, but this is only to have the 1-based numeration 

143 # If you want the 0-based index then is just dividing by 3 

144 # Also the third index may be negative for some dihedrals but this is not literal 

145 # The negative number means the dihedral was ignored for end interaction calculations 

146 # This means its 1-4 atom electrostatic and van der waals energies are ignored 

147 # We make a tuple out of atom indices so we can use them as keys to group dihedrals 

148 # Note that some dihedrals have multiple terms 

149 encrypted_atom_indices = dihedral_numbers[0:4] 

150 atom_indices = tuple([ int(abs(index)/3) for index in encrypted_atom_indices ]) 

151 ignored_1_4_energies = dihedral_numbers[2] < 0 

152 dihedral_type = dihedral_numbers[4] 

153 # Get the correspondong dihedral constants 

154 # Dihedral type numbering starts at 1, not at 0 

155 dihedral_index = dihedral_type - 1 

156 force_constant = force_constants[dihedral_index] 

157 periodicity = preiodicities[dihedral_index] 

158 if periodicity < 0: 

159 raise ValueError('Negative preiodicity is documented but not yet supported') 

160 phase = phases[dihedral_index] 

161 # Get the current dihedral data 

162 dihedral_data = dihedrals_data.get(atom_indices, None) 

163 if dihedral_data == None: 

164 # Create a new object if this is the first time to access this specific dihedral 

165 dihedral_data = { 'atom_indices': atom_indices, 'terms': [] } 

166 # Get end atom indices 

167 atom1_index = atom_indices[0] 

168 atom4_index = atom_indices[3] 

169 # Mine atom charges for the end atoms 

170 dihedral_data['atom1_charge'] = charges[atom1_index] 

171 dihedral_data['atom4_charge'] = charges[atom4_index] 

172 # To get the next two values we need to find a complex index 

173 # We substract 1 to atom type inidces to get 0-based indices 

174 atom1_type_index = atom_types[atom1_index] - 1 

175 atom4_type_index = atom_types[atom4_index] - 1 

176 # Thus the parameter index becomes also 0-based 

177 nonbonded_parameter_index_index = (ntypes * atom1_type_index) + atom4_type_index 

178 nonbonded_parameter_index = nonbonded_parameter_indices[nonbonded_parameter_index_index] 

179 # And this is the index we use to get the parameters we really want 

180 dihedral_data['lj_acoef'] = lj_acoefs[nonbonded_parameter_index] 

181 dihedral_data['lj_bcoef'] = lj_bcoefs[nonbonded_parameter_index] 

182 # Save the new dihedral dict in the overall dict 

183 dihedrals_data[atom_indices] = dihedral_data 

184 # Add current dihedral term 

185 dihedral_data['terms'].append({ 

186 #'index': dihedral_index, 

187 'force': force_constant, 

188 'period': periodicity, 

189 'phase': phase, 

190 }) 

191 # If this dihedral term is not the one used to calculate non covalent energies the continue 

192 if ignored_1_4_energies: continue 

193 # Mine non covalent energy factors 

194 # These values are noted for the whole dihedral and not for each term 

195 # This is because multiterm dihedrals do not have more than one term with end interactions 

196 ee_scale_factor = ee_scale_factors[dihedral_index] 

197 vdw_scale_factor = vdw_scale_factors[dihedral_index] 

198 dihedral_data['ee_scaling'] = ee_scale_factor 

199 dihedral_data['vdw_scaling'] = vdw_scale_factor 

200 # Get end atom indices 

201 atom1_index = atom_indices[0] 

202 atom4_index = atom_indices[3] 

203 

204 return list(dihedrals_data.values())