Coverage for model_workflow/utils/topologies.py: 96%

1import re

3from model_workflow.utils.gmx_spells import get_tpr_content

4from model_workflow.utils.type_hints import *

6# Set some constants

7AMBER_FLAG_PATTERN = r'%FLAG ([A-Z_]*)\s*'

8AMBER_FORMAT_PATTERN = r'%FORMAT\(([0-9A-Za-z.]*)\)\s*'

9AMBER_FLAG_FORMAT_TYPES = {

10 '20a4': str,

11 '1a80': str,

12 '10I8': int,

13 '20I4': int,

14 '1I8': int,

15 '3I8': int,

16 '5E16.8': float,

17}

19# Unified class to read topologies of multiple formats

20# This class relies in other readers and reads what others do not reach

21class Topology:

22 def __init__ (self, file : 'File'):

23 self.file = file

24 self.format = file.format

25 # Set internal variables

26 self._raw_content = None

27 self._flag_summary = None

29 # Read topology file content

30 def read_raw_content (self):

31 # If we already read the content then return it

32 if self._raw_content: return self._raw_content

33 # TPR topologies are not readable as they are, since they are in binary format

34 if self.format == 'tpr':

35 output, error = get_tpr_content(self.path)

36 self._raw_content = output

37 return self._raw_content

38 # The rest of formats are usually readable ASCII

39 with open(self.file.path, 'r') as file:

40 self._raw_content = file.readlines()

41 return self._raw_content

42 raw_content = property(read_raw_content, None, None, "Topology file content (read only)")

44 # Amber topologies only

45 # Set a summary with all the available falgs and their lines

46 def get_flag_summary (self) -> dict:

47 # If we already made the summary then return it

48 if self._flag_summary: return self._flag_summary

49 # Otherwise build the summary

50 summary = {}

51 last_flag_lines = {}

52 # Iterate lines in the topology raw content until we find a target flag

53 line_number = 0

54 for line in self.raw_content:

55 match = re.search(AMBER_FLAG_PATTERN, line)

56 if match:

57 # Close the previous flag lines dict (first time is redundant)

58 last_flag_lines['to'] = line_number-1

59 # Get the new flag

60 flag = match[1]

61 # Get flag format

62 flag_format_line = self.raw_content[line_number+1]

63 format_match = re.search(AMBER_FORMAT_PATTERN, flag_format_line)

64 if not format_match: raise ValueError(f'Flag {flag} has no format line')

65 format = format_match[1]

66 format_type = AMBER_FLAG_FORMAT_TYPES[format]

67 # Set the new flag lines dict

68 last_flag_lines = { 'type': format_type, 'from': line_number }

69 summary[flag] = last_flag_lines

70 line_number += 1

71 # Finish the last summary lines

72 last_flag_lines['to'] = line_number

73 self._flag_summary = summary

74 return self._flag_summary

75 flag_summary = property(get_flag_summary, None, None, "Amber topologies only. Flags summary and lines (read only)")

77 # Amber topologies only

78 # Mine values under a specific flag

79 # Available flags: TITLE, POINTERS, ATOM_NAME, CHARGE, ATOMIC_NUMBER, MASS, ATOM_TYPE_INDEX,

80 # NUMBER_EXCLUDED_ATOMS, NONBONDED_PARM_INDEX, RESIDUE_LABEL, RESIDUE_POINTER, BOND_FORCE_CONSTANT,

81 # BOND_EQUIL_VALUE, ANGLE_FORCE_CONSTANT, ANGLE_EQUIL_VALUE, DIHEDRAL_FORCE_CONSTANT,

82 # DIHEDRAL_PERIODICITY, DIHEDRAL_PHASE, SCEE_SCALE_FACTOR, SCNB_SCALE_FACTOR, SOLTY,

83 # LENNARD_JONES_ACOEF, LENNARD_JONES_BCOEF, BONDS_INC_HYDROGEN, BONDS_WITHOUT_HYDROGEN,

84 # ANGLES_INC_HYDROGEN, ANGLES_WITHOUT_HYDROGEN, DIHEDRALS_INC_HYDROGEN, DIHEDRALS_WITHOUT_HYDROGEN,

85 # EXCLUDED_ATOMS_LIST, HBOND_ACOEF, HBOND_BCOEF, HBCUT, AMBER_ATOM_TYPE, TREE_CHAIN_CLASSIFICATION,

86 # JOIN_ARRAY, IROTAT, RADIUS_SET, RADII, SCREEN, IPOL

87 def mine_amber_flag (self, flag : str) -> list:

88 # Find the lines to be read

89 summary = self.flag_summary[flag]

90 # Get the type of data in the flag content

91 flag_type = summary['type']

92 # Read the lines

93 # Skip the first 2 lines since they are the flag header and the format header

94 start_line = summary['from'] + 2

95 end_line = summary['to'] + 1

96 lines = self.raw_content[start_line:end_line]

97 # Parse and add all values within the flag in a list

98 values = []

99 for line in lines:

100 # Remove spaces and breaklines, then split the line by spaces

101 string_values = line[0:-1].strip().split()

102 # If flag contents are strings then return string values as they are

103 if flag_type == str: values += string_values

104 # Otherwise parse strings to numeric values if required

105 # Note that flag_type may be int or float

106 else: values += [ flag_type(value) for value in string_values ]

107 return values

108

109 # Get dihedrals data in a standarized format

110 def get_dihedrals_data (self) -> List[dict]:

111 if self.format == 'prmtop': return self._get_dihedrals_data_amber()

112 raise ValueError(f'Reading dihedrals data is not supported in {self.format} topologies')

113

114 # Amber topologies only

115 # Get dihedrals data in a standarized format

116 # https://ambermd.org/FileFormats.php

117 def _get_dihedrals_data_amber (self) -> List[dict]:

118 # Get dihedral-related values from the topology

119 including_hydrogens = self.mine_amber_flag('DIHEDRALS_INC_HYDROGEN')

120 excluding_hydrogens = self.mine_amber_flag('DIHEDRALS_WITHOUT_HYDROGEN')

121 force_constants = self.mine_amber_flag('DIHEDRAL_FORCE_CONSTANT')

122 preiodicities = self.mine_amber_flag('DIHEDRAL_PERIODICITY')

123 phases = self.mine_amber_flag('DIHEDRAL_PHASE')

124 ee_scale_factors = self.mine_amber_flag('SCEE_SCALE_FACTOR')

125 vdw_scale_factors = self.mine_amber_flag('SCNB_SCALE_FACTOR')

126 charges = self.mine_amber_flag('CHARGE')

127 atom_types = self.mine_amber_flag('ATOM_TYPE_INDEX')

128 ntypes = len(set(atom_types))

129 nonbonded_parameter_indices = self.mine_amber_flag('NONBONDED_PARM_INDEX')

130 lj_acoefs = self.mine_amber_flag('LENNARD_JONES_ACOEF')

131 lj_bcoefs = self.mine_amber_flag('LENNARD_JONES_BCOEF')

132 # Start processing dihedrals by finding their implicated atoms and "type"

133 # These values are listed in groups of 5 integer numbers (4 atoms + 1 type)

134 dihedrals_data = {}

135 all_dihedrals = including_hydrogens + excluding_hydrogens

136 dihedral_count = int(len(all_dihedrals) / 5)

137 for dih in range(dihedral_count):

138 start = dih * 5

139 dihedral_numbers = all_dihedrals[start:start+5]

140 # However indices are a bit "encripted" for performance reasons

141 # This is documented somewhere, but you must divide by 3 to get the actual index

142 # Some docs say yo also havo to add 1, but this is only to have the 1-based numeration

143 # If you want the 0-based index then is just dividing by 3

144 # Also the third index may be negative for some dihedrals but this is not literal

145 # The negative number means the dihedral was ignored for end interaction calculations

146 # This means its 1-4 atom electrostatic and van der waals energies are ignored

147 # We make a tuple out of atom indices so we can use them as keys to group dihedrals

148 # Note that some dihedrals have multiple terms

149 encrypted_atom_indices = dihedral_numbers[0:4]

150 atom_indices = tuple([ int(abs(index)/3) for index in encrypted_atom_indices ])

151 ignored_1_4_energies = dihedral_numbers[2] < 0

152 dihedral_type = dihedral_numbers[4]

153 # Get the correspondong dihedral constants

154 # Dihedral type numbering starts at 1, not at 0

155 dihedral_index = dihedral_type - 1

156 force_constant = force_constants[dihedral_index]

157 periodicity = preiodicities[dihedral_index]

158 if periodicity < 0:

159 raise ValueError('Negative preiodicity is documented but not yet supported')

160 phase = phases[dihedral_index]

161 # Get the current dihedral data

162 dihedral_data = dihedrals_data.get(atom_indices, None)

163 if dihedral_data == None:

164 # Create a new object if this is the first time to access this specific dihedral

165 dihedral_data = { 'atom_indices': atom_indices, 'terms': [] }

166 # Get end atom indices

167 atom1_index = atom_indices[0]

168 atom4_index = atom_indices[3]

169 # Mine atom charges for the end atoms

170 dihedral_data['atom1_charge'] = charges[atom1_index]

171 dihedral_data['atom4_charge'] = charges[atom4_index]

172 # To get the next two values we need to find a complex index

173 # We substract 1 to atom type inidces to get 0-based indices

174 atom1_type_index = atom_types[atom1_index] - 1

175 atom4_type_index = atom_types[atom4_index] - 1

176 # Thus the parameter index becomes also 0-based

177 nonbonded_parameter_index_index = (ntypes * atom1_type_index) + atom4_type_index

178 nonbonded_parameter_index = nonbonded_parameter_indices[nonbonded_parameter_index_index]

179 # And this is the index we use to get the parameters we really want

180 dihedral_data['lj_acoef'] = lj_acoefs[nonbonded_parameter_index]

181 dihedral_data['lj_bcoef'] = lj_bcoefs[nonbonded_parameter_index]

182 # Save the new dihedral dict in the overall dict

183 dihedrals_data[atom_indices] = dihedral_data

184 # Add current dihedral term

185 dihedral_data['terms'].append({

186 #'index': dihedral_index,

187 'force': force_constant,

188 'period': periodicity,

189 'phase': phase,

190 })

191 # If this dihedral term is not the one used to calculate non covalent energies the continue

192 if ignored_1_4_energies: continue

193 # Mine non covalent energy factors

194 # These values are noted for the whole dihedral and not for each term

195 # This is because multiterm dihedrals do not have more than one term with end interactions

196 ee_scale_factor = ee_scale_factors[dihedral_index]

197 vdw_scale_factor = vdw_scale_factors[dihedral_index]

198 dihedral_data['ee_scaling'] = ee_scale_factor

199 dihedral_data['vdw_scaling'] = vdw_scale_factor

200 # Get end atom indices

201 atom1_index = atom_indices[0]

202 atom4_index = atom_indices[3]

203

204 return list(dihedrals_data.values())