Coverage for mddb_workflow/tools/process

1import itertools

2import shlex

4from mddb_workflow.tools.get_reduced_trajectory import get_reduced_trajectory

5from mddb_workflow.utils.auxiliar import InputError, TestFailure, save_json, warn, reprint

6from mddb_workflow.utils.constants import STABLE_INTERACTIONS_FLAG, OUTPUT_INTERACTIONS_FILENAME

7from mddb_workflow.utils.type_hints import *

8from mddb_workflow.utils.vmd_spells import get_covalent_bonds_between, get_interface_atom_indices

10# Set the default distance cutoff in Ångstroms (Å)

11# This is useful for atomistic simulations

12DEFAULT_DISTANCE_CUTOFF = 5

14# Set which fields are to be uploaded to the database only

15# i.e. not vmd selections or residue numbers

16UPLOAD_FIELDS = {

17 'name',

18 'agent_1',

19 'agent_2',

20 'atom_indices_1',

21 'atom_indices_2',

22 'interface_atom_indices_1',

23 'interface_atom_indices_2',

24 'version',

25 'strong_bonds',

26 'has_cg'

27}

29# Set the flag used to label failed interactions

30FAILED_INTERACTION_FLAG = 'failed'

32# Find interfaces by computing a minimum distance between residues along the trajectory

33# Residues are filtered by minimum distance along the trajectory

34# Interface residues are listed apart

35# The heavy results of interactions are stored in a json file which is uploaded to the database independently

36# In addition, this file may be used to force interactions with custom interface residues manually

37def process_interactions (

38 input_interactions : Optional[list],

39 structure_file : 'File',

40 trajectory_file : 'File',

41 structure : 'Structure',

42 snapshots : int,

43 output_directory : str,

44 mercy : list[str],

45 interactions_auto : str,

46 pbc_selection : 'Selection',

47 frames_limit : int = 1000,

48 # Percent of frames where an interaction must have place (from 0 to 1)

49 # If the interactions fails to pass the cutoff then the workflow is killed and the user is warned

50 interaction_cutoff : float = 0.1,

51 ) -> list:

52 """Find the residues of each interacting agent.

53 It can automatically detect interactions based on chain names or ligand information, or use a predefined list of interactions.

54 """

55 # Set our own internal interactions

56 interactions = []

58 # If there are no interactions then stop here

59 if input_interactions == None: return []

60 # If interactions is not a list then make it a list of it

61 # if it is already a list then copy it to avoid mutating the original

62 if type(input_interactions) == list:

63 interactions = [ *input_interactions ]

64 else:

65 interactions = [input_interactions]

66 # If interactions is an empty list then stop here

67 if len(input_interactions) == 0: return []

69 # Get explicit interactions (not keywords)

70 # Duplicate input interactions to avoid modifying the originals

71 explicit_interactions = [

72 { k:v for k,v in inter.items() } for inter in interactions if type(inter) == dict ]

74 # Since this is the first time we read the input interactions, we must make sure they are correct

75 # Make sure there are no interactions with the same name

76 interaction_names = [ interaction['name'] for interaction in explicit_interactions ]

77 if len(set(interaction_names)) < len(interaction_names):

78 raise InputError('Interactions must have unique names')

79 # Check input interactions to have every expected field and make sure they are coherent

80 for interaction in explicit_interactions:

81 name = interaction["name"]

82 # Check agents have different names

83 if interaction['agent_1'] == interaction['agent_2']:

84 raise InputError(f'Interaction agents must have different names at {name}')

85 # Check agents have different selections

86 input_agent_1_selection = interaction['selection_1']

87 input_agent_2_selection = interaction['selection_2']

88 if input_agent_1_selection == input_agent_2_selection:

89 raise InputError(f'Interaction agents must have different selections at {name}')

90 # Make sure both agents have valid selections

91 agent_1_selection = structure.select(input_agent_1_selection)

92 if not agent_1_selection:

93 raise InputError(f'Interaction "{name}" has a non valid (or empty) selection for agent 1 ({interaction["agent_1"]}): {interaction["selection_1"]}')

94 agent_2_selection = structure.select(input_agent_2_selection)

95 if not agent_2_selection:

96 raise InputError(f'Interaction "{name}" has a non valid (or empty) selection for agent 2 ({interaction["agent_2"]}): {interaction["selection_2"]}')

97 # Make sure selections do not overlap at all

98 # This makes not sense as interactions are implemented in this workflow

99 overlap = agent_1_selection & agent_2_selection

100 if overlap: raise InputError(

101 f'Agents in interaction "{name}" have {len(overlap)} overlapping atoms.\n' +

102 f'This means that the atom selection in agent 1 ({input_agent_1_selection})' +

103 f' and the atom selection in agent 2 ({input_agent_2_selection}) aim for' +

104 f' {len(overlap)} atoms in common.' +

105 f' This is not supported since it makes no sense in this context ' +

106 f'to check the interaction of a group of atoms against themselves.\n'

107 f'Please change the atom selections for this interaction.')

108

109 # Get keywords from the input interactions

110 # Right now the only supported instruction is 'auto'

111 keyword_interactions = set([ inter for inter in interactions if type(inter) == str ])

112

113 # If the iauto argument is used from the console then add it here

114 if interactions_auto:

115 keyword_interactions.add( f'auto "{interactions_auto}"' )

116

117 # Now process keyword interactions

118 for keyword in keyword_interactions:

119 # Parse the keyword

120 arguments = shlex.split(keyword)

121 # Depending on the header we set the behaviour

122 header = arguments[0]

123 options = arguments[1:]

124 # Find all possible interactions in a specific region

125 # All the structure is used by default if no selection is passed

126 # Note that an interaction is set by every possible combination of pairs of chains

127 if header == 'auto':

128 print(f' Processing interactions automatically')

129 if len(options) > 1: raise InputError('Automatic interactions support one selection only.\n'+

130 f' Your instruction was: auto {" ".join(options)}\n'+

131 f' Did you forget to add the quotes maybe? Try this: auto "{" ".join(options)}"')

132 # Get the structure regions which are not in PBC

133 reference_structure = structure.filter(pbc_selection) if pbc_selection else structure

134 # Set the selection of atoms where this logic is to be applied, if any

135 # If a selection is passed then filter the structure before finding its chains

136 if len(options) == 1:

137 selection = options[0]

138 parsed_selection = structure.select(selection)

139 if not parsed_selection:

140 raise InputError(f'Selection for automatic interaction "{selection}" is empty')

141 reference_structure = structure.filter(parsed_selection)

142 # Iterate possible combinations of chains

143 for chain1, chain2 in itertools.combinations(reference_structure.chains, 2):

144 interaction = {

145 "name": f"chain {chain1.name}-chain {chain2.name} interaction",

146 "agent_1": f"chain {chain1.name}",

147 "agent_2": f"chain {chain2.name}",

148 "selection_1": f"chain {chain1.name}",

149 "selection_2": f"chain {chain2.name}",

150 "auto": True,

151 }

152 # Now add the explicit interaction to the list

153 explicit_interactions.append(interaction)

154 # If we do not recognize the keyword header then we complain

155 else:

156 raise InputError(f'Instruction "{header}" not supported as interactions input')

157

158 # If there are no interactions at this point then return an empty list

159 interaction_count = len(explicit_interactions)

160 if interaction_count == 0: return []

161

162 # Set the output filepath

163 output_analysis_filepath = f'{output_directory}/{OUTPUT_INTERACTIONS_FILENAME}'

164

165 # If trajectory frames number is bigger than the limit we create a reduced trajectory

166 reduced_trajectory_filepath, step, frames = get_reduced_trajectory(

167 structure_file,

168 trajectory_file,

169 snapshots,

170 frames_limit,

171 )

172

173 # Get the structure coarse grain selection for further reference

174 cg_selection = structure.select_cg()

175

176 # Set if we are to have mercy when an interaction fails

177 have_mercy = STABLE_INTERACTIONS_FLAG in mercy

178

179 # Iterate over each defined interaction

180 for interaction in explicit_interactions:

181 interaction_name = interaction["name"]

182 # Set the distance cutoff

183 distance_cutoff = interaction.get('distance_cutoff', DEFAULT_DISTANCE_CUTOFF)

184 # Find if this interaction has coarse grain atoms involved

185 agent_1_selection = structure.select(interaction['selection_1'])

186 agent_2_selection = structure.select(interaction['selection_2'])

187 has_agent_1_cg = bool(agent_1_selection & cg_selection)

188 has_agent_2_cg = bool(agent_2_selection & cg_selection)

189 interaction['has_cg'] = has_agent_1_cg or has_agent_2_cg

190 # Check if we are using the defualt atomistic distance while selections are coarse grain

191 # If this is the case then warn the user

192 if interaction['has_cg'] and distance_cutoff == DEFAULT_DISTANCE_CUTOFF:

193 warn(f'Using atomistic default distance cutoff ({distance_cutoff}Å) with coarse grain agent(s)\n'

194 f' You may need to manually specify the distance cutoff in the inputs file for interaction "{interaction_name}"')

195 # Find out the interaction residues for each frame and save all residues as the overall interface

196 interface_results = get_interface_atom_indices(

197 structure_file.path,

198 reduced_trajectory_filepath,

199 interaction['selection_1'],

200 interaction['selection_2'],

201 distance_cutoff

202 )

203 # Check if the interaction is respecting the frames percent cutoff and if it fails then kill it

204 frames_percent = interface_results['interacting_frames'] / interface_results['total_frames']

205 pretty_frames_percent = str(round(frames_percent * 10000) / 100)

206 if frames_percent < interaction_cutoff:

207 # If this interaction was set automatically then simply skip it silently

208 if interaction.get('auto', False):

209 interaction[FAILED_INTERACTION_FLAG] = True

210 continue

211 meaning_log = 'is not happening at all' if frames_percent == 0 else 'is happening only in a small percent of the trajectory'

212 print(f'Interaction "{interaction_name}" is not reaching the frames percent cutoff of {interaction_cutoff} ({pretty_frames_percent}).\n'

213 f'This means the interaction {meaning_log}.\n'

214 'Check agent selections are correct or consider removing this interaction from the inputs.\n'

215 f' - Agent 1 selection: {interaction["selection_1"]}\n'

216 f' - Agent 2 selection: {interaction["selection_2"]}')

217 # If we are not to have mercy in case of interaction failure then stop here

218 if not have_mercy: raise TestFailure('An interaction failed to be set.\n'

219 'Use the "--mercy interact" flag for the workflow to continue.\n'

220 'Failed interactions will be ignored and will not appear in further analyses.')

221 # If the workflow is not to be killed then just remove this interaction from the interactions list

222 # Thus it will not be considered in interaction analyses and it will not appear in the metadata

223 # To not mess the interactions iteration we simply flag the interaction

224 # It will be removed further

225 warn(f'Interaction "{interaction_name}" will be ignored and will not appear in further analyses')

226 interaction[FAILED_INTERACTION_FLAG] = True

227 continue

228

229 # Iterate interaction agents

230 for agent in ['1','2']:

231 # Get agent name and selection for logging purposes

232 agent_name = interaction['agent_' + agent]

233 agent_selection = interaction['selection_' + agent]

234 # Save atom indices in the interaction object

235 atom_indices = interface_results[f'selection_{agent}_atom_indices']

236 # This should never happen, but make sure they are not empty

237 if len(atom_indices) == 0:

238 raise ValueError(f'Empty agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}')

239 interaction[f'atom_indices_{agent}'] = atom_indices

240 # Save interface atom indices in the interaction object

241 interface_atom_indices = interface_results[f'selection_{agent}_interface_atom_indices']

242 # This should never happen, but make sure they are not empty

243 if len(interface_atom_indices) == 0:

244 raise ValueError(f'Empty interface for agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}')

245 interaction[f'interface_atom_indices_{agent}'] = interface_atom_indices

246

247 # Add residue notations

248 add_residues_indices(interaction, structure)

249

250 # Find strong bonds between residues in different interfaces

251 # Use the main topology, which is corrected and thus will retrieve the right bonds

252 strong_bonds = get_covalent_bonds_between(structure_file.path, interaction['selection_1'], interaction['selection_2'])

253 interaction['strong_bonds'] = strong_bonds

254

255 # If one of the agents is fully made of strong bonds then the interaction is not valid

256 strong_bonded_atoms = set(sum(strong_bonds, []))

257 if all( index in strong_bonded_atoms for index in agent_1_selection.atom_indices ) or \

258 all( index in strong_bonded_atoms for index in agent_2_selection.atom_indices ):

259 warn(f'Interaction "{interaction_name}" is not valid since one of the agents is fully bonded to the other agent.\n'

260 'This may be due to wrong selections or a wrong interaction to be considered.\n'

261 'This interaction will be ignored and will not appear in further analyses')

262 interaction[FAILED_INTERACTION_FLAG] = True

263 continue

264

265 # Save the interactions version

266 interaction['version'] = '2.0.0'

267

268 # Log the final results

269 interface_residue_indices = sorted(interaction["interface_residue_indices_1"]

270 + interaction["interface_residue_indices_2"])

271 print(f'{interaction_name} (time: {pretty_frames_percent} %) -> {interface_residue_indices}')

272

273 # Filter away interactions which have failed

274 valid_interactions = [ inte for inte in explicit_interactions if not inte.get(FAILED_INTERACTION_FLAG, False) ]

275 interaction_count = len(valid_interactions)

276 print(f'There is a total of {interaction_count} valid interactions')

277

278 # If there are no valid interactions then stop here

279 if interaction_count == 0: return []

280

281 # Print an empty line for the next reprint

282 print()

283 # Check input interactions to be correct

284 for i, interaction in enumerate(valid_interactions, 1):

285 name = interaction["name"]

286 agent_1_selection = structure.select(interaction['selection_1'])

287 agent_2_selection = structure.select(interaction['selection_2'])

288 reprint(f' Finding interaction type in {name} ({i}/{interaction_count})')

289 # Check if there was a type already assigned to the interaction

290 # This is not supported anymore since the interaction type is set automatically

291 if 'type' in interaction:

292 warn(f'Interaction type "{interaction["type"]}" is set for interaction "{name}".\n'

293 'Interaction type is now calculated and the input interaction type is no longer supported.\n'

294 'Note that the input value will be ignored')

295 # Set the interaction type

296 # LORE: The type was a user input back in time but now we find it automatically

297 # WARNING: Do not calculate the type from the interface residue instead of the whole agent

298 # WARNING: This seems more coherent BUT the type will be written in the PROJECT metadata

299 # WARNING: Interaction type is a valuable search parameter and thus it must remain in project metadata

300 # WARNING: However we could have different types in different MDs, if the interaction is different

301 agent_1_classification = structure.get_selection_classification(agent_1_selection)

302 agent_2_classification = structure.get_selection_classification(agent_2_selection)

303 alphabetically_sorted = sorted([agent_1_classification, agent_2_classification])

304 interaction['type'] = f'{alphabetically_sorted[0]}-{alphabetically_sorted[1]}'

305

306 # Create interaction duplicates to avoid mutating the already processed interactions

307 # Then fill these duplicates only with those fields to be uploaded to the database

308 file_interactions = []

309 for interaction in valid_interactions:

310 file_interaction = { key: value for key, value in interaction.items() if key in UPLOAD_FIELDS }

311 file_interactions.append(file_interaction)

312

313 # Write them to disk

314 save_json(file_interactions, output_analysis_filepath, indent = 4)

315

316 # Finally return the processed interactions

317 return valid_interactions

318

319# Set an auxiliar function to add residue indices to an interactions object

320def add_residues_indices (interaction : dict, structure : 'Structure'):

321 # Iterate interaction agents

322 for agent in ['1','2']:

323 # Get interaction atom indices

324 atom_indices = interaction[f'atom_indices_{agent}']

325 # Now parse atom indices to residue indices for those analysis which work with residues

326 residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in atom_indices ])))

327 interaction[f'residue_indices_{agent}'] = residue_indices

328 # Get interaction interface atom indices

329 interface_atom_indices = interaction[f'interface_atom_indices_{agent}']

330 # Then with interface atoms/residues

331 interface_residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in interface_atom_indices ])))

332 interaction[f'interface_residue_indices_{agent}'] = interface_residue_indices

Coverage for mddb_workflow/tools/process_interactions.py: 71%

139 statements