Coverage for mddb_workflow/tools/process_interactions.py: 71%
139 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
1import itertools
2import shlex
4from mddb_workflow.tools.get_reduced_trajectory import get_reduced_trajectory
5from mddb_workflow.utils.auxiliar import InputError, TestFailure, save_json, warn, reprint
6from mddb_workflow.utils.constants import STABLE_INTERACTIONS_FLAG, OUTPUT_INTERACTIONS_FILENAME
7from mddb_workflow.utils.type_hints import *
8from mddb_workflow.utils.vmd_spells import get_covalent_bonds_between, get_interface_atom_indices
10# Set the default distance cutoff in Ångstroms (Å)
11# This is useful for atomistic simulations
12DEFAULT_DISTANCE_CUTOFF = 5
14# Set which fields are to be uploaded to the database only
15# i.e. not vmd selections or residue numbers
16UPLOAD_FIELDS = {
17 'name',
18 'agent_1',
19 'agent_2',
20 'atom_indices_1',
21 'atom_indices_2',
22 'interface_atom_indices_1',
23 'interface_atom_indices_2',
24 'version',
25 'strong_bonds',
26 'has_cg'
27}
29# Set the flag used to label failed interactions
30FAILED_INTERACTION_FLAG = 'failed'
32# Find interfaces by computing a minimum distance between residues along the trajectory
33# Residues are filtered by minimum distance along the trajectory
34# Interface residues are listed apart
35# The heavy results of interactions are stored in a json file which is uploaded to the database independently
36# In addition, this file may be used to force interactions with custom interface residues manually
37def process_interactions (
38 input_interactions : Optional[list],
39 structure_file : 'File',
40 trajectory_file : 'File',
41 structure : 'Structure',
42 snapshots : int,
43 output_directory : str,
44 mercy : list[str],
45 interactions_auto : str,
46 pbc_selection : 'Selection',
47 frames_limit : int = 1000,
48 # Percent of frames where an interaction must have place (from 0 to 1)
49 # If the interactions fails to pass the cutoff then the workflow is killed and the user is warned
50 interaction_cutoff : float = 0.1,
51 ) -> list:
52 """Find the residues of each interacting agent.
53 It can automatically detect interactions based on chain names or ligand information, or use a predefined list of interactions.
54 """
55 # Set our own internal interactions
56 interactions = []
58 # If there are no interactions then stop here
59 if input_interactions == None: return []
60 # If interactions is not a list then make it a list of it
61 # if it is already a list then copy it to avoid mutating the original
62 if type(input_interactions) == list:
63 interactions = [ *input_interactions ]
64 else:
65 interactions = [input_interactions]
66 # If interactions is an empty list then stop here
67 if len(input_interactions) == 0: return []
69 # Get explicit interactions (not keywords)
70 # Duplicate input interactions to avoid modifying the originals
71 explicit_interactions = [
72 { k:v for k,v in inter.items() } for inter in interactions if type(inter) == dict ]
74 # Since this is the first time we read the input interactions, we must make sure they are correct
75 # Make sure there are no interactions with the same name
76 interaction_names = [ interaction['name'] for interaction in explicit_interactions ]
77 if len(set(interaction_names)) < len(interaction_names):
78 raise InputError('Interactions must have unique names')
79 # Check input interactions to have every expected field and make sure they are coherent
80 for interaction in explicit_interactions:
81 name = interaction["name"]
82 # Check agents have different names
83 if interaction['agent_1'] == interaction['agent_2']:
84 raise InputError(f'Interaction agents must have different names at {name}')
85 # Check agents have different selections
86 input_agent_1_selection = interaction['selection_1']
87 input_agent_2_selection = interaction['selection_2']
88 if input_agent_1_selection == input_agent_2_selection:
89 raise InputError(f'Interaction agents must have different selections at {name}')
90 # Make sure both agents have valid selections
91 agent_1_selection = structure.select(input_agent_1_selection)
92 if not agent_1_selection:
93 raise InputError(f'Interaction "{name}" has a non valid (or empty) selection for agent 1 ({interaction["agent_1"]}): {interaction["selection_1"]}')
94 agent_2_selection = structure.select(input_agent_2_selection)
95 if not agent_2_selection:
96 raise InputError(f'Interaction "{name}" has a non valid (or empty) selection for agent 2 ({interaction["agent_2"]}): {interaction["selection_2"]}')
97 # Make sure selections do not overlap at all
98 # This makes not sense as interactions are implemented in this workflow
99 overlap = agent_1_selection & agent_2_selection
100 if overlap: raise InputError(
101 f'Agents in interaction "{name}" have {len(overlap)} overlapping atoms.\n' +
102 f'This means that the atom selection in agent 1 ({input_agent_1_selection})' +
103 f' and the atom selection in agent 2 ({input_agent_2_selection}) aim for' +
104 f' {len(overlap)} atoms in common.' +
105 f' This is not supported since it makes no sense in this context ' +
106 f'to check the interaction of a group of atoms against themselves.\n'
107 f'Please change the atom selections for this interaction.')
109 # Get keywords from the input interactions
110 # Right now the only supported instruction is 'auto'
111 keyword_interactions = set([ inter for inter in interactions if type(inter) == str ])
113 # If the iauto argument is used from the console then add it here
114 if interactions_auto:
115 keyword_interactions.add( f'auto "{interactions_auto}"' )
117 # Now process keyword interactions
118 for keyword in keyword_interactions:
119 # Parse the keyword
120 arguments = shlex.split(keyword)
121 # Depending on the header we set the behaviour
122 header = arguments[0]
123 options = arguments[1:]
124 # Find all possible interactions in a specific region
125 # All the structure is used by default if no selection is passed
126 # Note that an interaction is set by every possible combination of pairs of chains
127 if header == 'auto':
128 print(f' Processing interactions automatically')
129 if len(options) > 1: raise InputError('Automatic interactions support one selection only.\n'+
130 f' Your instruction was: auto {" ".join(options)}\n'+
131 f' Did you forget to add the quotes maybe? Try this: auto "{" ".join(options)}"')
132 # Get the structure regions which are not in PBC
133 reference_structure = structure.filter(pbc_selection) if pbc_selection else structure
134 # Set the selection of atoms where this logic is to be applied, if any
135 # If a selection is passed then filter the structure before finding its chains
136 if len(options) == 1:
137 selection = options[0]
138 parsed_selection = structure.select(selection)
139 if not parsed_selection:
140 raise InputError(f'Selection for automatic interaction "{selection}" is empty')
141 reference_structure = structure.filter(parsed_selection)
142 # Iterate possible combinations of chains
143 for chain1, chain2 in itertools.combinations(reference_structure.chains, 2):
144 interaction = {
145 "name": f"chain {chain1.name}-chain {chain2.name} interaction",
146 "agent_1": f"chain {chain1.name}",
147 "agent_2": f"chain {chain2.name}",
148 "selection_1": f"chain {chain1.name}",
149 "selection_2": f"chain {chain2.name}",
150 "auto": True,
151 }
152 # Now add the explicit interaction to the list
153 explicit_interactions.append(interaction)
154 # If we do not recognize the keyword header then we complain
155 else:
156 raise InputError(f'Instruction "{header}" not supported as interactions input')
158 # If there are no interactions at this point then return an empty list
159 interaction_count = len(explicit_interactions)
160 if interaction_count == 0: return []
162 # Set the output filepath
163 output_analysis_filepath = f'{output_directory}/{OUTPUT_INTERACTIONS_FILENAME}'
165 # If trajectory frames number is bigger than the limit we create a reduced trajectory
166 reduced_trajectory_filepath, step, frames = get_reduced_trajectory(
167 structure_file,
168 trajectory_file,
169 snapshots,
170 frames_limit,
171 )
173 # Get the structure coarse grain selection for further reference
174 cg_selection = structure.select_cg()
176 # Set if we are to have mercy when an interaction fails
177 have_mercy = STABLE_INTERACTIONS_FLAG in mercy
179 # Iterate over each defined interaction
180 for interaction in explicit_interactions:
181 interaction_name = interaction["name"]
182 # Set the distance cutoff
183 distance_cutoff = interaction.get('distance_cutoff', DEFAULT_DISTANCE_CUTOFF)
184 # Find if this interaction has coarse grain atoms involved
185 agent_1_selection = structure.select(interaction['selection_1'])
186 agent_2_selection = structure.select(interaction['selection_2'])
187 has_agent_1_cg = bool(agent_1_selection & cg_selection)
188 has_agent_2_cg = bool(agent_2_selection & cg_selection)
189 interaction['has_cg'] = has_agent_1_cg or has_agent_2_cg
190 # Check if we are using the defualt atomistic distance while selections are coarse grain
191 # If this is the case then warn the user
192 if interaction['has_cg'] and distance_cutoff == DEFAULT_DISTANCE_CUTOFF:
193 warn(f'Using atomistic default distance cutoff ({distance_cutoff}Å) with coarse grain agent(s)\n'
194 f' You may need to manually specify the distance cutoff in the inputs file for interaction "{interaction_name}"')
195 # Find out the interaction residues for each frame and save all residues as the overall interface
196 interface_results = get_interface_atom_indices(
197 structure_file.path,
198 reduced_trajectory_filepath,
199 interaction['selection_1'],
200 interaction['selection_2'],
201 distance_cutoff
202 )
203 # Check if the interaction is respecting the frames percent cutoff and if it fails then kill it
204 frames_percent = interface_results['interacting_frames'] / interface_results['total_frames']
205 pretty_frames_percent = str(round(frames_percent * 10000) / 100)
206 if frames_percent < interaction_cutoff:
207 # If this interaction was set automatically then simply skip it silently
208 if interaction.get('auto', False):
209 interaction[FAILED_INTERACTION_FLAG] = True
210 continue
211 meaning_log = 'is not happening at all' if frames_percent == 0 else 'is happening only in a small percent of the trajectory'
212 print(f'Interaction "{interaction_name}" is not reaching the frames percent cutoff of {interaction_cutoff} ({pretty_frames_percent}).\n'
213 f'This means the interaction {meaning_log}.\n'
214 'Check agent selections are correct or consider removing this interaction from the inputs.\n'
215 f' - Agent 1 selection: {interaction["selection_1"]}\n'
216 f' - Agent 2 selection: {interaction["selection_2"]}')
217 # If we are not to have mercy in case of interaction failure then stop here
218 if not have_mercy: raise TestFailure('An interaction failed to be set.\n'
219 'Use the "--mercy interact" flag for the workflow to continue.\n'
220 'Failed interactions will be ignored and will not appear in further analyses.')
221 # If the workflow is not to be killed then just remove this interaction from the interactions list
222 # Thus it will not be considered in interaction analyses and it will not appear in the metadata
223 # To not mess the interactions iteration we simply flag the interaction
224 # It will be removed further
225 warn(f'Interaction "{interaction_name}" will be ignored and will not appear in further analyses')
226 interaction[FAILED_INTERACTION_FLAG] = True
227 continue
229 # Iterate interaction agents
230 for agent in ['1','2']:
231 # Get agent name and selection for logging purposes
232 agent_name = interaction['agent_' + agent]
233 agent_selection = interaction['selection_' + agent]
234 # Save atom indices in the interaction object
235 atom_indices = interface_results[f'selection_{agent}_atom_indices']
236 # This should never happen, but make sure they are not empty
237 if len(atom_indices) == 0:
238 raise ValueError(f'Empty agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}')
239 interaction[f'atom_indices_{agent}'] = atom_indices
240 # Save interface atom indices in the interaction object
241 interface_atom_indices = interface_results[f'selection_{agent}_interface_atom_indices']
242 # This should never happen, but make sure they are not empty
243 if len(interface_atom_indices) == 0:
244 raise ValueError(f'Empty interface for agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}')
245 interaction[f'interface_atom_indices_{agent}'] = interface_atom_indices
247 # Add residue notations
248 add_residues_indices(interaction, structure)
250 # Find strong bonds between residues in different interfaces
251 # Use the main topology, which is corrected and thus will retrieve the right bonds
252 strong_bonds = get_covalent_bonds_between(structure_file.path, interaction['selection_1'], interaction['selection_2'])
253 interaction['strong_bonds'] = strong_bonds
255 # If one of the agents is fully made of strong bonds then the interaction is not valid
256 strong_bonded_atoms = set(sum(strong_bonds, []))
257 if all( index in strong_bonded_atoms for index in agent_1_selection.atom_indices ) or \
258 all( index in strong_bonded_atoms for index in agent_2_selection.atom_indices ):
259 warn(f'Interaction "{interaction_name}" is not valid since one of the agents is fully bonded to the other agent.\n'
260 'This may be due to wrong selections or a wrong interaction to be considered.\n'
261 'This interaction will be ignored and will not appear in further analyses')
262 interaction[FAILED_INTERACTION_FLAG] = True
263 continue
265 # Save the interactions version
266 interaction['version'] = '2.0.0'
268 # Log the final results
269 interface_residue_indices = sorted(interaction["interface_residue_indices_1"]
270 + interaction["interface_residue_indices_2"])
271 print(f'{interaction_name} (time: {pretty_frames_percent} %) -> {interface_residue_indices}')
273 # Filter away interactions which have failed
274 valid_interactions = [ inte for inte in explicit_interactions if not inte.get(FAILED_INTERACTION_FLAG, False) ]
275 interaction_count = len(valid_interactions)
276 print(f'There is a total of {interaction_count} valid interactions')
278 # If there are no valid interactions then stop here
279 if interaction_count == 0: return []
281 # Print an empty line for the next reprint
282 print()
283 # Check input interactions to be correct
284 for i, interaction in enumerate(valid_interactions, 1):
285 name = interaction["name"]
286 agent_1_selection = structure.select(interaction['selection_1'])
287 agent_2_selection = structure.select(interaction['selection_2'])
288 reprint(f' Finding interaction type in {name} ({i}/{interaction_count})')
289 # Check if there was a type already assigned to the interaction
290 # This is not supported anymore since the interaction type is set automatically
291 if 'type' in interaction:
292 warn(f'Interaction type "{interaction["type"]}" is set for interaction "{name}".\n'
293 'Interaction type is now calculated and the input interaction type is no longer supported.\n'
294 'Note that the input value will be ignored')
295 # Set the interaction type
296 # LORE: The type was a user input back in time but now we find it automatically
297 # WARNING: Do not calculate the type from the interface residue instead of the whole agent
298 # WARNING: This seems more coherent BUT the type will be written in the PROJECT metadata
299 # WARNING: Interaction type is a valuable search parameter and thus it must remain in project metadata
300 # WARNING: However we could have different types in different MDs, if the interaction is different
301 agent_1_classification = structure.get_selection_classification(agent_1_selection)
302 agent_2_classification = structure.get_selection_classification(agent_2_selection)
303 alphabetically_sorted = sorted([agent_1_classification, agent_2_classification])
304 interaction['type'] = f'{alphabetically_sorted[0]}-{alphabetically_sorted[1]}'
306 # Create interaction duplicates to avoid mutating the already processed interactions
307 # Then fill these duplicates only with those fields to be uploaded to the database
308 file_interactions = []
309 for interaction in valid_interactions:
310 file_interaction = { key: value for key, value in interaction.items() if key in UPLOAD_FIELDS }
311 file_interactions.append(file_interaction)
313 # Write them to disk
314 save_json(file_interactions, output_analysis_filepath, indent = 4)
316 # Finally return the processed interactions
317 return valid_interactions
319# Set an auxiliar function to add residue indices to an interactions object
320def add_residues_indices (interaction : dict, structure : 'Structure'):
321 # Iterate interaction agents
322 for agent in ['1','2']:
323 # Get interaction atom indices
324 atom_indices = interaction[f'atom_indices_{agent}']
325 # Now parse atom indices to residue indices for those analysis which work with residues
326 residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in atom_indices ])))
327 interaction[f'residue_indices_{agent}'] = residue_indices
328 # Get interaction interface atom indices
329 interface_atom_indices = interaction[f'interface_atom_indices_{agent}']
330 # Then with interface atoms/residues
331 interface_residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in interface_atom_indices ])))
332 interaction[f'interface_residue_indices_{agent}'] = interface_residue_indices