Coverage for model_workflow/tools/process_interactions.py: 49%
144 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1import itertools
2from os.path import exists
4from model_workflow.tools.get_reduced_trajectory import get_reduced_trajectory
5from model_workflow.utils.auxiliar import InputError, TestFailure, load_json, save_json, warn
6from model_workflow.utils.constants import STABLE_INTERACTIONS_FLAG, OUTPUT_INTERACTIONS_FILENAME
7from model_workflow.utils.type_hints import *
8from model_workflow.utils.vmd_spells import get_covalent_bonds_between, get_interface_atom_indices
10# Set the default distance cutoff in Ångstroms (Å)
11# This is useful for atomistic simulations
12DEFAULT_DISTANCE_CUTOFF = 5
14# Set which fields are to be uploaded to the database only
15# i.e. not vmd selections or residue numbers
16UPLOAD_FIELDS = [
17 'name',
18 'agent_1',
19 'agent_2',
20 'atom_indices_1',
21 'atom_indices_2',
22 'interface_atom_indices_1',
23 'interface_atom_indices_2',
24 'version',
25 'strong_bonds',
26 'has_cg'
27]
29# Set the flag used to label failed interactions
30FAILED_INTERACTION_FLAG = 'failed'
32# Find interfaces by computing a minimum distance between residues along the trajectory
33# Residues are filtered by minimum distance along the trajectory
34# Interface residues are listed apart
35# The heavy results of interactions are stored in a json file which is uploaded to the database independently
36# This file is also used as a backup here, since calculating interactions is a heavy calculation
37# In addition, this file may be used to force interactions with custom interface residues manually
38def process_interactions (
39 input_interactions : Optional[list],
40 structure_file : 'File',
41 trajectory_file : 'File',
42 structure : 'Structure',
43 snapshots : int,
44 output_directory : str,
45 mercy : List[str],
46 interactions_auto : str,
47 ligand_map : List[dict],
48 pbc_selection : 'Selection',
49 frames_limit : int = 1000,
50 # Percent of frames where an interaction must have place (from 0 to 1)
51 # If the interactions fails to pass the cutoff then the workflow is killed and the user is warned
52 interaction_cutoff : float = 0.1,
53 ) -> list:
54 """Find the residues of each interacting agent.
55 It can automatically detect interactions based on chain names or ligand information, or use a predefined list of interactions.
56 """
57 # Set the output filepath
58 output_analysis_filepath = f'{output_directory}/{OUTPUT_INTERACTIONS_FILENAME}'
60 # Set if we are to have mercy when an interaction fails
61 have_mercy = STABLE_INTERACTIONS_FLAG in mercy
63 # Copy input interactions to avoid input mutation
64 interactions = []
65 if type(input_interactions) == list:
66 # Duplicate the input interactions to avoid modifying the originals
67 interactions = [ { k:v for k,v in interaction.items() } for interaction in input_interactions ]
69 # Set the protocol for guessing interactions automatically
70 auto = interactions_auto
71 # If the protocol is simply "true" then consider the greedy option
72 if auto == True: auto = 'greedy'
74 # If interactions are to be automatically defined, there are two options
75 if auto:
76 # We only consider three cases: all chains interactions, only two or all chains vs one
77 if not auto == 'greedy' and not auto == 'humble' and not auto == 'ligands' and len(auto[0]) != 1:
78 raise InputError('Invalid input auto. Please select "greedy", "humble", "ligands" or the letter of the chain to be used')
79 if auto == 'greedy' or auto == 'humble' or auto == 'ligands':
80 print(f' |-> Processing interactions automatically. Option "{auto}" is selected')
81 else:
82 print(f' |-> Processing interactions automatically. Chain "{auto[0]}" is selected')
84 # Get structure chains which are not completely in PBC
85 target_selection = structure.select_protein()
86 target_structure = structure.filter(target_selection)
87 target_chains = [ chain for chain in target_structure.chains if chain.get_selection() - pbc_selection ]
88 # The greedy option is to find all possible interactions between chains
89 if auto == 'autogreedy' or auto == 'greedy':
90 # Use itertools to get all possible combinations of chains
91 for chain1, chain2 in itertools.combinations(target_chains, 2):
92 interaction = {
93 "name": f"chain {chain1.name}-chain {chain2.name} interaction",
94 "agent_1": f"chain {chain1.name}",
95 "agent_2": f"chain {chain2.name}",
96 "selection_1": f"chain {chain1.name}",
97 "selection_2": f"chain {chain2.name}"
98 }
99 interactions.append(interaction)
100 have_mercy = True
102 # The humble option is to find the interaction between two chains. If there are more than two chains then it won't work
103 elif auto == 'autohumble' or auto == 'humble':
104 # Make sure there are only 2 chains
105 if len(target_chains) != 2: raise InputError('With input "autohumble" there must be exactly 2 chains in the structure. If not, use "autogreedy" or select the interactions manually')
106 # If there are exactly two chains then we find the interaction between them
107 chain1 = target_chains[0]
108 chain2 = target_chains[1]
109 interaction = {
110 "name": f"chain {chain1.name}-chain {chain2.name} interaction",
111 "agent_1": f"chain {chain1.name}",
112 "agent_2": f"chain {chain2.name}",
113 "selection_1": f"chain {chain1.name}",
114 "selection_2": f"chain {chain2.name}"
115 }
116 interactions.append(interaction)
117 # If the input is a single letter then it means only one chain is selected so we find the interactions with all other chains
118 elif len(auto) == 1:
119 # Find the chain selected by the user
120 selected_chain = auto
121 matching_chain = next((chain for chain in target_chains if selected_chain == chain.name), None)
122 # If the chain is not found then raise an error
123 if not matching_chain:
124 raise InputError(f'Selected chain "{selected_chain}" is not present in the structure')
125 for chain in target_chains:
126 # Skip interaction of the selected chain with itself
127 if chain == selected_chain: continue
128 interaction = {
129 "name": f"chain {selected_chain.name}-chain {chain.name} interaction",
130 "agent_1": f"chain {selected_chain.name}",
131 "agent_2": f"chain {chain.name}",
132 "selection_1": f"chain {selected_chain.name}",
133 "selection_2": f"chain {chain.name}"
134 }
135 interactions.append(interaction)
137 # In this case, we assume that the user wants to interact the selected chain with all other chains so we force the analysis to be run anyway
138 have_mercy = True
140 elif auto == 'ligands':
141 # If there are no ligands present or matched in the structure then we skip ligand interactions
142 if not ligand_map:
143 raise InputError('No ligand map is detected. Skipping ligand interactions')
144 # Iterate over the list of matched ligands in the structure
145 for ligand in ligand_map:
146 # Get the ligand atom selection
147 ligand_selection = structure.select_residue_indices(ligand['residue_indices'])
148 # Get ligand fragments
149 # AGUS: De esta forma evitamos que si un mismo ligando aparece más de una vez en la simulación (solo habrá un ligand map con el mismo nombre),
150 # AGUS: se hagan las interacciones correspondientes con cada uno de estos, es decir, como si fueran ligandos diferentes (cada uno con su respectiva interacción)
151 ligand_fragments = structure.find_fragments(ligand_selection)
152 for lf, ligand_fragment in enumerate(ligand_fragments, 1):
153 # Match the ligand residue with the chain it belongs to and its classification
154 ligand_residue_indices = structure.get_selection_residue_indices(ligand_fragment)
155 residues = [ structure.residues[index] for index in ligand_residue_indices ]
156 ligand_chains = set([ residue.chain for residue in residues ])
157 # AGUS: la clasificacion del ligando deberia mejorarse para no ser 'Other Chain' sino 'ligand' --> modificar Chain
158 ligand_classifications = set([ chain.classification for chain in ligand_chains ])
159 # Create the ligand interaction with all chains
160 for chain in target_chains:
161 # Skip interaction of the ligand with itself
162 if chain in ligand_chains: continue
163 # Skip interaction if the ligand classifications are the same as the chain classification
164 if chain.classification in ligand_classifications: continue
165 interaction = {
166 "name": f"ligand {ligand['name']} {lf}-chain {chain.name} interaction",
167 "agent_1": f"ligand {ligand['name']} {lf}",
168 "agent_2": f"chain {chain.name}",
169 "selection_1": ligand_fragment.to_vmd(), # AGUS: parcheado por si hubiera más de un residuo por ligando ¿?
170 "selection_2": f"chain {chain.name}"
171 }
172 interactions.append(interaction)
173 have_mercy = True
175 else:
176 raise InputError(f'Invalid input auto interactions "{auto}"')
178 # If there are no interactions return an empty list
179 interaction_count = len(interactions)
180 if not interactions or interaction_count == 0:
181 return []
183 # If there is a backup then use it
184 # Load the backup and return its content as it is
185 if exists(output_analysis_filepath):
186 loaded_interactions = load_interactions(output_analysis_filepath, structure)
187 # Make sure the backup has atom indices
188 sample = loaded_interactions[0]
189 has_atom_indices = 'atom_indices_1' in sample
190 if has_atom_indices:
191 # Merge the loaded interactions with the input interactions to cover all fields
192 complete_interactions = []
193 for input_interaction, loaded_interaction in zip(interactions, loaded_interactions):
194 complete_interaction = { **input_interaction, **loaded_interaction }
195 complete_interactions.append(complete_interaction)
196 return complete_interactions
197 # Otherwise it means this is not a compatible version and we must run interactions again
198 warn('Interactions backup is obsolete. Interactions will be calculated again')
200 # If trajectory frames number is bigger than the limit we create a reduced trajectory
201 reduced_trajectory_filepath, step, frames = get_reduced_trajectory(
202 structure_file,
203 trajectory_file,
204 snapshots,
205 frames_limit,
206 )
208 # Get the structure coarse grain selection for further reference
209 cg_selection = structure.select_cg()
211 # Iterate over each defined interaction
212 for interaction in interactions:
213 interaction_name = interaction["name"]
214 # Set the distance cutoff
215 distance_cutoff = interaction.get('distance_cutoff', DEFAULT_DISTANCE_CUTOFF)
216 # Find if this interaction has coarse grain atoms involved
217 agent_1_selection = structure.select(interaction['selection_1'])
218 agent_2_selection = structure.select(interaction['selection_2'])
219 has_agent_1_cg = bool(agent_1_selection & cg_selection)
220 has_agent_2_cg = bool(agent_2_selection & cg_selection)
221 interaction['has_cg'] = has_agent_1_cg or has_agent_2_cg
222 # Check if we are using the defualt atomistic distance while selections are coarse grain
223 # If this is the case then warn the user
224 if interaction['has_cg'] and distance_cutoff == DEFAULT_DISTANCE_CUTOFF:
225 warn(f'Using atomistic default distance cutoff ({distance_cutoff}Å) with coarse grain agent(s)\n'
226 f' You may need to manually specify the distance cutoff in the inputs file for interaction "{interaction_name}"')
227 # Find out the interaction residues for each frame and save all residues as the overall interface
228 interface_results = get_interface_atom_indices(
229 structure_file.path,
230 reduced_trajectory_filepath,
231 interaction['selection_1'],
232 interaction['selection_2'],
233 distance_cutoff
234 )
235 # Check if the interaction is respecting the frames percent cutoff and if it fails then kill it
236 frames_percent = interface_results['interacting_frames'] / interface_results['total_frames']
237 pretty_frames_percent = str(round(frames_percent * 10000) / 100)
238 if frames_percent < interaction_cutoff:
239 meaning_log = 'is not happening at all' if frames_percent == 0 else 'is happening only in a small percent of the trajectory'
240 print(f'Interaction "{interaction_name}" is not reaching the frames percent cutoff of {interaction_cutoff} ({pretty_frames_percent}).\n'
241 f'This means the interaction {meaning_log}.\n'
242 'Check agent selections are correct or consider removing this interaction from the inputs.\n'
243 f' - Agent 1 selection: {interaction["selection_1"]}\n'
244 f' - Agent 2 selection: {interaction["selection_2"]}')
245 # If we are not to have mercy in case of interaction failure then stop here
246 if not have_mercy: raise TestFailure('An interaction failed to be set.\n'
247 'Use the "--mercy interact" flag for the workflow to continue.\n'
248 'Failed interactions will be ignored and will not appear in further analyses.')
249 # If the workflow is not to be killed then just remove this interaction from the interactions list
250 # Thus it will not be considered in interaction analyses and it will not appear in the metadata
251 # To not mess the interactions iteration we simply flag the interaction
252 # It will be removed further
253 warn(f'Interaction "{interaction_name}" will be ignored and will not appear in further analyses')
254 interaction[FAILED_INTERACTION_FLAG] = True
255 continue
257 # Iterate interaction agents
258 for agent in ['1','2']:
259 # Get agent name and selection for logging purposes
260 agent_name = interaction['agent_' + agent]
261 agent_selection = interaction['selection_' + agent]
262 # Save atom indices in the interaction object
263 atom_indices = interface_results[f'selection_{agent}_atom_indices']
264 # This should never happen, but make sure they are not empty
265 if len(atom_indices) == 0:
266 raise ValueError(f'Empty agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}')
267 interaction[f'atom_indices_{agent}'] = atom_indices
268 # Save interface atom indices in the interaction object
269 interface_atom_indices = interface_results[f'selection_{agent}_interface_atom_indices']
270 # This should never happen, but make sure they are not empty
271 if len(interface_atom_indices) == 0:
272 raise ValueError(f'Empty interface for agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}')
273 interaction[f'interface_atom_indices_{agent}'] = interface_atom_indices
275 # Add residue notations
276 add_residues_indices(interaction, structure)
278 # Find strong bonds between residues in different interfaces
279 # Use the main topology, which is corrected and thus will retrieve the right bonds
280 strong_bonds = get_covalent_bonds_between(structure_file.path, interaction['selection_1'], interaction['selection_2'])
281 interaction['strong_bonds'] = strong_bonds
283 # Save the interactions version
284 interaction['version'] = '2.0.0'
286 # Log the final results
287 interface_residue_indices = sorted(interaction["interface_residue_indices_1"]
288 + interaction["interface_residue_indices_2"])
289 print(f'{interaction_name} (time: {pretty_frames_percent} %) -> {interface_residue_indices}')
291 # Filter away interactions whcih have failed
292 valid_interactions = [ inte for inte in interactions if not inte.get(FAILED_INTERACTION_FLAG, False) ]
294 # If there are no valid interactions then stop here
295 if len(valid_interactions) == 0: return []
297 # Create interaction duplicates to avoid mutating the already processed interactions
298 # Then fill these duplicates only with those fields to be uploaded to the database
299 file_interactions = []
300 for interaction in valid_interactions:
301 file_interaction = { key: value for key, value in interaction.items() if key in UPLOAD_FIELDS }
302 file_interactions.append(file_interaction)
304 # Write them to disk
305 save_json(file_interactions, output_analysis_filepath, indent = 4)
307 # Finally return the processed interactions
308 print(f' There is a total of {len(valid_interactions)} valid interactions')
309 return valid_interactions
311# Load interactions from an already existing interactions file
312def load_interactions (output_analysis_filepath : str, structure : 'Structure') -> list:
313 print(f' Using already calculated interactions in {output_analysis_filepath}')
314 # The stored interactions should carry only residue indices and strong bonds
315 interactions = load_json(output_analysis_filepath)
316 # Now we must complete every interactions dict by adding residues in source format and pytraj format
317 for interaction in interactions:
318 # If the interaction failed then there will be minimal information
319 if interaction.get(FAILED_INTERACTION_FLAG, False):
320 continue
321 # Add residue notations, which are not saved to disk
322 add_residues_indices(interaction, structure)
323 return interactions
325# Set an auxiliar function to add residue indices to an interactions object
326def add_residues_indices (interaction : dict, structure : 'Structure'):
327 # Iterate interaction agents
328 for agent in ['1','2']:
329 # Get interaction atom indices
330 atom_indices = interaction[f'atom_indices_{agent}']
331 # Now parse atom indices to residue indices for those analysis which work with residues
332 residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in atom_indices ])))
333 interaction[f'residue_indices_{agent}'] = residue_indices
334 # Get interaction interface atom indices
335 interface_atom_indices = interaction[f'interface_atom_indices_{agent}']
336 # Then with interface atoms/residues
337 interface_residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in interface_atom_indices ])))
338 interaction[f'interface_residue_indices_{agent}'] = interface_residue_indices