Coverage for model_workflow/tools/process_interactions.py: 49%

144 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1import itertools 

2from os.path import exists 

3 

4from model_workflow.tools.get_reduced_trajectory import get_reduced_trajectory 

5from model_workflow.utils.auxiliar import InputError, TestFailure, load_json, save_json, warn 

6from model_workflow.utils.constants import STABLE_INTERACTIONS_FLAG, OUTPUT_INTERACTIONS_FILENAME 

7from model_workflow.utils.type_hints import * 

8from model_workflow.utils.vmd_spells import get_covalent_bonds_between, get_interface_atom_indices 

9 

10# Set the default distance cutoff in Ångstroms (Å) 

11# This is useful for atomistic simulations 

12DEFAULT_DISTANCE_CUTOFF = 5 

13 

14# Set which fields are to be uploaded to the database only 

15# i.e. not vmd selections or residue numbers 

16UPLOAD_FIELDS = [ 

17 'name', 

18 'agent_1', 

19 'agent_2', 

20 'atom_indices_1', 

21 'atom_indices_2', 

22 'interface_atom_indices_1', 

23 'interface_atom_indices_2', 

24 'version', 

25 'strong_bonds', 

26 'has_cg' 

27] 

28 

29# Set the flag used to label failed interactions 

30FAILED_INTERACTION_FLAG = 'failed' 

31 

32# Find interfaces by computing a minimum distance between residues along the trajectory 

33# Residues are filtered by minimum distance along the trajectory 

34# Interface residues are listed apart 

35# The heavy results of interactions are stored in a json file which is uploaded to the database independently 

36# This file is also used as a backup here, since calculating interactions is a heavy calculation 

37# In addition, this file may be used to force interactions with custom interface residues manually 

38def process_interactions ( 

39 input_interactions : Optional[list], 

40 structure_file : 'File', 

41 trajectory_file : 'File', 

42 structure : 'Structure', 

43 snapshots : int, 

44 output_directory : str, 

45 mercy : List[str], 

46 interactions_auto : str, 

47 ligand_map : List[dict], 

48 pbc_selection : 'Selection', 

49 frames_limit : int = 1000, 

50 # Percent of frames where an interaction must have place (from 0 to 1) 

51 # If the interactions fails to pass the cutoff then the workflow is killed and the user is warned 

52 interaction_cutoff : float = 0.1, 

53 ) -> list: 

54 """Find the residues of each interacting agent. 

55 It can automatically detect interactions based on chain names or ligand information, or use a predefined list of interactions. 

56 """ 

57 # Set the output filepath 

58 output_analysis_filepath = f'{output_directory}/{OUTPUT_INTERACTIONS_FILENAME}' 

59 

60 # Set if we are to have mercy when an interaction fails 

61 have_mercy = STABLE_INTERACTIONS_FLAG in mercy 

62 

63 # Copy input interactions to avoid input mutation 

64 interactions = [] 

65 if type(input_interactions) == list: 

66 # Duplicate the input interactions to avoid modifying the originals 

67 interactions = [ { k:v for k,v in interaction.items() } for interaction in input_interactions ] 

68 

69 # Set the protocol for guessing interactions automatically 

70 auto = interactions_auto 

71 # If the protocol is simply "true" then consider the greedy option 

72 if auto == True: auto = 'greedy' 

73 

74 # If interactions are to be automatically defined, there are two options 

75 if auto: 

76 # We only consider three cases: all chains interactions, only two or all chains vs one 

77 if not auto == 'greedy' and not auto == 'humble' and not auto == 'ligands' and len(auto[0]) != 1: 

78 raise InputError('Invalid input auto. Please select "greedy", "humble", "ligands" or the letter of the chain to be used') 

79 if auto == 'greedy' or auto == 'humble' or auto == 'ligands': 

80 print(f' |-> Processing interactions automatically. Option "{auto}" is selected') 

81 else: 

82 print(f' |-> Processing interactions automatically. Chain "{auto[0]}" is selected') 

83 

84 # Get structure chains which are not completely in PBC 

85 target_selection = structure.select_protein() 

86 target_structure = structure.filter(target_selection) 

87 target_chains = [ chain for chain in target_structure.chains if chain.get_selection() - pbc_selection ] 

88 # The greedy option is to find all possible interactions between chains 

89 if auto == 'autogreedy' or auto == 'greedy': 

90 # Use itertools to get all possible combinations of chains 

91 for chain1, chain2 in itertools.combinations(target_chains, 2): 

92 interaction = { 

93 "name": f"chain {chain1.name}-chain {chain2.name} interaction", 

94 "agent_1": f"chain {chain1.name}", 

95 "agent_2": f"chain {chain2.name}", 

96 "selection_1": f"chain {chain1.name}", 

97 "selection_2": f"chain {chain2.name}" 

98 } 

99 interactions.append(interaction) 

100 have_mercy = True 

101 

102 # The humble option is to find the interaction between two chains. If there are more than two chains then it won't work 

103 elif auto == 'autohumble' or auto == 'humble': 

104 # Make sure there are only 2 chains 

105 if len(target_chains) != 2: raise InputError('With input "autohumble" there must be exactly 2 chains in the structure. If not, use "autogreedy" or select the interactions manually') 

106 # If there are exactly two chains then we find the interaction between them 

107 chain1 = target_chains[0] 

108 chain2 = target_chains[1] 

109 interaction = { 

110 "name": f"chain {chain1.name}-chain {chain2.name} interaction", 

111 "agent_1": f"chain {chain1.name}", 

112 "agent_2": f"chain {chain2.name}", 

113 "selection_1": f"chain {chain1.name}", 

114 "selection_2": f"chain {chain2.name}" 

115 } 

116 interactions.append(interaction) 

117 # If the input is a single letter then it means only one chain is selected so we find the interactions with all other chains 

118 elif len(auto) == 1: 

119 # Find the chain selected by the user 

120 selected_chain = auto 

121 matching_chain = next((chain for chain in target_chains if selected_chain == chain.name), None) 

122 # If the chain is not found then raise an error 

123 if not matching_chain: 

124 raise InputError(f'Selected chain "{selected_chain}" is not present in the structure') 

125 for chain in target_chains: 

126 # Skip interaction of the selected chain with itself 

127 if chain == selected_chain: continue 

128 interaction = { 

129 "name": f"chain {selected_chain.name}-chain {chain.name} interaction", 

130 "agent_1": f"chain {selected_chain.name}", 

131 "agent_2": f"chain {chain.name}", 

132 "selection_1": f"chain {selected_chain.name}", 

133 "selection_2": f"chain {chain.name}" 

134 } 

135 interactions.append(interaction) 

136 

137 # In this case, we assume that the user wants to interact the selected chain with all other chains so we force the analysis to be run anyway 

138 have_mercy = True 

139 

140 elif auto == 'ligands': 

141 # If there are no ligands present or matched in the structure then we skip ligand interactions 

142 if not ligand_map: 

143 raise InputError('No ligand map is detected. Skipping ligand interactions') 

144 # Iterate over the list of matched ligands in the structure 

145 for ligand in ligand_map: 

146 # Get the ligand atom selection 

147 ligand_selection = structure.select_residue_indices(ligand['residue_indices']) 

148 # Get ligand fragments 

149 # AGUS: De esta forma evitamos que si un mismo ligando aparece más de una vez en la simulación (solo habrá un ligand map con el mismo nombre), 

150 # AGUS: se hagan las interacciones correspondientes con cada uno de estos, es decir, como si fueran ligandos diferentes (cada uno con su respectiva interacción) 

151 ligand_fragments = structure.find_fragments(ligand_selection) 

152 for lf, ligand_fragment in enumerate(ligand_fragments, 1): 

153 # Match the ligand residue with the chain it belongs to and its classification 

154 ligand_residue_indices = structure.get_selection_residue_indices(ligand_fragment) 

155 residues = [ structure.residues[index] for index in ligand_residue_indices ] 

156 ligand_chains = set([ residue.chain for residue in residues ]) 

157 # AGUS: la clasificacion del ligando deberia mejorarse para no ser 'Other Chain' sino 'ligand' --> modificar Chain 

158 ligand_classifications = set([ chain.classification for chain in ligand_chains ]) 

159 # Create the ligand interaction with all chains 

160 for chain in target_chains: 

161 # Skip interaction of the ligand with itself 

162 if chain in ligand_chains: continue 

163 # Skip interaction if the ligand classifications are the same as the chain classification 

164 if chain.classification in ligand_classifications: continue 

165 interaction = { 

166 "name": f"ligand {ligand['name']} {lf}-chain {chain.name} interaction", 

167 "agent_1": f"ligand {ligand['name']} {lf}", 

168 "agent_2": f"chain {chain.name}", 

169 "selection_1": ligand_fragment.to_vmd(), # AGUS: parcheado por si hubiera más de un residuo por ligando ¿? 

170 "selection_2": f"chain {chain.name}" 

171 } 

172 interactions.append(interaction) 

173 have_mercy = True 

174 

175 else: 

176 raise InputError(f'Invalid input auto interactions "{auto}"') 

177 

178 # If there are no interactions return an empty list 

179 interaction_count = len(interactions) 

180 if not interactions or interaction_count == 0: 

181 return [] 

182 

183 # If there is a backup then use it 

184 # Load the backup and return its content as it is 

185 if exists(output_analysis_filepath): 

186 loaded_interactions = load_interactions(output_analysis_filepath, structure) 

187 # Make sure the backup has atom indices 

188 sample = loaded_interactions[0] 

189 has_atom_indices = 'atom_indices_1' in sample 

190 if has_atom_indices: 

191 # Merge the loaded interactions with the input interactions to cover all fields 

192 complete_interactions = [] 

193 for input_interaction, loaded_interaction in zip(interactions, loaded_interactions): 

194 complete_interaction = { **input_interaction, **loaded_interaction } 

195 complete_interactions.append(complete_interaction) 

196 return complete_interactions 

197 # Otherwise it means this is not a compatible version and we must run interactions again 

198 warn('Interactions backup is obsolete. Interactions will be calculated again') 

199 

200 # If trajectory frames number is bigger than the limit we create a reduced trajectory 

201 reduced_trajectory_filepath, step, frames = get_reduced_trajectory( 

202 structure_file, 

203 trajectory_file, 

204 snapshots, 

205 frames_limit, 

206 ) 

207 

208 # Get the structure coarse grain selection for further reference 

209 cg_selection = structure.select_cg() 

210 

211 # Iterate over each defined interaction 

212 for interaction in interactions: 

213 interaction_name = interaction["name"] 

214 # Set the distance cutoff 

215 distance_cutoff = interaction.get('distance_cutoff', DEFAULT_DISTANCE_CUTOFF) 

216 # Find if this interaction has coarse grain atoms involved 

217 agent_1_selection = structure.select(interaction['selection_1']) 

218 agent_2_selection = structure.select(interaction['selection_2']) 

219 has_agent_1_cg = bool(agent_1_selection & cg_selection) 

220 has_agent_2_cg = bool(agent_2_selection & cg_selection) 

221 interaction['has_cg'] = has_agent_1_cg or has_agent_2_cg 

222 # Check if we are using the defualt atomistic distance while selections are coarse grain 

223 # If this is the case then warn the user 

224 if interaction['has_cg'] and distance_cutoff == DEFAULT_DISTANCE_CUTOFF: 

225 warn(f'Using atomistic default distance cutoff ({distance_cutoff}Å) with coarse grain agent(s)\n' 

226 f' You may need to manually specify the distance cutoff in the inputs file for interaction "{interaction_name}"') 

227 # Find out the interaction residues for each frame and save all residues as the overall interface 

228 interface_results = get_interface_atom_indices( 

229 structure_file.path, 

230 reduced_trajectory_filepath, 

231 interaction['selection_1'], 

232 interaction['selection_2'], 

233 distance_cutoff 

234 ) 

235 # Check if the interaction is respecting the frames percent cutoff and if it fails then kill it 

236 frames_percent = interface_results['interacting_frames'] / interface_results['total_frames'] 

237 pretty_frames_percent = str(round(frames_percent * 10000) / 100) 

238 if frames_percent < interaction_cutoff: 

239 meaning_log = 'is not happening at all' if frames_percent == 0 else 'is happening only in a small percent of the trajectory' 

240 print(f'Interaction "{interaction_name}" is not reaching the frames percent cutoff of {interaction_cutoff} ({pretty_frames_percent}).\n' 

241 f'This means the interaction {meaning_log}.\n' 

242 'Check agent selections are correct or consider removing this interaction from the inputs.\n' 

243 f' - Agent 1 selection: {interaction["selection_1"]}\n' 

244 f' - Agent 2 selection: {interaction["selection_2"]}') 

245 # If we are not to have mercy in case of interaction failure then stop here 

246 if not have_mercy: raise TestFailure('An interaction failed to be set.\n' 

247 'Use the "--mercy interact" flag for the workflow to continue.\n' 

248 'Failed interactions will be ignored and will not appear in further analyses.') 

249 # If the workflow is not to be killed then just remove this interaction from the interactions list 

250 # Thus it will not be considered in interaction analyses and it will not appear in the metadata 

251 # To not mess the interactions iteration we simply flag the interaction 

252 # It will be removed further 

253 warn(f'Interaction "{interaction_name}" will be ignored and will not appear in further analyses') 

254 interaction[FAILED_INTERACTION_FLAG] = True 

255 continue 

256 

257 # Iterate interaction agents 

258 for agent in ['1','2']: 

259 # Get agent name and selection for logging purposes 

260 agent_name = interaction['agent_' + agent] 

261 agent_selection = interaction['selection_' + agent] 

262 # Save atom indices in the interaction object 

263 atom_indices = interface_results[f'selection_{agent}_atom_indices'] 

264 # This should never happen, but make sure they are not empty 

265 if len(atom_indices) == 0: 

266 raise ValueError(f'Empty agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}') 

267 interaction[f'atom_indices_{agent}'] = atom_indices 

268 # Save interface atom indices in the interaction object 

269 interface_atom_indices = interface_results[f'selection_{agent}_interface_atom_indices'] 

270 # This should never happen, but make sure they are not empty 

271 if len(interface_atom_indices) == 0: 

272 raise ValueError(f'Empty interface for agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}') 

273 interaction[f'interface_atom_indices_{agent}'] = interface_atom_indices 

274 

275 # Add residue notations 

276 add_residues_indices(interaction, structure) 

277 

278 # Find strong bonds between residues in different interfaces 

279 # Use the main topology, which is corrected and thus will retrieve the right bonds 

280 strong_bonds = get_covalent_bonds_between(structure_file.path, interaction['selection_1'], interaction['selection_2']) 

281 interaction['strong_bonds'] = strong_bonds 

282 

283 # Save the interactions version 

284 interaction['version'] = '2.0.0' 

285 

286 # Log the final results 

287 interface_residue_indices = sorted(interaction["interface_residue_indices_1"] 

288 + interaction["interface_residue_indices_2"]) 

289 print(f'{interaction_name} (time: {pretty_frames_percent} %) -> {interface_residue_indices}') 

290 

291 # Filter away interactions whcih have failed 

292 valid_interactions = [ inte for inte in interactions if not inte.get(FAILED_INTERACTION_FLAG, False) ] 

293 

294 # If there are no valid interactions then stop here 

295 if len(valid_interactions) == 0: return [] 

296 

297 # Create interaction duplicates to avoid mutating the already processed interactions 

298 # Then fill these duplicates only with those fields to be uploaded to the database 

299 file_interactions = [] 

300 for interaction in valid_interactions: 

301 file_interaction = { key: value for key, value in interaction.items() if key in UPLOAD_FIELDS } 

302 file_interactions.append(file_interaction) 

303 

304 # Write them to disk 

305 save_json(file_interactions, output_analysis_filepath, indent = 4) 

306 

307 # Finally return the processed interactions 

308 print(f' There is a total of {len(valid_interactions)} valid interactions') 

309 return valid_interactions 

310 

311# Load interactions from an already existing interactions file 

312def load_interactions (output_analysis_filepath : str, structure : 'Structure') -> list: 

313 print(f' Using already calculated interactions in {output_analysis_filepath}') 

314 # The stored interactions should carry only residue indices and strong bonds 

315 interactions = load_json(output_analysis_filepath) 

316 # Now we must complete every interactions dict by adding residues in source format and pytraj format 

317 for interaction in interactions: 

318 # If the interaction failed then there will be minimal information 

319 if interaction.get(FAILED_INTERACTION_FLAG, False): 

320 continue 

321 # Add residue notations, which are not saved to disk 

322 add_residues_indices(interaction, structure) 

323 return interactions 

324 

325# Set an auxiliar function to add residue indices to an interactions object 

326def add_residues_indices (interaction : dict, structure : 'Structure'): 

327 # Iterate interaction agents 

328 for agent in ['1','2']: 

329 # Get interaction atom indices 

330 atom_indices = interaction[f'atom_indices_{agent}'] 

331 # Now parse atom indices to residue indices for those analysis which work with residues 

332 residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in atom_indices ]))) 

333 interaction[f'residue_indices_{agent}'] = residue_indices 

334 # Get interaction interface atom indices 

335 interface_atom_indices = interaction[f'interface_atom_indices_{agent}'] 

336 # Then with interface atoms/residues 

337 interface_residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in interface_atom_indices ]))) 

338 interaction[f'interface_residue_indices_{agent}'] = interface_residue_indices