Coverage for mddb_workflow/tools/process_interactions.py: 71%

139 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1import itertools 

2import shlex 

3 

4from mddb_workflow.tools.get_reduced_trajectory import get_reduced_trajectory 

5from mddb_workflow.utils.auxiliar import InputError, TestFailure, save_json, warn, reprint 

6from mddb_workflow.utils.constants import STABLE_INTERACTIONS_FLAG, OUTPUT_INTERACTIONS_FILENAME 

7from mddb_workflow.utils.type_hints import * 

8from mddb_workflow.utils.vmd_spells import get_covalent_bonds_between, get_interface_atom_indices 

9 

10# Set the default distance cutoff in Ångstroms (Å) 

11# This is useful for atomistic simulations 

12DEFAULT_DISTANCE_CUTOFF = 5 

13 

14# Set which fields are to be uploaded to the database only 

15# i.e. not vmd selections or residue numbers 

16UPLOAD_FIELDS = { 

17 'name', 

18 'agent_1', 

19 'agent_2', 

20 'atom_indices_1', 

21 'atom_indices_2', 

22 'interface_atom_indices_1', 

23 'interface_atom_indices_2', 

24 'version', 

25 'strong_bonds', 

26 'has_cg' 

27} 

28 

29# Set the flag used to label failed interactions 

30FAILED_INTERACTION_FLAG = 'failed' 

31 

32# Find interfaces by computing a minimum distance between residues along the trajectory 

33# Residues are filtered by minimum distance along the trajectory 

34# Interface residues are listed apart 

35# The heavy results of interactions are stored in a json file which is uploaded to the database independently 

36# In addition, this file may be used to force interactions with custom interface residues manually 

37def process_interactions ( 

38 input_interactions : Optional[list], 

39 structure_file : 'File', 

40 trajectory_file : 'File', 

41 structure : 'Structure', 

42 snapshots : int, 

43 output_directory : str, 

44 mercy : list[str], 

45 interactions_auto : str, 

46 pbc_selection : 'Selection', 

47 frames_limit : int = 1000, 

48 # Percent of frames where an interaction must have place (from 0 to 1) 

49 # If the interactions fails to pass the cutoff then the workflow is killed and the user is warned 

50 interaction_cutoff : float = 0.1, 

51 ) -> list: 

52 """Find the residues of each interacting agent. 

53 It can automatically detect interactions based on chain names or ligand information, or use a predefined list of interactions. 

54 """ 

55 # Set our own internal interactions 

56 interactions = [] 

57 

58 # If there are no interactions then stop here 

59 if input_interactions == None: return [] 

60 # If interactions is not a list then make it a list of it 

61 # if it is already a list then copy it to avoid mutating the original 

62 if type(input_interactions) == list: 

63 interactions = [ *input_interactions ] 

64 else: 

65 interactions = [input_interactions] 

66 # If interactions is an empty list then stop here 

67 if len(input_interactions) == 0: return [] 

68 

69 # Get explicit interactions (not keywords) 

70 # Duplicate input interactions to avoid modifying the originals 

71 explicit_interactions = [ 

72 { k:v for k,v in inter.items() } for inter in interactions if type(inter) == dict ] 

73 

74 # Since this is the first time we read the input interactions, we must make sure they are correct 

75 # Make sure there are no interactions with the same name 

76 interaction_names = [ interaction['name'] for interaction in explicit_interactions ] 

77 if len(set(interaction_names)) < len(interaction_names): 

78 raise InputError('Interactions must have unique names') 

79 # Check input interactions to have every expected field and make sure they are coherent 

80 for interaction in explicit_interactions: 

81 name = interaction["name"] 

82 # Check agents have different names 

83 if interaction['agent_1'] == interaction['agent_2']: 

84 raise InputError(f'Interaction agents must have different names at {name}') 

85 # Check agents have different selections 

86 input_agent_1_selection = interaction['selection_1'] 

87 input_agent_2_selection = interaction['selection_2'] 

88 if input_agent_1_selection == input_agent_2_selection: 

89 raise InputError(f'Interaction agents must have different selections at {name}') 

90 # Make sure both agents have valid selections 

91 agent_1_selection = structure.select(input_agent_1_selection) 

92 if not agent_1_selection: 

93 raise InputError(f'Interaction "{name}" has a non valid (or empty) selection for agent 1 ({interaction["agent_1"]}): {interaction["selection_1"]}') 

94 agent_2_selection = structure.select(input_agent_2_selection) 

95 if not agent_2_selection: 

96 raise InputError(f'Interaction "{name}" has a non valid (or empty) selection for agent 2 ({interaction["agent_2"]}): {interaction["selection_2"]}') 

97 # Make sure selections do not overlap at all 

98 # This makes not sense as interactions are implemented in this workflow 

99 overlap = agent_1_selection & agent_2_selection 

100 if overlap: raise InputError( 

101 f'Agents in interaction "{name}" have {len(overlap)} overlapping atoms.\n' + 

102 f'This means that the atom selection in agent 1 ({input_agent_1_selection})' + 

103 f' and the atom selection in agent 2 ({input_agent_2_selection}) aim for' + 

104 f' {len(overlap)} atoms in common.' + 

105 f' This is not supported since it makes no sense in this context ' + 

106 f'to check the interaction of a group of atoms against themselves.\n' 

107 f'Please change the atom selections for this interaction.') 

108 

109 # Get keywords from the input interactions 

110 # Right now the only supported instruction is 'auto' 

111 keyword_interactions = set([ inter for inter in interactions if type(inter) == str ]) 

112 

113 # If the iauto argument is used from the console then add it here 

114 if interactions_auto: 

115 keyword_interactions.add( f'auto "{interactions_auto}"' ) 

116 

117 # Now process keyword interactions 

118 for keyword in keyword_interactions: 

119 # Parse the keyword 

120 arguments = shlex.split(keyword) 

121 # Depending on the header we set the behaviour 

122 header = arguments[0] 

123 options = arguments[1:] 

124 # Find all possible interactions in a specific region 

125 # All the structure is used by default if no selection is passed 

126 # Note that an interaction is set by every possible combination of pairs of chains 

127 if header == 'auto': 

128 print(f' Processing interactions automatically') 

129 if len(options) > 1: raise InputError('Automatic interactions support one selection only.\n'+ 

130 f' Your instruction was: auto {" ".join(options)}\n'+ 

131 f' Did you forget to add the quotes maybe? Try this: auto "{" ".join(options)}"') 

132 # Get the structure regions which are not in PBC 

133 reference_structure = structure.filter(pbc_selection) if pbc_selection else structure 

134 # Set the selection of atoms where this logic is to be applied, if any 

135 # If a selection is passed then filter the structure before finding its chains 

136 if len(options) == 1: 

137 selection = options[0] 

138 parsed_selection = structure.select(selection) 

139 if not parsed_selection: 

140 raise InputError(f'Selection for automatic interaction "{selection}" is empty') 

141 reference_structure = structure.filter(parsed_selection) 

142 # Iterate possible combinations of chains 

143 for chain1, chain2 in itertools.combinations(reference_structure.chains, 2): 

144 interaction = { 

145 "name": f"chain {chain1.name}-chain {chain2.name} interaction", 

146 "agent_1": f"chain {chain1.name}", 

147 "agent_2": f"chain {chain2.name}", 

148 "selection_1": f"chain {chain1.name}", 

149 "selection_2": f"chain {chain2.name}", 

150 "auto": True, 

151 } 

152 # Now add the explicit interaction to the list 

153 explicit_interactions.append(interaction) 

154 # If we do not recognize the keyword header then we complain 

155 else: 

156 raise InputError(f'Instruction "{header}" not supported as interactions input') 

157 

158 # If there are no interactions at this point then return an empty list 

159 interaction_count = len(explicit_interactions) 

160 if interaction_count == 0: return [] 

161 

162 # Set the output filepath 

163 output_analysis_filepath = f'{output_directory}/{OUTPUT_INTERACTIONS_FILENAME}' 

164 

165 # If trajectory frames number is bigger than the limit we create a reduced trajectory 

166 reduced_trajectory_filepath, step, frames = get_reduced_trajectory( 

167 structure_file, 

168 trajectory_file, 

169 snapshots, 

170 frames_limit, 

171 ) 

172 

173 # Get the structure coarse grain selection for further reference 

174 cg_selection = structure.select_cg() 

175 

176 # Set if we are to have mercy when an interaction fails 

177 have_mercy = STABLE_INTERACTIONS_FLAG in mercy 

178 

179 # Iterate over each defined interaction 

180 for interaction in explicit_interactions: 

181 interaction_name = interaction["name"] 

182 # Set the distance cutoff 

183 distance_cutoff = interaction.get('distance_cutoff', DEFAULT_DISTANCE_CUTOFF) 

184 # Find if this interaction has coarse grain atoms involved 

185 agent_1_selection = structure.select(interaction['selection_1']) 

186 agent_2_selection = structure.select(interaction['selection_2']) 

187 has_agent_1_cg = bool(agent_1_selection & cg_selection) 

188 has_agent_2_cg = bool(agent_2_selection & cg_selection) 

189 interaction['has_cg'] = has_agent_1_cg or has_agent_2_cg 

190 # Check if we are using the defualt atomistic distance while selections are coarse grain 

191 # If this is the case then warn the user 

192 if interaction['has_cg'] and distance_cutoff == DEFAULT_DISTANCE_CUTOFF: 

193 warn(f'Using atomistic default distance cutoff ({distance_cutoff}Å) with coarse grain agent(s)\n' 

194 f' You may need to manually specify the distance cutoff in the inputs file for interaction "{interaction_name}"') 

195 # Find out the interaction residues for each frame and save all residues as the overall interface 

196 interface_results = get_interface_atom_indices( 

197 structure_file.path, 

198 reduced_trajectory_filepath, 

199 interaction['selection_1'], 

200 interaction['selection_2'], 

201 distance_cutoff 

202 ) 

203 # Check if the interaction is respecting the frames percent cutoff and if it fails then kill it 

204 frames_percent = interface_results['interacting_frames'] / interface_results['total_frames'] 

205 pretty_frames_percent = str(round(frames_percent * 10000) / 100) 

206 if frames_percent < interaction_cutoff: 

207 # If this interaction was set automatically then simply skip it silently 

208 if interaction.get('auto', False): 

209 interaction[FAILED_INTERACTION_FLAG] = True 

210 continue 

211 meaning_log = 'is not happening at all' if frames_percent == 0 else 'is happening only in a small percent of the trajectory' 

212 print(f'Interaction "{interaction_name}" is not reaching the frames percent cutoff of {interaction_cutoff} ({pretty_frames_percent}).\n' 

213 f'This means the interaction {meaning_log}.\n' 

214 'Check agent selections are correct or consider removing this interaction from the inputs.\n' 

215 f' - Agent 1 selection: {interaction["selection_1"]}\n' 

216 f' - Agent 2 selection: {interaction["selection_2"]}') 

217 # If we are not to have mercy in case of interaction failure then stop here 

218 if not have_mercy: raise TestFailure('An interaction failed to be set.\n' 

219 'Use the "--mercy interact" flag for the workflow to continue.\n' 

220 'Failed interactions will be ignored and will not appear in further analyses.') 

221 # If the workflow is not to be killed then just remove this interaction from the interactions list 

222 # Thus it will not be considered in interaction analyses and it will not appear in the metadata 

223 # To not mess the interactions iteration we simply flag the interaction 

224 # It will be removed further 

225 warn(f'Interaction "{interaction_name}" will be ignored and will not appear in further analyses') 

226 interaction[FAILED_INTERACTION_FLAG] = True 

227 continue 

228 

229 # Iterate interaction agents 

230 for agent in ['1','2']: 

231 # Get agent name and selection for logging purposes 

232 agent_name = interaction['agent_' + agent] 

233 agent_selection = interaction['selection_' + agent] 

234 # Save atom indices in the interaction object 

235 atom_indices = interface_results[f'selection_{agent}_atom_indices'] 

236 # This should never happen, but make sure they are not empty 

237 if len(atom_indices) == 0: 

238 raise ValueError(f'Empty agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}') 

239 interaction[f'atom_indices_{agent}'] = atom_indices 

240 # Save interface atom indices in the interaction object 

241 interface_atom_indices = interface_results[f'selection_{agent}_interface_atom_indices'] 

242 # This should never happen, but make sure they are not empty 

243 if len(interface_atom_indices) == 0: 

244 raise ValueError(f'Empty interface for agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}') 

245 interaction[f'interface_atom_indices_{agent}'] = interface_atom_indices 

246 

247 # Add residue notations 

248 add_residues_indices(interaction, structure) 

249 

250 # Find strong bonds between residues in different interfaces 

251 # Use the main topology, which is corrected and thus will retrieve the right bonds 

252 strong_bonds = get_covalent_bonds_between(structure_file.path, interaction['selection_1'], interaction['selection_2']) 

253 interaction['strong_bonds'] = strong_bonds 

254 

255 # If one of the agents is fully made of strong bonds then the interaction is not valid 

256 strong_bonded_atoms = set(sum(strong_bonds, [])) 

257 if all( index in strong_bonded_atoms for index in agent_1_selection.atom_indices ) or \ 

258 all( index in strong_bonded_atoms for index in agent_2_selection.atom_indices ): 

259 warn(f'Interaction "{interaction_name}" is not valid since one of the agents is fully bonded to the other agent.\n' 

260 'This may be due to wrong selections or a wrong interaction to be considered.\n' 

261 'This interaction will be ignored and will not appear in further analyses') 

262 interaction[FAILED_INTERACTION_FLAG] = True 

263 continue 

264 

265 # Save the interactions version 

266 interaction['version'] = '2.0.0' 

267 

268 # Log the final results 

269 interface_residue_indices = sorted(interaction["interface_residue_indices_1"] 

270 + interaction["interface_residue_indices_2"]) 

271 print(f'{interaction_name} (time: {pretty_frames_percent} %) -> {interface_residue_indices}') 

272 

273 # Filter away interactions which have failed 

274 valid_interactions = [ inte for inte in explicit_interactions if not inte.get(FAILED_INTERACTION_FLAG, False) ] 

275 interaction_count = len(valid_interactions) 

276 print(f'There is a total of {interaction_count} valid interactions') 

277 

278 # If there are no valid interactions then stop here 

279 if interaction_count == 0: return [] 

280 

281 # Print an empty line for the next reprint 

282 print() 

283 # Check input interactions to be correct 

284 for i, interaction in enumerate(valid_interactions, 1): 

285 name = interaction["name"] 

286 agent_1_selection = structure.select(interaction['selection_1']) 

287 agent_2_selection = structure.select(interaction['selection_2']) 

288 reprint(f' Finding interaction type in {name} ({i}/{interaction_count})') 

289 # Check if there was a type already assigned to the interaction 

290 # This is not supported anymore since the interaction type is set automatically 

291 if 'type' in interaction: 

292 warn(f'Interaction type "{interaction["type"]}" is set for interaction "{name}".\n' 

293 'Interaction type is now calculated and the input interaction type is no longer supported.\n' 

294 'Note that the input value will be ignored') 

295 # Set the interaction type 

296 # LORE: The type was a user input back in time but now we find it automatically 

297 # WARNING: Do not calculate the type from the interface residue instead of the whole agent 

298 # WARNING: This seems more coherent BUT the type will be written in the PROJECT metadata 

299 # WARNING: Interaction type is a valuable search parameter and thus it must remain in project metadata 

300 # WARNING: However we could have different types in different MDs, if the interaction is different 

301 agent_1_classification = structure.get_selection_classification(agent_1_selection) 

302 agent_2_classification = structure.get_selection_classification(agent_2_selection) 

303 alphabetically_sorted = sorted([agent_1_classification, agent_2_classification]) 

304 interaction['type'] = f'{alphabetically_sorted[0]}-{alphabetically_sorted[1]}' 

305 

306 # Create interaction duplicates to avoid mutating the already processed interactions 

307 # Then fill these duplicates only with those fields to be uploaded to the database 

308 file_interactions = [] 

309 for interaction in valid_interactions: 

310 file_interaction = { key: value for key, value in interaction.items() if key in UPLOAD_FIELDS } 

311 file_interactions.append(file_interaction) 

312 

313 # Write them to disk 

314 save_json(file_interactions, output_analysis_filepath, indent = 4) 

315 

316 # Finally return the processed interactions 

317 return valid_interactions 

318 

319# Set an auxiliar function to add residue indices to an interactions object 

320def add_residues_indices (interaction : dict, structure : 'Structure'): 

321 # Iterate interaction agents 

322 for agent in ['1','2']: 

323 # Get interaction atom indices 

324 atom_indices = interaction[f'atom_indices_{agent}'] 

325 # Now parse atom indices to residue indices for those analysis which work with residues 

326 residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in atom_indices ]))) 

327 interaction[f'residue_indices_{agent}'] = residue_indices 

328 # Get interaction interface atom indices 

329 interface_atom_indices = interaction[f'interface_atom_indices_{agent}'] 

330 # Then with interface atoms/residues 

331 interface_residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in interface_atom_indices ]))) 

332 interaction[f'interface_residue_indices_{agent}'] = interface_residue_indices