Coverage for model_workflow/tools/process

1import itertools

2from os.path import exists

4from model_workflow.tools.get_reduced_trajectory import get_reduced_trajectory

5from model_workflow.utils.auxiliar import InputError, TestFailure, load_json, save_json, warn

6from model_workflow.utils.constants import STABLE_INTERACTIONS_FLAG, OUTPUT_INTERACTIONS_FILENAME

7from model_workflow.utils.type_hints import *

8from model_workflow.utils.vmd_spells import get_covalent_bonds_between, get_interface_atom_indices

10# Set the default distance cutoff in Ångstroms (Å)

11# This is useful for atomistic simulations

12DEFAULT_DISTANCE_CUTOFF = 5

14# Set which fields are to be uploaded to the database only

15# i.e. not vmd selections or residue numbers

16UPLOAD_FIELDS = [

17 'name',

18 'agent_1',

19 'agent_2',

20 'atom_indices_1',

21 'atom_indices_2',

22 'interface_atom_indices_1',

23 'interface_atom_indices_2',

24 'version',

25 'strong_bonds',

26 'has_cg'

27]

29# Set the flag used to label failed interactions

30FAILED_INTERACTION_FLAG = 'failed'

32# Find interfaces by computing a minimum distance between residues along the trajectory

33# Residues are filtered by minimum distance along the trajectory

34# Interface residues are listed apart

35# The heavy results of interactions are stored in a json file which is uploaded to the database independently

36# This file is also used as a backup here, since calculating interactions is a heavy calculation

37# In addition, this file may be used to force interactions with custom interface residues manually

38def process_interactions (

39 input_interactions : Optional[list],

40 structure_file : 'File',

41 trajectory_file : 'File',

42 structure : 'Structure',

43 snapshots : int,

44 output_directory : str,

45 mercy : List[str],

46 interactions_auto : str,

47 ligand_map : List[dict],

48 pbc_selection : 'Selection',

49 frames_limit : int = 1000,

50 # Percent of frames where an interaction must have place (from 0 to 1)

51 # If the interactions fails to pass the cutoff then the workflow is killed and the user is warned

52 interaction_cutoff : float = 0.1,

53 ) -> list:

54 """Find the residues of each interacting agent.

55 It can automatically detect interactions based on chain names or ligand information, or use a predefined list of interactions.

56 """

57 # Set the output filepath

58 output_analysis_filepath = f'{output_directory}/{OUTPUT_INTERACTIONS_FILENAME}'

60 # Set if we are to have mercy when an interaction fails

61 have_mercy = STABLE_INTERACTIONS_FLAG in mercy

63 # Copy input interactions to avoid input mutation

64 interactions = []

65 if type(input_interactions) == list:

66 # Duplicate the input interactions to avoid modifying the originals

67 interactions = [ { k:v for k,v in interaction.items() } for interaction in input_interactions ]

69 # Set the protocol for guessing interactions automatically

70 auto = interactions_auto

71 # If the protocol is simply "true" then consider the greedy option

72 if auto == True: auto = 'greedy'

74 # If interactions are to be automatically defined, there are two options

75 if auto:

76 # We only consider three cases: all chains interactions, only two or all chains vs one

77 if not auto == 'greedy' and not auto == 'humble' and not auto == 'ligands' and len(auto[0]) != 1:

78 raise InputError('Invalid input auto. Please select "greedy", "humble", "ligands" or the letter of the chain to be used')

79 if auto == 'greedy' or auto == 'humble' or auto == 'ligands':

80 print(f' |-> Processing interactions automatically. Option "{auto}" is selected')

81 else:

82 print(f' |-> Processing interactions automatically. Chain "{auto[0]}" is selected')

84 # Get structure chains which are not completely in PBC

85 target_selection = structure.select_protein()

86 target_structure = structure.filter(target_selection)

87 target_chains = [ chain for chain in target_structure.chains if chain.get_selection() - pbc_selection ]

88 # The greedy option is to find all possible interactions between chains

89 if auto == 'autogreedy' or auto == 'greedy':

90 # Use itertools to get all possible combinations of chains

91 for chain1, chain2 in itertools.combinations(target_chains, 2):

92 interaction = {

93 "name": f"chain {chain1.name}-chain {chain2.name} interaction",

94 "agent_1": f"chain {chain1.name}",

95 "agent_2": f"chain {chain2.name}",

96 "selection_1": f"chain {chain1.name}",

97 "selection_2": f"chain {chain2.name}"

98 }

99 interactions.append(interaction)

100 have_mercy = True

101

102 # The humble option is to find the interaction between two chains. If there are more than two chains then it won't work

103 elif auto == 'autohumble' or auto == 'humble':

104 # Make sure there are only 2 chains

105 if len(target_chains) != 2: raise InputError('With input "autohumble" there must be exactly 2 chains in the structure. If not, use "autogreedy" or select the interactions manually')

106 # If there are exactly two chains then we find the interaction between them

107 chain1 = target_chains[0]

108 chain2 = target_chains[1]

109 interaction = {

110 "name": f"chain {chain1.name}-chain {chain2.name} interaction",

111 "agent_1": f"chain {chain1.name}",

112 "agent_2": f"chain {chain2.name}",

113 "selection_1": f"chain {chain1.name}",

114 "selection_2": f"chain {chain2.name}"

115 }

116 interactions.append(interaction)

117 # If the input is a single letter then it means only one chain is selected so we find the interactions with all other chains

118 elif len(auto) == 1:

119 # Find the chain selected by the user

120 selected_chain = auto

121 matching_chain = next((chain for chain in target_chains if selected_chain == chain.name), None)

122 # If the chain is not found then raise an error

123 if not matching_chain:

124 raise InputError(f'Selected chain "{selected_chain}" is not present in the structure')

125 for chain in target_chains:

126 # Skip interaction of the selected chain with itself

127 if chain == selected_chain: continue

128 interaction = {

129 "name": f"chain {selected_chain.name}-chain {chain.name} interaction",

130 "agent_1": f"chain {selected_chain.name}",

131 "agent_2": f"chain {chain.name}",

132 "selection_1": f"chain {selected_chain.name}",

133 "selection_2": f"chain {chain.name}"

134 }

135 interactions.append(interaction)

136

137 # In this case, we assume that the user wants to interact the selected chain with all other chains so we force the analysis to be run anyway

138 have_mercy = True

139

140 elif auto == 'ligands':

141 # If there are no ligands present or matched in the structure then we skip ligand interactions

142 if not ligand_map:

143 raise InputError('No ligand map is detected. Skipping ligand interactions')

144 # Iterate over the list of matched ligands in the structure

145 for ligand in ligand_map:

146 # Get the ligand atom selection

147 ligand_selection = structure.select_residue_indices(ligand['residue_indices'])

148 # Get ligand fragments

149 # AGUS: De esta forma evitamos que si un mismo ligando aparece más de una vez en la simulación (solo habrá un ligand map con el mismo nombre),

150 # AGUS: se hagan las interacciones correspondientes con cada uno de estos, es decir, como si fueran ligandos diferentes (cada uno con su respectiva interacción)

151 ligand_fragments = structure.find_fragments(ligand_selection)

152 for lf, ligand_fragment in enumerate(ligand_fragments, 1):

153 # Match the ligand residue with the chain it belongs to and its classification

154 ligand_residue_indices = structure.get_selection_residue_indices(ligand_fragment)

155 residues = [ structure.residues[index] for index in ligand_residue_indices ]

156 ligand_chains = set([ residue.chain for residue in residues ])

157 # AGUS: la clasificacion del ligando deberia mejorarse para no ser 'Other Chain' sino 'ligand' --> modificar Chain

158 ligand_classifications = set([ chain.classification for chain in ligand_chains ])

159 # Create the ligand interaction with all chains

160 for chain in target_chains:

161 # Skip interaction of the ligand with itself

162 if chain in ligand_chains: continue

163 # Skip interaction if the ligand classifications are the same as the chain classification

164 if chain.classification in ligand_classifications: continue

165 interaction = {

166 "name": f"ligand {ligand['name']} {lf}-chain {chain.name} interaction",

167 "agent_1": f"ligand {ligand['name']} {lf}",

168 "agent_2": f"chain {chain.name}",

169 "selection_1": ligand_fragment.to_vmd(), # AGUS: parcheado por si hubiera más de un residuo por ligando ¿?

170 "selection_2": f"chain {chain.name}"

171 }

172 interactions.append(interaction)

173 have_mercy = True

174

175 else:

176 raise InputError(f'Invalid input auto interactions "{auto}"')

177

178 # If there are no interactions return an empty list

179 interaction_count = len(interactions)

180 if not interactions or interaction_count == 0:

181 return []

182

183 # If there is a backup then use it

184 # Load the backup and return its content as it is

185 if exists(output_analysis_filepath):

186 loaded_interactions = load_interactions(output_analysis_filepath, structure)

187 # Make sure the backup has atom indices

188 sample = loaded_interactions[0]

189 has_atom_indices = 'atom_indices_1' in sample

190 if has_atom_indices:

191 # Merge the loaded interactions with the input interactions to cover all fields

192 complete_interactions = []

193 for input_interaction, loaded_interaction in zip(interactions, loaded_interactions):

194 complete_interaction = { **input_interaction, **loaded_interaction }

195 complete_interactions.append(complete_interaction)

196 return complete_interactions

197 # Otherwise it means this is not a compatible version and we must run interactions again

198 warn('Interactions backup is obsolete. Interactions will be calculated again')

199

200 # If trajectory frames number is bigger than the limit we create a reduced trajectory

201 reduced_trajectory_filepath, step, frames = get_reduced_trajectory(

202 structure_file,

203 trajectory_file,

204 snapshots,

205 frames_limit,

206 )

207

208 # Get the structure coarse grain selection for further reference

209 cg_selection = structure.select_cg()

210

211 # Iterate over each defined interaction

212 for interaction in interactions:

213 interaction_name = interaction["name"]

214 # Set the distance cutoff

215 distance_cutoff = interaction.get('distance_cutoff', DEFAULT_DISTANCE_CUTOFF)

216 # Find if this interaction has coarse grain atoms involved

217 agent_1_selection = structure.select(interaction['selection_1'])

218 agent_2_selection = structure.select(interaction['selection_2'])

219 has_agent_1_cg = bool(agent_1_selection & cg_selection)

220 has_agent_2_cg = bool(agent_2_selection & cg_selection)

221 interaction['has_cg'] = has_agent_1_cg or has_agent_2_cg

222 # Check if we are using the defualt atomistic distance while selections are coarse grain

223 # If this is the case then warn the user

224 if interaction['has_cg'] and distance_cutoff == DEFAULT_DISTANCE_CUTOFF:

225 warn(f'Using atomistic default distance cutoff ({distance_cutoff}Å) with coarse grain agent(s)\n'

226 f' You may need to manually specify the distance cutoff in the inputs file for interaction "{interaction_name}"')

227 # Find out the interaction residues for each frame and save all residues as the overall interface

228 interface_results = get_interface_atom_indices(

229 structure_file.path,

230 reduced_trajectory_filepath,

231 interaction['selection_1'],

232 interaction['selection_2'],

233 distance_cutoff

234 )

235 # Check if the interaction is respecting the frames percent cutoff and if it fails then kill it

236 frames_percent = interface_results['interacting_frames'] / interface_results['total_frames']

237 pretty_frames_percent = str(round(frames_percent * 10000) / 100)

238 if frames_percent < interaction_cutoff:

239 meaning_log = 'is not happening at all' if frames_percent == 0 else 'is happening only in a small percent of the trajectory'

240 print(f'Interaction "{interaction_name}" is not reaching the frames percent cutoff of {interaction_cutoff} ({pretty_frames_percent}).\n'

241 f'This means the interaction {meaning_log}.\n'

242 'Check agent selections are correct or consider removing this interaction from the inputs.\n'

243 f' - Agent 1 selection: {interaction["selection_1"]}\n'

244 f' - Agent 2 selection: {interaction["selection_2"]}')

245 # If we are not to have mercy in case of interaction failure then stop here

246 if not have_mercy: raise TestFailure('An interaction failed to be set.\n'

247 'Use the "--mercy interact" flag for the workflow to continue.\n'

248 'Failed interactions will be ignored and will not appear in further analyses.')

249 # If the workflow is not to be killed then just remove this interaction from the interactions list

250 # Thus it will not be considered in interaction analyses and it will not appear in the metadata

251 # To not mess the interactions iteration we simply flag the interaction

252 # It will be removed further

253 warn(f'Interaction "{interaction_name}" will be ignored and will not appear in further analyses')

254 interaction[FAILED_INTERACTION_FLAG] = True

255 continue

256

257 # Iterate interaction agents

258 for agent in ['1','2']:

259 # Get agent name and selection for logging purposes

260 agent_name = interaction['agent_' + agent]

261 agent_selection = interaction['selection_' + agent]

262 # Save atom indices in the interaction object

263 atom_indices = interface_results[f'selection_{agent}_atom_indices']

264 # This should never happen, but make sure they are not empty

265 if len(atom_indices) == 0:

266 raise ValueError(f'Empty agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}')

267 interaction[f'atom_indices_{agent}'] = atom_indices

268 # Save interface atom indices in the interaction object

269 interface_atom_indices = interface_results[f'selection_{agent}_interface_atom_indices']

270 # This should never happen, but make sure they are not empty

271 if len(interface_atom_indices) == 0:

272 raise ValueError(f'Empty interface for agent "{agent_name}" in interaction "{interaction_name}": {agent_selection}')

273 interaction[f'interface_atom_indices_{agent}'] = interface_atom_indices

274

275 # Add residue notations

276 add_residues_indices(interaction, structure)

277

278 # Find strong bonds between residues in different interfaces

279 # Use the main topology, which is corrected and thus will retrieve the right bonds

280 strong_bonds = get_covalent_bonds_between(structure_file.path, interaction['selection_1'], interaction['selection_2'])

281 interaction['strong_bonds'] = strong_bonds

282

283 # Save the interactions version

284 interaction['version'] = '2.0.0'

285

286 # Log the final results

287 interface_residue_indices = sorted(interaction["interface_residue_indices_1"]

288 + interaction["interface_residue_indices_2"])

289 print(f'{interaction_name} (time: {pretty_frames_percent} %) -> {interface_residue_indices}')

290

291 # Filter away interactions whcih have failed

292 valid_interactions = [ inte for inte in interactions if not inte.get(FAILED_INTERACTION_FLAG, False) ]

293

294 # If there are no valid interactions then stop here

295 if len(valid_interactions) == 0: return []

296

297 # Create interaction duplicates to avoid mutating the already processed interactions

298 # Then fill these duplicates only with those fields to be uploaded to the database

299 file_interactions = []

300 for interaction in valid_interactions:

301 file_interaction = { key: value for key, value in interaction.items() if key in UPLOAD_FIELDS }

302 file_interactions.append(file_interaction)

303

304 # Write them to disk

305 save_json(file_interactions, output_analysis_filepath, indent = 4)

306

307 # Finally return the processed interactions

308 print(f' There is a total of {len(valid_interactions)} valid interactions')

309 return valid_interactions

310

311# Load interactions from an already existing interactions file

312def load_interactions (output_analysis_filepath : str, structure : 'Structure') -> list:

313 print(f' Using already calculated interactions in {output_analysis_filepath}')

314 # The stored interactions should carry only residue indices and strong bonds

315 interactions = load_json(output_analysis_filepath)

316 # Now we must complete every interactions dict by adding residues in source format and pytraj format

317 for interaction in interactions:

318 # If the interaction failed then there will be minimal information

319 if interaction.get(FAILED_INTERACTION_FLAG, False):

320 continue

321 # Add residue notations, which are not saved to disk

322 add_residues_indices(interaction, structure)

323 return interactions

324

325# Set an auxiliar function to add residue indices to an interactions object

326def add_residues_indices (interaction : dict, structure : 'Structure'):

327 # Iterate interaction agents

328 for agent in ['1','2']:

329 # Get interaction atom indices

330 atom_indices = interaction[f'atom_indices_{agent}']

331 # Now parse atom indices to residue indices for those analysis which work with residues

332 residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in atom_indices ])))

333 interaction[f'residue_indices_{agent}'] = residue_indices

334 # Get interaction interface atom indices

335 interface_atom_indices = interaction[f'interface_atom_indices_{agent}']

336 # Then with interface atoms/residues

337 interface_residue_indices = sorted(list(set([ structure.atoms[atom_index].residue_index for atom_index in interface_atom_indices ])))

338 interaction[f'interface_residue_indices_{agent}'] = interface_residue_indices

Coverage for model_workflow/tools/process_interactions.py: 49%

144 statements