Coverage for mddb_workflow/analyses/tmscores.py: 96%
51 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
1from os import remove
2from Bio.Data.IUPACData import protein_letters_1to3_extended
3import numpy as np
4import biotite.structure as struc
5import biotite.structure.io as strucio
6import biotite.structure.io.xtc as xtc
7from mddb_workflow.utils.auxiliar import save_json
8from mddb_workflow.utils.constants import REFERENCE_LABELS, OUTPUT_TMSCORES_FILENAME, PROTEIN_RESIDUE_NAME_LETTERS
9from mddb_workflow.utils.gmx_spells import pdb_filter
10from mddb_workflow.utils.type_hints import *
11from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step
14def _tm_score_no_filter(reference, subject, reference_indices, subject_indices, reference_length="shorter"):
15 # Bypass the filter_amino_acids check by directly computing the score
16 ref_length = struc.tm._get_reference_length(
17 reference_length,
18 struc.get_residue_count(reference),
19 struc.get_residue_count(subject)
20 )
21 distances = struc.geometry.distance(reference[reference_indices], subject[subject_indices])
22 return np.sum(struc.tm._tm_score(distances, ref_length)).item() / ref_length
23_tm_score_no_filter.__doc__ = struc.tm_score.__doc__
25def tmscores (
26 trajectory_file : 'File',
27 output_directory : str,
28 first_frame_file : 'File',
29 average_structure_file : 'File',
30 structure : 'Structure',
31 pbc_selection : 'Selection',
32 snapshots : int,
33 frames_limit : int = 200):
34 """ Perform the tm score using the tmscoring package. """
36 # Set the main output filepath
37 output_analysis_filepath = f'{output_directory}/{OUTPUT_TMSCORES_FILENAME}'
38 struc.filter_amino_acids = lambda array: np.ones(array.array_length(), dtype=bool)
39 tmscore_references = [first_frame_file, average_structure_file]
41 # Set the alpha carbon selection
42 selection = structure.select('name CA', syntax='vmd')
43 if not selection:
44 print('WARNING: There are not atoms to be analyzed for the TM score analysis')
45 return
47 # Remove PBC residues from the selection
48 selection -= pbc_selection
49 if not selection:
50 print('WARNING: There are not atoms to be analyzed for the TM score analysis after PBC substraction')
51 return
53 # Set the frame start and step
54 start = 0
55 step = None
57 output_analysis = []
59 template = strucio.load_structure(first_frame_file.path)
60 ca_list = selection.to_list()
62 ca_mask = np.isin(np.arange(template.array_length()), ca_list)
63 template = template[ca_mask]
64 frame_step, _ = calculate_frame_step(snapshots, frames_limit)
65 xtc_file = xtc.XTCFile.read(trajectory_file.path, atom_i=ca_list, step=frame_step)
66 trajectory = xtc_file.get_structure(template)
68 # Iterate over each reference and group
69 for reference in tmscore_references:
70 print(f' Running TM score using {reference.filename} as reference')
72 # Get the TM score of each frame
73 tmscores = []
74 reference_frame = trajectory[0]
75 for i in range(1, len(trajectory), frame_step):
76 subject = trajectory[i]
77 superimposed, _, ref_indices, sub_indices = struc.superimpose_structural_homologs(
78 reference_frame, subject, max_iterations=1000)
79 # Run the tmscoring over the current frame against the current reference
80 tm = _tm_score_no_filter(reference_frame, superimposed, ref_indices, sub_indices)
81 tmscores.append(tm)
83 # Get a standarized reference name
84 reference_name = REFERENCE_LABELS[reference.filename]
85 # Save the tmscores in the output object
86 data = {
87 'values': tmscores,
88 'reference': reference_name,
89 'group': 'c-alpha'
90 }
91 output_analysis.append(data)
93 # Export the analysis in json format
94 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath)