Coverage for mddb_workflow / analyses / tmscores.py: 96%
47 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
1"""Module to perform the TM score analysis."""
3import numpy as np
4import biotite.structure as struc
5import biotite.structure.io as strucio
6import biotite.structure.io.xtc as xtc
7from mddb_workflow.utils.auxiliar import save_json
8from mddb_workflow.utils.constants import REFERENCE_LABELS, OUTPUT_TMSCORES_FILENAME
9from mddb_workflow.utils.type_hints import *
10from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step
13def _tm_score_no_filter(reference, subject, reference_indices, subject_indices, reference_length="shorter"):
14 # Bypass the filter_amino_acids check by directly computing the score
15 ref_length = struc.tm._get_reference_length(
16 reference_length,
17 struc.get_residue_count(reference),
18 struc.get_residue_count(subject)
19 )
20 distances = struc.geometry.distance(reference[reference_indices], subject[subject_indices])
21 return np.sum(struc.tm._tm_score(distances, ref_length)).item() / ref_length
24def tmscores(
25 trajectory_file: 'File',
26 output_directory: str,
27 first_frame_file: 'File',
28 average_structure_file: 'File',
29 structure: 'Structure',
30 pbc_selection: 'Selection',
31 snapshots: int,
32 frames_limit: int = 200):
33 """Perform the tm score using the tmscoring package."""
34 # Set the main output filepath
35 output_analysis_filepath = f'{output_directory}/{OUTPUT_TMSCORES_FILENAME}'
36 struc.filter_amino_acids = lambda array: np.ones(array.array_length(), dtype=bool)
37 tmscore_references = [first_frame_file, average_structure_file]
39 # Set the alpha carbon selection
40 selection = structure.select('name CA', syntax='vmd')
41 if not selection:
42 print('WARNING: There are not atoms to be analyzed for the TM score analysis')
43 return
45 # Remove PBC residues from the selection
46 selection -= pbc_selection
47 if not selection:
48 print('WARNING: There are not atoms to be analyzed for the TM score analysis after PBC substraction')
49 return
51 # Set the frame start and step
52 start = 0
53 step = None
55 output_analysis = []
57 template = strucio.load_structure(first_frame_file.path)
58 ca_list = selection.to_list()
60 ca_mask = np.isin(np.arange(template.array_length()), ca_list)
61 template = template[ca_mask]
62 frame_step, _ = calculate_frame_step(snapshots, frames_limit)
63 xtc_file = xtc.XTCFile.read(trajectory_file.path, atom_i=ca_list, step=frame_step)
64 reduced_trajectory = xtc_file.get_structure(template)
66 # Iterate over each reference and group
67 for reference in tmscore_references:
68 print(f' Running TM score using {reference.filename} as reference')
70 # Get the TM score of each frame
71 tmscores = []
72 reference_frame = reduced_trajectory[0]
73 for i in range(len(reduced_trajectory)):
74 subject = reduced_trajectory[i]
75 superimposed, _, ref_indices, sub_indices = struc.superimpose_homologs(
76 reference_frame, subject)
77 # Run the tmscoring over the current frame against the current reference
78 tm = _tm_score_no_filter(reference_frame, superimposed, ref_indices, sub_indices)
79 tmscores.append(tm)
81 # Get a standarized reference name
82 reference_name = REFERENCE_LABELS[reference.filename]
83 # Save the tmscores in the output object
84 data = {
85 'values': tmscores,
86 'reference': reference_name,
87 'group': 'c-alpha'
88 }
89 output_analysis.append(data)
91 # Export the analysis in json format
92 save_json({'start': start, 'step': step, 'data': output_analysis}, output_analysis_filepath)