Coverage for mddb_workflow/analyses/tmscores.py: 96%

1from os import remove

2from Bio.Data.IUPACData import protein_letters_1to3_extended

3import numpy as np

4import biotite.structure as struc

5import biotite.structure.io as strucio

6import biotite.structure.io.xtc as xtc

7from mddb_workflow.utils.auxiliar import save_json

8from mddb_workflow.utils.constants import REFERENCE_LABELS, OUTPUT_TMSCORES_FILENAME, PROTEIN_RESIDUE_NAME_LETTERS

9from mddb_workflow.utils.gmx_spells import pdb_filter

10from mddb_workflow.utils.type_hints import *

11from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step

14def _tm_score_no_filter(reference, subject, reference_indices, subject_indices, reference_length="shorter"):

15 # Bypass the filter_amino_acids check by directly computing the score

16 ref_length = struc.tm._get_reference_length(

17 reference_length,

18 struc.get_residue_count(reference),

19 struc.get_residue_count(subject)

20 )

21 distances = struc.geometry.distance(reference[reference_indices], subject[subject_indices])

22 return np.sum(struc.tm._tm_score(distances, ref_length)).item() / ref_length

23_tm_score_no_filter.__doc__ = struc.tm_score.__doc__

25def tmscores (

26 trajectory_file : 'File',

27 output_directory : str,

28 first_frame_file : 'File',

29 average_structure_file : 'File',

30 structure : 'Structure',

31 pbc_selection : 'Selection',

32 snapshots : int,

33 frames_limit : int = 200):

34 """ Perform the tm score using the tmscoring package. """

36 # Set the main output filepath

37 output_analysis_filepath = f'{output_directory}/{OUTPUT_TMSCORES_FILENAME}'

38 struc.filter_amino_acids = lambda array: np.ones(array.array_length(), dtype=bool)

39 tmscore_references = [first_frame_file, average_structure_file]

41 # Set the alpha carbon selection

42 selection = structure.select('name CA', syntax='vmd')

43 if not selection:

44 print('WARNING: There are not atoms to be analyzed for the TM score analysis')

45 return

47 # Remove PBC residues from the selection

48 selection -= pbc_selection

49 if not selection:

50 print('WARNING: There are not atoms to be analyzed for the TM score analysis after PBC substraction')

51 return

53 # Set the frame start and step

54 start = 0

55 step = None

57 output_analysis = []

59 template = strucio.load_structure(first_frame_file.path)

60 ca_list = selection.to_list()

62 ca_mask = np.isin(np.arange(template.array_length()), ca_list)

63 template = template[ca_mask]

64 frame_step, _ = calculate_frame_step(snapshots, frames_limit)

65 xtc_file = xtc.XTCFile.read(trajectory_file.path, atom_i=ca_list, step=frame_step)

66 trajectory = xtc_file.get_structure(template)

68 # Iterate over each reference and group

69 for reference in tmscore_references:

70 print(f' Running TM score using {reference.filename} as reference')

72 # Get the TM score of each frame

73 tmscores = []

74 reference_frame = trajectory[0]

75 for i in range(1, len(trajectory), frame_step):

76 subject = trajectory[i]

77 superimposed, _, ref_indices, sub_indices = struc.superimpose_structural_homologs(

78 reference_frame, subject, max_iterations=1000)

79 # Run the tmscoring over the current frame against the current reference

80 tm = _tm_score_no_filter(reference_frame, superimposed, ref_indices, sub_indices)

81 tmscores.append(tm)

83 # Get a standarized reference name

84 reference_name = REFERENCE_LABELS[reference.filename]

85 # Save the tmscores in the output object

86 data = {

87 'values': tmscores,

88 'reference': reference_name,

89 'group': 'c-alpha'

90 }

91 output_analysis.append(data)

93 # Export the analysis in json format

94 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath)