Coverage for mddb_workflow/analyses/tmscores.py: 96%

51 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1from os import remove 

2from Bio.Data.IUPACData import protein_letters_1to3_extended 

3import numpy as np 

4import biotite.structure as struc 

5import biotite.structure.io as strucio 

6import biotite.structure.io.xtc as xtc 

7from mddb_workflow.utils.auxiliar import save_json 

8from mddb_workflow.utils.constants import REFERENCE_LABELS, OUTPUT_TMSCORES_FILENAME, PROTEIN_RESIDUE_NAME_LETTERS 

9from mddb_workflow.utils.gmx_spells import pdb_filter 

10from mddb_workflow.utils.type_hints import * 

11from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step 

12 

13 

14def _tm_score_no_filter(reference, subject, reference_indices, subject_indices, reference_length="shorter"): 

15 # Bypass the filter_amino_acids check by directly computing the score 

16 ref_length = struc.tm._get_reference_length( 

17 reference_length, 

18 struc.get_residue_count(reference), 

19 struc.get_residue_count(subject) 

20 ) 

21 distances = struc.geometry.distance(reference[reference_indices], subject[subject_indices]) 

22 return np.sum(struc.tm._tm_score(distances, ref_length)).item() / ref_length 

23_tm_score_no_filter.__doc__ = struc.tm_score.__doc__ 

24 

25def tmscores ( 

26 trajectory_file : 'File', 

27 output_directory : str, 

28 first_frame_file : 'File', 

29 average_structure_file : 'File', 

30 structure : 'Structure', 

31 pbc_selection : 'Selection', 

32 snapshots : int, 

33 frames_limit : int = 200): 

34 """ Perform the tm score using the tmscoring package. """ 

35 

36 # Set the main output filepath 

37 output_analysis_filepath = f'{output_directory}/{OUTPUT_TMSCORES_FILENAME}' 

38 struc.filter_amino_acids = lambda array: np.ones(array.array_length(), dtype=bool) 

39 tmscore_references = [first_frame_file, average_structure_file] 

40 

41 # Set the alpha carbon selection 

42 selection = structure.select('name CA', syntax='vmd') 

43 if not selection: 

44 print('WARNING: There are not atoms to be analyzed for the TM score analysis') 

45 return 

46 

47 # Remove PBC residues from the selection 

48 selection -= pbc_selection 

49 if not selection: 

50 print('WARNING: There are not atoms to be analyzed for the TM score analysis after PBC substraction') 

51 return 

52 

53 # Set the frame start and step 

54 start = 0 

55 step = None 

56 

57 output_analysis = [] 

58 

59 template = strucio.load_structure(first_frame_file.path) 

60 ca_list = selection.to_list() 

61 

62 ca_mask = np.isin(np.arange(template.array_length()), ca_list) 

63 template = template[ca_mask] 

64 frame_step, _ = calculate_frame_step(snapshots, frames_limit) 

65 xtc_file = xtc.XTCFile.read(trajectory_file.path, atom_i=ca_list, step=frame_step) 

66 trajectory = xtc_file.get_structure(template) 

67 

68 # Iterate over each reference and group 

69 for reference in tmscore_references: 

70 print(f' Running TM score using {reference.filename} as reference') 

71 

72 # Get the TM score of each frame 

73 tmscores = [] 

74 reference_frame = trajectory[0] 

75 for i in range(1, len(trajectory), frame_step): 

76 subject = trajectory[i] 

77 superimposed, _, ref_indices, sub_indices = struc.superimpose_structural_homologs( 

78 reference_frame, subject, max_iterations=1000) 

79 # Run the tmscoring over the current frame against the current reference 

80 tm = _tm_score_no_filter(reference_frame, superimposed, ref_indices, sub_indices) 

81 tmscores.append(tm) 

82 

83 # Get a standarized reference name 

84 reference_name = REFERENCE_LABELS[reference.filename] 

85 # Save the tmscores in the output object 

86 data = { 

87 'values': tmscores, 

88 'reference': reference_name, 

89 'group': 'c-alpha' 

90 } 

91 output_analysis.append(data) 

92 

93 # Export the analysis in json format 

94 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath)