Coverage for mddb_workflow / analyses / tmscores.py: 96%

47 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1"""Module to perform the TM score analysis.""" 

2 

3import numpy as np 

4import biotite.structure as struc 

5import biotite.structure.io as strucio 

6import biotite.structure.io.xtc as xtc 

7from mddb_workflow.utils.auxiliar import save_json 

8from mddb_workflow.utils.constants import REFERENCE_LABELS, OUTPUT_TMSCORES_FILENAME 

9from mddb_workflow.utils.type_hints import * 

10from mddb_workflow.tools.get_reduced_trajectory import calculate_frame_step 

11 

12 

13def _tm_score_no_filter(reference, subject, reference_indices, subject_indices, reference_length="shorter"): 

14 # Bypass the filter_amino_acids check by directly computing the score 

15 ref_length = struc.tm._get_reference_length( 

16 reference_length, 

17 struc.get_residue_count(reference), 

18 struc.get_residue_count(subject) 

19 ) 

20 distances = struc.geometry.distance(reference[reference_indices], subject[subject_indices]) 

21 return np.sum(struc.tm._tm_score(distances, ref_length)).item() / ref_length 

22 

23 

24def tmscores( 

25 trajectory_file: 'File', 

26 output_directory: str, 

27 first_frame_file: 'File', 

28 average_structure_file: 'File', 

29 structure: 'Structure', 

30 pbc_selection: 'Selection', 

31 snapshots: int, 

32 frames_limit: int = 200): 

33 """Perform the tm score using the tmscoring package.""" 

34 # Set the main output filepath 

35 output_analysis_filepath = f'{output_directory}/{OUTPUT_TMSCORES_FILENAME}' 

36 struc.filter_amino_acids = lambda array: np.ones(array.array_length(), dtype=bool) 

37 tmscore_references = [first_frame_file, average_structure_file] 

38 

39 # Set the alpha carbon selection 

40 selection = structure.select('name CA', syntax='vmd') 

41 if not selection: 

42 print('WARNING: There are not atoms to be analyzed for the TM score analysis') 

43 return 

44 

45 # Remove PBC residues from the selection 

46 selection -= pbc_selection 

47 if not selection: 

48 print('WARNING: There are not atoms to be analyzed for the TM score analysis after PBC substraction') 

49 return 

50 

51 # Set the frame start and step 

52 start = 0 

53 step = None 

54 

55 output_analysis = [] 

56 

57 template = strucio.load_structure(first_frame_file.path) 

58 ca_list = selection.to_list() 

59 

60 ca_mask = np.isin(np.arange(template.array_length()), ca_list) 

61 template = template[ca_mask] 

62 frame_step, _ = calculate_frame_step(snapshots, frames_limit) 

63 xtc_file = xtc.XTCFile.read(trajectory_file.path, atom_i=ca_list, step=frame_step) 

64 reduced_trajectory = xtc_file.get_structure(template) 

65 

66 # Iterate over each reference and group 

67 for reference in tmscore_references: 

68 print(f' Running TM score using {reference.filename} as reference') 

69 

70 # Get the TM score of each frame 

71 tmscores = [] 

72 reference_frame = reduced_trajectory[0] 

73 for i in range(len(reduced_trajectory)): 

74 subject = reduced_trajectory[i] 

75 superimposed, _, ref_indices, sub_indices = struc.superimpose_homologs( 

76 reference_frame, subject) 

77 # Run the tmscoring over the current frame against the current reference 

78 tm = _tm_score_no_filter(reference_frame, superimposed, ref_indices, sub_indices) 

79 tmscores.append(tm) 

80 

81 # Get a standarized reference name 

82 reference_name = REFERENCE_LABELS[reference.filename] 

83 # Save the tmscores in the output object 

84 data = { 

85 'values': tmscores, 

86 'reference': reference_name, 

87 'group': 'c-alpha' 

88 } 

89 output_analysis.append(data) 

90 

91 # Export the analysis in json format 

92 save_json({'start': start, 'step': step, 'data': output_analysis}, output_analysis_filepath)