Coverage for model_workflow/analyses/tmscores.py: 85%

54 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1import tmscoring 

2 

3from subprocess import run, PIPE, Popen 

4import os 

5 

6from model_workflow.tools.get_pdb_frames import get_pdb_frames 

7from model_workflow.utils.auxiliar import save_json 

8from model_workflow.utils.constants import GROMACS_EXECUTABLE, REFERENCE_LABELS, OUTPUT_TMSCORES_FILENAME 

9from model_workflow.utils.type_hints import * 

10 

11def tmscores ( 

12 trajectory_file : 'File', 

13 output_directory : str, 

14 first_frame_file : 'File', 

15 average_structure_file : 'File', 

16 structure : 'Structure', 

17 pbc_selection : 'Selection', 

18 snapshots : int, 

19 frames_limit : int = 200): 

20 """Perform the tm score using the tmscoring package.""" 

21 

22 # Set the main output filepath 

23 output_analysis_filepath = f'{output_directory}/{OUTPUT_TMSCORES_FILENAME}' 

24 

25 tmscore_references = [first_frame_file, average_structure_file] 

26 

27 # Set the alpha carbon selection 

28 selection = structure.select('name CA', syntax='vmd') 

29 if not selection: 

30 print('WARNING: There are not atoms to be analyzed for the TM score analysis') 

31 return 

32 

33 # Remove PBC residues from the selection 

34 selection -= pbc_selection 

35 if not selection: 

36 print('WARNING: There are not atoms to be analyzed for the TM score analysis after PBC substraction') 

37 return 

38 

39 # Convert the selection to ndx so we can use it in gromacs 

40 selection_name = 'alpha-carbons' 

41 ndx_selection = selection.to_ndx(selection_name) 

42 ndx_filename = '.tmscore.ndx' 

43 with open(ndx_filename, 'w') as file: 

44 file.write(ndx_selection) 

45 

46 # Set the frame start and step 

47 start = 0 

48 step = None 

49 

50 output_analysis = [] 

51 

52 # Iterate over each reference and group 

53 for reference in tmscore_references: 

54 print(' Running TM score using ' + reference.filename + ' as reference') 

55 # Create a reference topology with only the group atoms 

56 # WARNING: Yes, TM score would work also with the whole reference, but it takes more time!! 

57 # This has been experimentally tested and it may take more than the double of time 

58 grouped_reference = 'gref.pdb' 

59 p = Popen([ 

60 "echo", 

61 selection_name, 

62 ], stdout=PIPE) 

63 logs = run([ 

64 GROMACS_EXECUTABLE, 

65 "trjconv", 

66 "-s", 

67 reference.path, 

68 "-f", 

69 reference.path, 

70 '-o', 

71 grouped_reference, 

72 '-n', 

73 ndx_filename, 

74 "-dump", 

75 "0", 

76 '-quiet' 

77 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE).stdout.decode() 

78 p.stdout.close() 

79 # If the output does not exist at this point it means something went wrong with gromacs 

80 if not os.path.exists(grouped_reference): 

81 print(logs) 

82 raise SystemExit('Something went wrong with GROMACS') 

83 # Get the TM score of each frame 

84 # It must be done this way since tmscoring does not support trajectories 

85 tmscores = [] 

86 frames, step, count = get_pdb_frames(reference.path, trajectory_file.path, snapshots, frames_limit) 

87 for current_frame in frames: 

88 

89 # Filter atoms in the current frame 

90 filtered_frame = 'filtered_frame.pdb' 

91 p = Popen([ 

92 "echo", 

93 selection_name, 

94 ], stdout=PIPE) 

95 logs = run([ 

96 GROMACS_EXECUTABLE, 

97 "trjconv", 

98 "-s", 

99 current_frame, 

100 "-f", 

101 current_frame, 

102 '-o', 

103 filtered_frame, 

104 '-n', 

105 ndx_filename, 

106 '-quiet' 

107 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE).stdout.decode() 

108 p.stdout.close() 

109 

110 # If the output does not exist at this point it means something went wrong with gromacs 

111 if not os.path.exists(grouped_reference): 

112 print(logs) 

113 raise SystemExit('Something went wrong with GROMACS') 

114 

115 # Run the tmscoring over the current frame against the current reference 

116 # Append the result data for each ligand 

117 tmscore = tmscoring.get_tm(grouped_reference, filtered_frame) 

118 tmscores.append(tmscore) 

119 

120 os.remove(filtered_frame) 

121 

122 # Get a standarized reference name 

123 reference_name = REFERENCE_LABELS[reference.filename] 

124 # Save the tmscores in the output object 

125 data = { 

126 'values': tmscores, 

127 'reference': reference_name, 

128 'group': 'c-alpha' 

129 } 

130 output_analysis.append(data) 

131 

132 os.remove(grouped_reference) 

133 os.remove(ndx_filename) 

134 # Export the analysis in json format 

135 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath)