Coverage for model_workflow/analyses/tmscores.py: 85%

1import tmscoring

3from subprocess import run, PIPE, Popen

4import os

6from model_workflow.tools.get_pdb_frames import get_pdb_frames

7from model_workflow.utils.auxiliar import save_json

8from model_workflow.utils.constants import GROMACS_EXECUTABLE, REFERENCE_LABELS, OUTPUT_TMSCORES_FILENAME

9from model_workflow.utils.type_hints import *

11def tmscores (

12 trajectory_file : 'File',

13 output_directory : str,

14 first_frame_file : 'File',

15 average_structure_file : 'File',

16 structure : 'Structure',

17 pbc_selection : 'Selection',

18 snapshots : int,

19 frames_limit : int = 200):

20 """Perform the tm score using the tmscoring package."""

22 # Set the main output filepath

23 output_analysis_filepath = f'{output_directory}/{OUTPUT_TMSCORES_FILENAME}'

25 tmscore_references = [first_frame_file, average_structure_file]

27 # Set the alpha carbon selection

28 selection = structure.select('name CA', syntax='vmd')

29 if not selection:

30 print('WARNING: There are not atoms to be analyzed for the TM score analysis')

31 return

33 # Remove PBC residues from the selection

34 selection -= pbc_selection

35 if not selection:

36 print('WARNING: There are not atoms to be analyzed for the TM score analysis after PBC substraction')

37 return

39 # Convert the selection to ndx so we can use it in gromacs

40 selection_name = 'alpha-carbons'

41 ndx_selection = selection.to_ndx(selection_name)

42 ndx_filename = '.tmscore.ndx'

43 with open(ndx_filename, 'w') as file:

44 file.write(ndx_selection)

46 # Set the frame start and step

47 start = 0

48 step = None

50 output_analysis = []

52 # Iterate over each reference and group

53 for reference in tmscore_references:

54 print(' Running TM score using ' + reference.filename + ' as reference')

55 # Create a reference topology with only the group atoms

56 # WARNING: Yes, TM score would work also with the whole reference, but it takes more time!!

57 # This has been experimentally tested and it may take more than the double of time

58 grouped_reference = 'gref.pdb'

59 p = Popen([

60 "echo",

61 selection_name,

62 ], stdout=PIPE)

63 logs = run([

64 GROMACS_EXECUTABLE,

65 "trjconv",

66 "-s",

67 reference.path,

68 "-f",

69 reference.path,

70 '-o',

71 grouped_reference,

72 '-n',

73 ndx_filename,

74 "-dump",

75 "0",

76 '-quiet'

77 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE).stdout.decode()

78 p.stdout.close()

79 # If the output does not exist at this point it means something went wrong with gromacs

80 if not os.path.exists(grouped_reference):

81 print(logs)

82 raise SystemExit('Something went wrong with GROMACS')

83 # Get the TM score of each frame

84 # It must be done this way since tmscoring does not support trajectories

85 tmscores = []

86 frames, step, count = get_pdb_frames(reference.path, trajectory_file.path, snapshots, frames_limit)

87 for current_frame in frames:

89 # Filter atoms in the current frame

90 filtered_frame = 'filtered_frame.pdb'

91 p = Popen([

92 "echo",

93 selection_name,

94 ], stdout=PIPE)

95 logs = run([

96 GROMACS_EXECUTABLE,

97 "trjconv",

98 "-s",

99 current_frame,

100 "-f",

101 current_frame,

102 '-o',

103 filtered_frame,

104 '-n',

105 ndx_filename,

106 '-quiet'

107 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE).stdout.decode()

108 p.stdout.close()

109

110 # If the output does not exist at this point it means something went wrong with gromacs

111 if not os.path.exists(grouped_reference):

112 print(logs)

113 raise SystemExit('Something went wrong with GROMACS')

114

115 # Run the tmscoring over the current frame against the current reference

116 # Append the result data for each ligand

117 tmscore = tmscoring.get_tm(grouped_reference, filtered_frame)

118 tmscores.append(tmscore)

119

120 os.remove(filtered_frame)

121

122 # Get a standarized reference name

123 reference_name = REFERENCE_LABELS[reference.filename]

124 # Save the tmscores in the output object

125 data = {

126 'values': tmscores,

127 'reference': reference_name,

128 'group': 'c-alpha'

129 }

130 output_analysis.append(data)

131

132 os.remove(grouped_reference)

133 os.remove(ndx_filename)

134 # Export the analysis in json format

135 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath)