Coverage for model_workflow/analyses/tmscores.py: 85%
54 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1import tmscoring
3from subprocess import run, PIPE, Popen
4import os
6from model_workflow.tools.get_pdb_frames import get_pdb_frames
7from model_workflow.utils.auxiliar import save_json
8from model_workflow.utils.constants import GROMACS_EXECUTABLE, REFERENCE_LABELS, OUTPUT_TMSCORES_FILENAME
9from model_workflow.utils.type_hints import *
11def tmscores (
12 trajectory_file : 'File',
13 output_directory : str,
14 first_frame_file : 'File',
15 average_structure_file : 'File',
16 structure : 'Structure',
17 pbc_selection : 'Selection',
18 snapshots : int,
19 frames_limit : int = 200):
20 """Perform the tm score using the tmscoring package."""
22 # Set the main output filepath
23 output_analysis_filepath = f'{output_directory}/{OUTPUT_TMSCORES_FILENAME}'
25 tmscore_references = [first_frame_file, average_structure_file]
27 # Set the alpha carbon selection
28 selection = structure.select('name CA', syntax='vmd')
29 if not selection:
30 print('WARNING: There are not atoms to be analyzed for the TM score analysis')
31 return
33 # Remove PBC residues from the selection
34 selection -= pbc_selection
35 if not selection:
36 print('WARNING: There are not atoms to be analyzed for the TM score analysis after PBC substraction')
37 return
39 # Convert the selection to ndx so we can use it in gromacs
40 selection_name = 'alpha-carbons'
41 ndx_selection = selection.to_ndx(selection_name)
42 ndx_filename = '.tmscore.ndx'
43 with open(ndx_filename, 'w') as file:
44 file.write(ndx_selection)
46 # Set the frame start and step
47 start = 0
48 step = None
50 output_analysis = []
52 # Iterate over each reference and group
53 for reference in tmscore_references:
54 print(' Running TM score using ' + reference.filename + ' as reference')
55 # Create a reference topology with only the group atoms
56 # WARNING: Yes, TM score would work also with the whole reference, but it takes more time!!
57 # This has been experimentally tested and it may take more than the double of time
58 grouped_reference = 'gref.pdb'
59 p = Popen([
60 "echo",
61 selection_name,
62 ], stdout=PIPE)
63 logs = run([
64 GROMACS_EXECUTABLE,
65 "trjconv",
66 "-s",
67 reference.path,
68 "-f",
69 reference.path,
70 '-o',
71 grouped_reference,
72 '-n',
73 ndx_filename,
74 "-dump",
75 "0",
76 '-quiet'
77 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE).stdout.decode()
78 p.stdout.close()
79 # If the output does not exist at this point it means something went wrong with gromacs
80 if not os.path.exists(grouped_reference):
81 print(logs)
82 raise SystemExit('Something went wrong with GROMACS')
83 # Get the TM score of each frame
84 # It must be done this way since tmscoring does not support trajectories
85 tmscores = []
86 frames, step, count = get_pdb_frames(reference.path, trajectory_file.path, snapshots, frames_limit)
87 for current_frame in frames:
89 # Filter atoms in the current frame
90 filtered_frame = 'filtered_frame.pdb'
91 p = Popen([
92 "echo",
93 selection_name,
94 ], stdout=PIPE)
95 logs = run([
96 GROMACS_EXECUTABLE,
97 "trjconv",
98 "-s",
99 current_frame,
100 "-f",
101 current_frame,
102 '-o',
103 filtered_frame,
104 '-n',
105 ndx_filename,
106 '-quiet'
107 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE).stdout.decode()
108 p.stdout.close()
110 # If the output does not exist at this point it means something went wrong with gromacs
111 if not os.path.exists(grouped_reference):
112 print(logs)
113 raise SystemExit('Something went wrong with GROMACS')
115 # Run the tmscoring over the current frame against the current reference
116 # Append the result data for each ligand
117 tmscore = tmscoring.get_tm(grouped_reference, filtered_frame)
118 tmscores.append(tmscore)
120 os.remove(filtered_frame)
122 # Get a standarized reference name
123 reference_name = REFERENCE_LABELS[reference.filename]
124 # Save the tmscores in the output object
125 data = {
126 'values': tmscores,
127 'reference': reference_name,
128 'group': 'c-alpha'
129 }
130 output_analysis.append(data)
132 os.remove(grouped_reference)
133 os.remove(ndx_filename)
134 # Export the analysis in json format
135 save_json({ 'start': start, 'step': step, 'data': output_analysis }, output_analysis_filepath)