Coverage for model_workflow/tools/image_and_fit.py: 12%
92 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1# This script is used to fit a trajectory (i.e. eliminate translation and rotation)
2# This process is carried by Gromacs
4from subprocess import run, PIPE, Popen
6from model_workflow.utils.constants import GROMACS_EXECUTABLE, GREY_HEADER, COLOR_END
7from model_workflow.utils.structures import Structure
8from model_workflow.utils.auxiliar import InputError, MISSING_TOPOLOGY
9from model_workflow.utils.type_hints import *
11# Set a pair of independent auxiliar functions to save and recover chains from a pdb file
12# WARNING: These functions must be used only when the pdb has not changed in number of atoms
14# Get a list with each atom chain from a pdb
15def get_chains (pdb_filename : str) -> list:
16 structure = Structure.from_pdb_file(pdb_filename)
17 chains = structure.chains
18 return chains
20# Set each atom chain from a pdb from a list
21def set_chains (pdb_filename : str, chains : list):
22 structure = Structure.from_pdb_file(pdb_filename)
23 structure.chains = chains
24 structure.generate_pdb_file(pdb_filename)
26# Set the default centering/fitting selection (vmd syntax): protein and nucleic acids
27CENTER_SELECTION_NAME = 'protein_and_nucleic_acids'
28CENTER_INDEX_FILEPATH = '.index.ndx'
30# Image and fit a trajectory
31# image - Set if it must me imaged
32# fit - Set if it must be fitted
33# translation - Set how it must be translated during imaging
34# pbc_selection - Set a selection to exclude PBC residues from both the centering and the fitting focuses
35def image_and_fit (
36 # Tested supported formats are .pdb and .tpr
37 input_structure_file : 'File',
38 # Tested supported formats are .trr and .xtc
39 input_trajectory_file : 'File',
40 # This must be in .tpr format
41 # This is optional if there are no PBC residues
42 input_topology_file : Union['File', Exception],
43 output_structure_file : 'File',
44 output_trajectory_file : 'File',
45 image : bool, fit : bool,
46 translation : list,
47 # Note that this is an early provisional structure
48 structure : 'Structure',
49 # Selection of atoms which are to stay in PBC conditions after imaging
50 # Note that this is an early provisional atom selection
51 pbc_selection : 'Selection',
52 ) -> str:
54 print(f' Image: {image} | Fit: {fit}')
56 if not image and not fit:
57 return
59 # In order to run the imaging with PBC residues we need a .tpr file, not just the .pdb file
60 # This is because there is a '-pbc mol' step which only works with a .tpr file
61 is_tpr_available = input_topology_file != MISSING_TOPOLOGY and input_topology_file.format == 'tpr'
62 has_pbc_atoms = bool(pbc_selection)
63 if image and not is_tpr_available and has_pbc_atoms:
64 raise InputError('In order to image a simulation with PBC residues using Gromacs it is mandatory to provide a .tpr file')
66 # If we have coarse grain then we can only fit if we have a tpr file
67 # It will not work otherwise since we need atom masses
68 # For some image protocols we need to work with pdbs at some points so we directly surrender
69 if structure.select_cg():
70 if image: raise InputError('We cannot image a coarse grain simulation using Gromacs')
71 if not is_tpr_available:
72 raise InputError('We cannot fit a coarse grain simulation using Gromacs without a TPR file')
74 # First of all save chains
75 # Gromacs will delete chains so we need to recover them after
76 chains_backup = get_chains(input_structure_file.path)
78 # Set a custom index file (.ndx) to select protein and nucleic acids
79 # This is useful for some imaging steps (centering and fitting)
80 system_selection = structure.select('all', syntax='vmd')
81 protein_selection = structure.select_protein()
82 nucleic_selection = structure.select_nucleic()
83 custom_selection = protein_selection + nucleic_selection
84 if not custom_selection:
85 raise SystemExit(f'The default selection to center (protein or nucleic) is empty. Please image your simulation manually.')
86 # Exclude PBC residues from this custom selection
87 custom_selection -= pbc_selection
88 # Convert both selections to a single ndx file which gromacs can read
89 system_selection_ndx = system_selection.to_ndx('System')
90 selection_ndx = custom_selection.to_ndx(CENTER_SELECTION_NAME)
91 with open(CENTER_INDEX_FILEPATH, 'w') as file:
92 file.write(system_selection_ndx)
93 file.write(selection_ndx)
95 # Imaging --------------------------------------------------------------------------------------
97 if image:
98 print(' Running first imaging step')
100 # Check if coordinates are to be translated
101 must_translate = translation != [0, 0, 0]
103 # Set logs grey
104 print(GREY_HEADER)
106 # If so run the imaging process without the '-center' flag
107 if must_translate:
108 p = Popen([
109 "echo",
110 "System",
111 ], stdout=PIPE)
112 process = run([
113 GROMACS_EXECUTABLE,
114 "trjconv",
115 "-s",
116 input_structure_file.path,
117 "-f",
118 input_trajectory_file.path,
119 '-o',
120 output_trajectory_file.path,
121 '-trans',
122 str(translation[0]),
123 str(translation[1]),
124 str(translation[2]),
125 '-pbc',
126 'atom',
127 #'-ur', # WARNING: This makes everything stay the same
128 #'compact', # WARNING: This makes everything stay the same
129 '-quiet'
130 ], stdin=p.stdout, stdout=PIPE)
131 # If no translation is required and we have a tpr then use it to make molecules whole
132 elif is_tpr_available:
133 p = Popen([
134 "echo",
135 CENTER_SELECTION_NAME,
136 "System",
137 ], stdout=PIPE)
138 process = run([
139 GROMACS_EXECUTABLE,
140 "trjconv",
141 "-s",
142 input_topology_file.path,
143 "-f",
144 input_trajectory_file.path,
145 '-o',
146 output_trajectory_file.path,
147 '-pbc',
148 'whole',
149 '-quiet'
150 ], stdin=p.stdout, stdout=PIPE)
151 # Otherwise, center the custom selection
152 else:
153 p = Popen([
154 "echo",
155 CENTER_SELECTION_NAME,
156 "System",
157 ], stdout=PIPE)
158 process = run([
159 GROMACS_EXECUTABLE,
160 "trjconv",
161 "-s",
162 input_structure_file.path,
163 "-f",
164 input_trajectory_file.path,
165 '-o',
166 output_trajectory_file.path,
167 '-pbc',
168 'atom',
169 '-center',
170 '-n',
171 CENTER_INDEX_FILEPATH,
172 '-quiet'
173 ], stdin=p.stdout, stdout=PIPE)
174 # Run the defined process
175 logs = process.stdout.decode()
176 p.stdout.close()
177 print(COLOR_END)
179 # Check the output exists
180 if not output_trajectory_file.exists:
181 print(logs)
182 raise Exception('Something went wrong with Gromacs during first imaging step')
184 # Select the first frame of the recently imaged trayectory as the new topology
185 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path)
187 # If there are PBC residues then run a '-pbc mol' to make all residues stay inside the box anytime
188 if has_pbc_atoms:
189 print(' Fencing PBC molecules back to the box')
190 print(GREY_HEADER)
191 # Run Gromacs
192 p = Popen([
193 "echo",
194 "System",
195 ], stdout=PIPE)
196 logs = run([
197 GROMACS_EXECUTABLE,
198 "trjconv",
199 "-s",
200 input_topology_file.path,
201 "-f",
202 output_trajectory_file.path,
203 '-o',
204 output_trajectory_file.path,
205 '-pbc',
206 'mol', # Note that the 'mol' option requires a tpr to be passed
207 '-n',
208 CENTER_INDEX_FILEPATH,
209 '-quiet'
210 ], stdin=p.stdout, stdout=PIPE).stdout.decode()
211 p.stdout.close()
212 print(COLOR_END)
214 # Select the first frame of the recently translated and imaged trayectory as the new topology
215 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path)
218 # -----------------------------------------------------------------------------------------------
220 # If there are no PBC residues then run a '-pbc nojump' to avoid non-sense jumping of any molecule
221 else:
222 print(' Running no-jump')
223 print(GREY_HEADER)
224 # Run Gromacs
225 p = Popen([
226 "echo",
227 "System",
228 ], stdout=PIPE)
229 logs = run([
230 GROMACS_EXECUTABLE,
231 "trjconv",
232 "-s",
233 output_structure_file.path,
234 "-f",
235 output_trajectory_file.path,
236 '-o',
237 output_trajectory_file.path,
238 # Expanding the box may help in some situations
239 # However there are secondary effects for the trajectory
240 #'-box',
241 #'999',
242 #'999',
243 #'999',
244 '-pbc',
245 'nojump',
246 '-quiet'
247 ], stdin=p.stdout, stdout=PIPE).stdout.decode()
248 p.stdout.close()
249 print(COLOR_END)
251 # Fitting --------------------------------------------------------------------------------------
253 # Remove translation and rotation in the trajectory using the custom selection as reference
254 # WARNING: Sometimes, simulations with membranes do not require this step and they may get worse when fitted
255 if fit:
256 print(' Running fitting step')
258 # The trajectory to fit is the already imaged trajectory
259 # However, if there was no imaging, the trajectory to fit is the input trajectory
260 structure_to_fit = output_structure_file if image else input_structure_file
261 trajectroy_to_fit = output_trajectory_file if image else input_trajectory_file
263 # If we have a TPR then use it here
264 # DANI: No estoy seguro de si funcionaría bien después de un imageado
265 # DANI: Esto siempre lo he hecho usando la estructura de la primera frame ya imageada
266 if is_tpr_available: structure_to_fit = input_topology_file
268 print(GREY_HEADER)
269 # Run Gromacs
270 p = Popen([
271 "echo",
272 CENTER_SELECTION_NAME,
273 "System",
274 ], stdout=PIPE)
275 logs = run([
276 GROMACS_EXECUTABLE,
277 "trjconv",
278 "-s",
279 structure_to_fit.path,
280 "-f",
281 trajectroy_to_fit.path,
282 '-o',
283 output_trajectory_file.path,
284 '-fit',
285 'rot+trans',
286 '-n',
287 CENTER_INDEX_FILEPATH,
288 '-quiet'
289 ], stdin=p.stdout, stdout=PIPE).stdout.decode()
290 p.stdout.close()
291 print(COLOR_END)
293 # If there is no output structure at this time (i.e. there was no imaging) then create it now
294 # Note that the input structure is not necessarily the output structure in this scenario
295 # The fit may have been done using the topology and there may be an offset, so better dump it
296 if not output_structure_file.exists:
297 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path)
299 # Recover chains
300 set_chains(output_structure_file.path, chains_backup)
302 # Clean up the index file
303 # if exists(CENTER_INDEX_FILEPATH):
304 # remove(CENTER_INDEX_FILEPATH)
307# Get the first frame of a trajectory
308def reset_structure (
309 input_structure_filename : str,
310 input_trajectory_filename : str,
311 output_structure_filename : str,
312):
313 print(' Reseting structure after imaging step')
314 p = Popen([
315 "echo",
316 "System",
317 ], stdout=PIPE)
318 process = run([
319 GROMACS_EXECUTABLE,
320 "trjconv",
321 "-s",
322 input_structure_filename,
323 "-f",
324 input_trajectory_filename,
325 '-o',
326 output_structure_filename,
327 '-dump',
328 '0',
329 '-quiet'
330 ], stdin=p.stdout, stdout=PIPE, stderr=PIPE)
331 logs = process.stdout.decode()
332 p.stdout.close()