Coverage for mddb_workflow/tools/image_and_fit.py: 15%
66 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
1# This script is used to fit a trajectory (i.e. eliminate translation and rotation)
2# This process is carried by Gromacs
4from mddb_workflow.utils.structures import Structure
5from mddb_workflow.utils.auxiliar import InputError, MISSING_TOPOLOGY
6from mddb_workflow.utils.gmx_spells import run_gromacs
7from mddb_workflow.utils.type_hints import *
9# Set a pair of independent auxiliar functions to save and recover chains from a pdb file
10# WARNING: These functions must be used only when the pdb has not changed in number of atoms
12# Get a list with each atom chain from a pdb
13def get_chains (pdb_filename : str) -> list:
14 structure = Structure.from_pdb_file(pdb_filename)
15 chains = structure.chains
16 return chains
18# Set each atom chain from a pdb from a list
19def set_chains (pdb_filename : str, chains : list):
20 structure = Structure.from_pdb_file(pdb_filename)
21 structure.chains = chains
22 structure.generate_pdb_file(pdb_filename)
24# Set the default centering/fitting selection (vmd syntax): protein and nucleic acids
25CENTER_SELECTION_NAME = 'protein_and_nucleic_acids'
26CENTER_INDEX_FILEPATH = '.index.ndx'
28# Image and fit a trajectory
29# image - Set if it must me imaged
30# fit - Set if it must be fitted
31# translation - Set how it must be translated during imaging
32# pbc_selection - Set a selection to exclude PBC residues from both the centering and the fitting focuses
33def image_and_fit (
34 # Tested supported formats are .pdb and .tpr
35 input_structure_file : 'File',
36 # Tested supported formats are .trr and .xtc
37 input_trajectory_file : 'File',
38 # This must be in .tpr format
39 # This is optional if there are no PBC residues
40 input_topology_file : Union['File', Exception],
41 output_structure_file : 'File',
42 output_trajectory_file : 'File',
43 image : bool, fit : bool,
44 translation : list,
45 # Note that this is an early provisional structure
46 structure : 'Structure',
47 # Selection of atoms which are to stay in PBC conditions after imaging
48 # Note that this is an early provisional atom selection
49 pbc_selection : 'Selection',
50 ) -> str:
52 print(f' Image: {image} | Fit: {fit}')
54 if not image and not fit:
55 return
57 # In order to run the imaging with PBC residues we need a .tpr file, not just the .pdb file
58 # This is because there is a '-pbc mol' step which only works with a .tpr file
59 is_tpr_available = input_topology_file != MISSING_TOPOLOGY and input_topology_file.format == 'tpr'
60 has_pbc_atoms = bool(pbc_selection)
61 if image and not is_tpr_available and has_pbc_atoms:
62 raise InputError('In order to image a simulation with PBC residues using Gromacs it is mandatory to provide a .tpr file')
64 # If we have coarse grain then we can only fit if we have a tpr file
65 # It will not work otherwise since we need atom masses
66 # For some image protocols we need to work with pdbs at some points so we directly surrender
67 if structure.select_cg():
68 if image: raise InputError('We cannot image a coarse grain simulation using Gromacs')
69 if not is_tpr_available:
70 raise InputError('We cannot fit a coarse grain simulation using Gromacs without a TPR file')
72 # First of all save chains
73 # Gromacs will delete chains so we need to recover them after
74 chains_backup = get_chains(input_structure_file.path)
76 # Set a custom index file (.ndx) to select protein and nucleic acids
77 # This is useful for some imaging steps (centering and fitting)
78 system_selection = structure.select('all', syntax='vmd')
79 protein_selection = structure.select_protein()
80 nucleic_selection = structure.select_nucleic()
81 custom_selection = protein_selection + nucleic_selection
82 if not custom_selection:
83 raise SystemExit(f'The default selection to center (protein or nucleic) is empty. Please image your simulation manually.')
84 # Exclude PBC residues from this custom selection
85 custom_selection -= pbc_selection
86 # Convert both selections to a single ndx file which gromacs can read
87 system_selection_ndx = system_selection.to_ndx('System')
88 selection_ndx = custom_selection.to_ndx(CENTER_SELECTION_NAME)
89 with open(CENTER_INDEX_FILEPATH, 'w') as file:
90 file.write(system_selection_ndx)
91 file.write(selection_ndx)
93 # Imaging --------------------------------------------------------------------------------------
95 if image:
96 print(' Running first imaging step')
98 # Check if coordinates are to be translated
99 must_translate = translation != [0, 0, 0]
101 # If so run the imaging process without the '-center' flag
102 if must_translate:
103 # WARNING: Adding '-ur compact' makes everything stay the same
104 run_gromacs(f'trjconv -s {input_structure_file.path} \
105 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \
106 -trans {translation[0]} {translation[1]} {translation[2]} \
107 -pbc atom', user_input = 'System', show_error_logs = True)
108 # If no translation is required and we have a tpr then use it to make molecules whole
109 elif is_tpr_available:
110 run_gromacs(f'trjconv -s {input_topology_file.path} \
111 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \
112 -pbc whole', user_input = 'System', show_error_logs = True)
113 # Otherwise, center the custom selection
114 else:
115 run_gromacs(f'trjconv -s {input_structure_file.path} \
116 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \
117 -pbc atom -center -n {CENTER_INDEX_FILEPATH}',
118 user_input = f'{CENTER_SELECTION_NAME} System', show_error_logs = True)
120 # Select the first frame of the recently imaged trayectory as the new topology
121 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path)
123 # If there are PBC residues then run a '-pbc mol' to make all residues stay inside the box anytime
124 if has_pbc_atoms:
125 print(' Fencing PBC molecules back to the box')
126 # Run Gromacs
127 run_gromacs(f'trjconv -s {input_topology_file.path} \
128 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \
129 -pbc mol -n {CENTER_INDEX_FILEPATH}',
130 user_input = 'System', show_error_logs = True)
132 # Select the first frame of the recently translated and imaged trayectory as the new topology
133 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path)
136 # -----------------------------------------------------------------------------------------------
138 # If there are no PBC residues then run a '-pbc nojump' to avoid non-sense jumping of any molecule
139 else:
140 print(' Running no-jump')
141 # Run Gromacs
142 # Expanding the box may help in some situations but there are secondary effects
143 # i.e. -box 999 999 999
144 run_gromacs(f'trjconv -s {output_structure_file.path} \
145 -f {output_trajectory_file.path} -o {output_trajectory_file.path} \
146 -pbc nojump',
147 user_input = 'System', show_error_logs = True)
149 # Fitting --------------------------------------------------------------------------------------
151 # Remove translation and rotation in the trajectory using the custom selection as reference
152 # WARNING: Sometimes, simulations with membranes do not require this step and they may get worse when fitted
153 if fit:
154 print(' Running fitting step')
156 # The trajectory to fit is the already imaged trajectory
157 # However, if there was no imaging, the trajectory to fit is the input trajectory
158 structure_to_fit = output_structure_file if image else input_structure_file
159 trajectroy_to_fit = output_trajectory_file if image else input_trajectory_file
161 # If we have a TPR then use it here
162 # DANI: No estoy seguro de si funcionaría bien después de un imageado
163 # DANI: Esto siempre lo he hecho usando la estructura de la primera frame ya imageada
164 if is_tpr_available: structure_to_fit = input_topology_file
166 # Run Gromacs
167 run_gromacs(f'trjconv -s {structure_to_fit.path} \
168 -f {trajectroy_to_fit.path} -o {output_trajectory_file.path} \
169 -fit rot+trans -n {CENTER_INDEX_FILEPATH}',
170 user_input = f'{CENTER_SELECTION_NAME} System',
171 show_error_logs = True)
173 # If there is no output structure at this time (i.e. there was no imaging) then create it now
174 # Note that the input structure is not necessarily the output structure in this scenario
175 # The fit may have been done using the topology and there may be an offset, so better dump it
176 if not output_structure_file.exists:
177 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path)
179 # Recover chains
180 set_chains(output_structure_file.path, chains_backup)
182 # Clean up the index file
183 # if exists(CENTER_INDEX_FILEPATH):
184 # remove(CENTER_INDEX_FILEPATH)
187# Get the first frame of a trajectory
188def reset_structure (
189 input_structure_filepath : str,
190 input_trajectory_filepath : str,
191 output_structure_filepath : str,
192):
193 print(' Reseting structure after imaging step')
194 run_gromacs(f'trjconv -s {input_structure_filepath} \
195 -f {input_trajectory_filepath} -o {output_structure_filepath} \
196 -dump 0', user_input = 'System')