Coverage for mddb_workflow / tools / image_and_fit.py: 15%
66 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
1# This script is used to fit a trajectory (i.e. eliminate translation and rotation)
2# This process is carried by Gromacs
4from mddb_workflow.utils.structures import Structure
5from mddb_workflow.utils.auxiliar import InputError, MISSING_TOPOLOGY
6from mddb_workflow.utils.gmx_spells import run_gromacs
7from mddb_workflow.utils.type_hints import *
9# Set a pair of independent auxiliar functions to save and recover chains from a pdb file
10# WARNING: These functions must be used only when the pdb has not changed in number of atoms
12# Get a list with each atom chain from a pdb
13def get_chains (pdb_filename : str) -> list:
14 structure = Structure.from_pdb_file(pdb_filename)
15 chains = structure.chains
16 return chains
18# Set each atom chain from a pdb from a list
19def set_chains (pdb_filename : str, chains : list):
20 structure = Structure.from_pdb_file(pdb_filename)
21 structure.chains = chains
22 structure.generate_pdb_file(pdb_filename)
24# Set the default centering/fitting selection (vmd syntax): protein and nucleic acids
25CENTER_SELECTION_NAME = 'protein_and_nucleic_acids'
26CENTER_INDEX_FILEPATH = '.index.ndx'
28# Image and fit a trajectory
29# image - Set if it must me imaged
30# fit - Set if it must be fitted
31# translation - Set how it must be translated during imaging
32# pbc_selection - Set a selection to exclude PBC residues from both the centering and the fitting focuses
33def image_and_fit (
34 # Tested supported formats are .pdb and .tpr
35 input_structure_file : 'File',
36 # Tested supported formats are .trr and .xtc
37 input_trajectory_file : 'File',
38 # This must be in .tpr format
39 # This is optional if there are no PBC residues
40 input_topology_file : Union['File', Exception],
41 output_structure_file : 'File',
42 output_trajectory_file : 'File',
43 image : bool,
44 fit : bool,
45 translation : list,
46 # Note that this is an early provisional structure
47 structure : 'Structure',
48 # Selection of atoms which are to stay in PBC conditions after imaging
49 # Note that this is an early provisional atom selection
50 pbc_selection : 'Selection',
51 ) -> str:
53 print(f' Image: {image} | Fit: {fit}')
55 if not image and not fit:
56 return
58 # In order to run the imaging with PBC residues we need a .tpr file, not just the .pdb file
59 # This is because there is a '-pbc mol' step which only works with a .tpr file
60 is_tpr_available = input_topology_file != MISSING_TOPOLOGY and input_topology_file.format == 'tpr'
61 has_pbc_atoms = bool(pbc_selection)
62 if image and not is_tpr_available and has_pbc_atoms:
63 raise InputError('In order to image a simulation with PBC residues using Gromacs it is mandatory to provide a .tpr file')
65 # If we have coarse grain then we can only fit if we have a tpr file
66 # It will not work otherwise since we need atom masses
67 # For some image protocols we need to work with pdbs at some points so we directly surrender
68 if structure.select_cg():
69 if image: raise InputError('We cannot image a coarse grain simulation using Gromacs')
70 if not is_tpr_available:
71 raise InputError('We cannot fit a coarse grain simulation using Gromacs without a TPR file')
73 # First of all save chains
74 # Gromacs will delete chains so we need to recover them after
75 chains_backup = get_chains(input_structure_file.path)
77 # Set a custom index file (.ndx) to select protein and nucleic acids
78 # This is useful for some imaging steps (centering and fitting)
79 system_selection = structure.select('all', syntax='vmd')
80 protein_selection = structure.select_protein()
81 nucleic_selection = structure.select_nucleic()
82 custom_selection = protein_selection + nucleic_selection
83 if not custom_selection:
84 raise InputError(f'The default selection to center (protein or nucleic) is empty. Please image your simulation manually.')
85 # Exclude PBC residues from this custom selection
86 custom_selection -= pbc_selection
87 # Convert both selections to a single ndx file which gromacs can read
88 system_selection_ndx = system_selection.to_ndx('System')
89 selection_ndx = custom_selection.to_ndx(CENTER_SELECTION_NAME)
90 with open(CENTER_INDEX_FILEPATH, 'w') as file:
91 file.write(system_selection_ndx)
92 file.write(selection_ndx)
94 # Imaging --------------------------------------------------------------------------------------
96 if image:
97 print(' Running first imaging step')
99 # Check if coordinates are to be translated
100 must_translate = translation != [0, 0, 0]
102 # If so run the imaging process without the '-center' flag
103 if must_translate:
104 # WARNING: Adding '-ur compact' makes everything stay the same
105 run_gromacs(f'trjconv -s {input_structure_file.path} \
106 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \
107 -trans {translation[0]} {translation[1]} {translation[2]} \
108 -pbc atom', user_input = 'System', show_error_logs = True)
109 # If no translation is required and we have a tpr then use it to make molecules whole
110 elif is_tpr_available:
111 run_gromacs(f'trjconv -s {input_topology_file.path} \
112 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \
113 -pbc whole', user_input = 'System', show_error_logs = True)
114 # Otherwise, center the custom selection
115 else:
116 run_gromacs(f'trjconv -s {input_structure_file.path} \
117 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \
118 -pbc atom -center -n {CENTER_INDEX_FILEPATH}',
119 user_input = f'{CENTER_SELECTION_NAME} System', show_error_logs = True)
121 # Select the first frame of the recently imaged trayectory as the new topology
122 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path)
124 # If there are PBC residues then run a '-pbc mol' to make all residues stay inside the box anytime
125 if has_pbc_atoms:
126 print(' Fencing PBC molecules back to the box')
127 # Run Gromacs
128 run_gromacs(f'trjconv -s {input_topology_file.path} \
129 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \
130 -pbc mol -n {CENTER_INDEX_FILEPATH}',
131 user_input = 'System', show_error_logs = True)
133 # Select the first frame of the recently translated and imaged trayectory as the new topology
134 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path)
137 # -----------------------------------------------------------------------------------------------
139 # If there are no PBC residues then run a '-pbc nojump' to avoid non-sense jumping of any molecule
140 else:
141 print(' Running no-jump')
142 # Run Gromacs
143 # Expanding the box may help in some situations but there are secondary effects
144 # i.e. -box 999 999 999
145 run_gromacs(f'trjconv -s {output_structure_file.path} \
146 -f {output_trajectory_file.path} -o {output_trajectory_file.path} \
147 -pbc nojump',
148 user_input = 'System', show_error_logs = True)
150 # Fitting --------------------------------------------------------------------------------------
152 # Remove translation and rotation in the trajectory using the custom selection as reference
153 # WARNING: Sometimes, simulations with membranes do not require this step and they may get worse when fitted
154 if fit:
155 print(' Running fitting step')
157 # The trajectory to fit is the already imaged trajectory
158 # However, if there was no imaging, the trajectory to fit is the input trajectory
159 structure_to_fit = output_structure_file if image else input_structure_file
160 trajectroy_to_fit = output_trajectory_file if image else input_trajectory_file
162 # If we have a TPR then use it here
163 # DANI: No estoy seguro de si funcionaría bien después de un imageado
164 # DANI: Esto siempre lo he hecho usando la estructura de la primera frame ya imageada
165 if is_tpr_available: structure_to_fit = input_topology_file
167 # Run Gromacs
168 run_gromacs(f'trjconv -s {structure_to_fit.path} \
169 -f {trajectroy_to_fit.path} -o {output_trajectory_file.path} \
170 -fit rot+trans -n {CENTER_INDEX_FILEPATH}',
171 user_input = f'{CENTER_SELECTION_NAME} System',
172 show_error_logs = True)
174 # If there is no output structure at this time (i.e. there was no imaging) then create it now
175 # Note that the input structure is not necessarily the output structure in this scenario
176 # The fit may have been done using the topology and there may be an offset, so better dump it
177 if not output_structure_file.exists:
178 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path)
180 # Recover chains
181 set_chains(output_structure_file.path, chains_backup)
183 # Clean up the index file
184 # if exists(CENTER_INDEX_FILEPATH):
185 # remove(CENTER_INDEX_FILEPATH)
188# Get the first frame of a trajectory
189def reset_structure (
190 input_structure_filepath : str,
191 input_trajectory_filepath : str,
192 output_structure_filepath : str,
193):
194 print(' Reseting structure after imaging step')
195 run_gromacs(f'trjconv -s {input_structure_filepath} \
196 -f {input_trajectory_filepath} -o {output_structure_filepath} \
197 -dump 0', user_input = 'System')