Coverage for mddb_workflow/tools/image_and_fit.py: 15%

66 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1# This script is used to fit a trajectory (i.e. eliminate translation and rotation) 

2# This process is carried by Gromacs 

3 

4from mddb_workflow.utils.structures import Structure 

5from mddb_workflow.utils.auxiliar import InputError, MISSING_TOPOLOGY 

6from mddb_workflow.utils.gmx_spells import run_gromacs 

7from mddb_workflow.utils.type_hints import * 

8 

9# Set a pair of independent auxiliar functions to save and recover chains from a pdb file 

10# WARNING: These functions must be used only when the pdb has not changed in number of atoms 

11 

12# Get a list with each atom chain from a pdb 

13def get_chains (pdb_filename : str) -> list: 

14 structure = Structure.from_pdb_file(pdb_filename) 

15 chains = structure.chains 

16 return chains 

17 

18# Set each atom chain from a pdb from a list 

19def set_chains (pdb_filename : str, chains : list): 

20 structure = Structure.from_pdb_file(pdb_filename) 

21 structure.chains = chains 

22 structure.generate_pdb_file(pdb_filename) 

23 

24# Set the default centering/fitting selection (vmd syntax): protein and nucleic acids 

25CENTER_SELECTION_NAME = 'protein_and_nucleic_acids' 

26CENTER_INDEX_FILEPATH = '.index.ndx' 

27 

28# Image and fit a trajectory 

29# image - Set if it must me imaged 

30# fit - Set if it must be fitted 

31# translation - Set how it must be translated during imaging 

32# pbc_selection - Set a selection to exclude PBC residues from both the centering and the fitting focuses 

33def image_and_fit ( 

34 # Tested supported formats are .pdb and .tpr 

35 input_structure_file : 'File', 

36 # Tested supported formats are .trr and .xtc 

37 input_trajectory_file : 'File', 

38 # This must be in .tpr format 

39 # This is optional if there are no PBC residues 

40 input_topology_file : Union['File', Exception], 

41 output_structure_file : 'File', 

42 output_trajectory_file : 'File', 

43 image : bool, fit : bool, 

44 translation : list, 

45 # Note that this is an early provisional structure 

46 structure : 'Structure', 

47 # Selection of atoms which are to stay in PBC conditions after imaging 

48 # Note that this is an early provisional atom selection 

49 pbc_selection : 'Selection', 

50 ) -> str: 

51 

52 print(f' Image: {image} | Fit: {fit}') 

53 

54 if not image and not fit: 

55 return 

56 

57 # In order to run the imaging with PBC residues we need a .tpr file, not just the .pdb file 

58 # This is because there is a '-pbc mol' step which only works with a .tpr file 

59 is_tpr_available = input_topology_file != MISSING_TOPOLOGY and input_topology_file.format == 'tpr' 

60 has_pbc_atoms = bool(pbc_selection) 

61 if image and not is_tpr_available and has_pbc_atoms: 

62 raise InputError('In order to image a simulation with PBC residues using Gromacs it is mandatory to provide a .tpr file') 

63 

64 # If we have coarse grain then we can only fit if we have a tpr file 

65 # It will not work otherwise since we need atom masses 

66 # For some image protocols we need to work with pdbs at some points so we directly surrender 

67 if structure.select_cg(): 

68 if image: raise InputError('We cannot image a coarse grain simulation using Gromacs') 

69 if not is_tpr_available: 

70 raise InputError('We cannot fit a coarse grain simulation using Gromacs without a TPR file') 

71 

72 # First of all save chains 

73 # Gromacs will delete chains so we need to recover them after 

74 chains_backup = get_chains(input_structure_file.path) 

75 

76 # Set a custom index file (.ndx) to select protein and nucleic acids 

77 # This is useful for some imaging steps (centering and fitting) 

78 system_selection = structure.select('all', syntax='vmd') 

79 protein_selection = structure.select_protein() 

80 nucleic_selection = structure.select_nucleic() 

81 custom_selection = protein_selection + nucleic_selection 

82 if not custom_selection: 

83 raise SystemExit(f'The default selection to center (protein or nucleic) is empty. Please image your simulation manually.') 

84 # Exclude PBC residues from this custom selection 

85 custom_selection -= pbc_selection 

86 # Convert both selections to a single ndx file which gromacs can read 

87 system_selection_ndx = system_selection.to_ndx('System') 

88 selection_ndx = custom_selection.to_ndx(CENTER_SELECTION_NAME) 

89 with open(CENTER_INDEX_FILEPATH, 'w') as file: 

90 file.write(system_selection_ndx) 

91 file.write(selection_ndx) 

92 

93 # Imaging -------------------------------------------------------------------------------------- 

94 

95 if image: 

96 print(' Running first imaging step') 

97 

98 # Check if coordinates are to be translated  

99 must_translate = translation != [0, 0, 0] 

100 

101 # If so run the imaging process without the '-center' flag 

102 if must_translate: 

103 # WARNING: Adding '-ur compact' makes everything stay the same 

104 run_gromacs(f'trjconv -s {input_structure_file.path} \ 

105 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \ 

106 -trans {translation[0]} {translation[1]} {translation[2]} \ 

107 -pbc atom', user_input = 'System', show_error_logs = True) 

108 # If no translation is required and we have a tpr then use it to make molecules whole 

109 elif is_tpr_available: 

110 run_gromacs(f'trjconv -s {input_topology_file.path} \ 

111 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \ 

112 -pbc whole', user_input = 'System', show_error_logs = True) 

113 # Otherwise, center the custom selection 

114 else: 

115 run_gromacs(f'trjconv -s {input_structure_file.path} \ 

116 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \ 

117 -pbc atom -center -n {CENTER_INDEX_FILEPATH}', 

118 user_input = f'{CENTER_SELECTION_NAME} System', show_error_logs = True) 

119 

120 # Select the first frame of the recently imaged trayectory as the new topology 

121 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path) 

122 

123 # If there are PBC residues then run a '-pbc mol' to make all residues stay inside the box anytime 

124 if has_pbc_atoms: 

125 print(' Fencing PBC molecules back to the box') 

126 # Run Gromacs 

127 run_gromacs(f'trjconv -s {input_topology_file.path} \ 

128 -f {input_trajectory_file.path} -o {output_trajectory_file.path} \ 

129 -pbc mol -n {CENTER_INDEX_FILEPATH}', 

130 user_input = 'System', show_error_logs = True) 

131 

132 # Select the first frame of the recently translated and imaged trayectory as the new topology 

133 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path) 

134 

135 

136 # ----------------------------------------------------------------------------------------------- 

137 

138 # If there are no PBC residues then run a '-pbc nojump' to avoid non-sense jumping of any molecule 

139 else: 

140 print(' Running no-jump') 

141 # Run Gromacs 

142 # Expanding the box may help in some situations but there are secondary effects 

143 # i.e. -box 999 999 999 

144 run_gromacs(f'trjconv -s {output_structure_file.path} \ 

145 -f {output_trajectory_file.path} -o {output_trajectory_file.path} \ 

146 -pbc nojump', 

147 user_input = 'System', show_error_logs = True) 

148 

149 # Fitting -------------------------------------------------------------------------------------- 

150 

151 # Remove translation and rotation in the trajectory using the custom selection as reference 

152 # WARNING: Sometimes, simulations with membranes do not require this step and they may get worse when fitted 

153 if fit: 

154 print(' Running fitting step') 

155 

156 # The trajectory to fit is the already imaged trajectory 

157 # However, if there was no imaging, the trajectory to fit is the input trajectory 

158 structure_to_fit = output_structure_file if image else input_structure_file 

159 trajectroy_to_fit = output_trajectory_file if image else input_trajectory_file 

160 

161 # If we have a TPR then use it here 

162 # DANI: No estoy seguro de si funcionaría bien después de un imageado 

163 # DANI: Esto siempre lo he hecho usando la estructura de la primera frame ya imageada 

164 if is_tpr_available: structure_to_fit = input_topology_file 

165 

166 # Run Gromacs 

167 run_gromacs(f'trjconv -s {structure_to_fit.path} \ 

168 -f {trajectroy_to_fit.path} -o {output_trajectory_file.path} \ 

169 -fit rot+trans -n {CENTER_INDEX_FILEPATH}', 

170 user_input = f'{CENTER_SELECTION_NAME} System', 

171 show_error_logs = True) 

172 

173 # If there is no output structure at this time (i.e. there was no imaging) then create it now 

174 # Note that the input structure is not necessarily the output structure in this scenario 

175 # The fit may have been done using the topology and there may be an offset, so better dump it 

176 if not output_structure_file.exists: 

177 reset_structure (input_structure_file.path, output_trajectory_file.path, output_structure_file.path) 

178 

179 # Recover chains 

180 set_chains(output_structure_file.path, chains_backup) 

181 

182 # Clean up the index file 

183 # if exists(CENTER_INDEX_FILEPATH): 

184 # remove(CENTER_INDEX_FILEPATH) 

185 

186 

187# Get the first frame of a trajectory 

188def reset_structure ( 

189 input_structure_filepath : str, 

190 input_trajectory_filepath : str, 

191 output_structure_filepath : str, 

192): 

193 print(' Reseting structure after imaging step') 

194 run_gromacs(f'trjconv -s {input_structure_filepath} \ 

195 -f {input_trajectory_filepath} -o {output_structure_filepath} \ 

196 -dump 0', user_input = 'System')