Coverage for mddb_workflow/utils/gmx_spells.py: 52%

226 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1from os import remove, rename 

2from os.path import exists, getmtime 

3from shutil import copyfile 

4from subprocess import run, PIPE, Popen 

5from re import search 

6from time import time 

7 

8from mddb_workflow.utils.auxiliar import load_json 

9from mddb_workflow.utils.constants import GROMACS_EXECUTABLE, GREY_HEADER, COLOR_END 

10from mddb_workflow.utils.file import File 

11from mddb_workflow.utils.type_hints import * 

12 

13from mddb_workflow.tools.fix_gromacs_masses import fix_gromacs_masses 

14 

15# Set a function to call gromacs in a more confortable and standarized way: 

16# - Standard gromacs executable, which may be provided by the user 

17# - Gromacs mass fixes by using a custom atommass.dat file with extended atom names 

18# - Hidden unnecessary output logs and grey-colored necessary ones 

19# - Missing output checks 

20# Then return both output and error logs 

21def run_gromacs(command : str, user_input : Optional[str] = None, 

22 expected_output_filepath : Optional[str] = 'auto', 

23 show_output_logs : bool = False, show_error_logs : bool = False) -> tuple[str, str]: 

24 

25 # Run a fix for gromacs if not done before 

26 # Note that this is run always at the moment the code is read, no matter the command or calling origin 

27 fix_gromacs_masses() 

28 

29 # In case we have user input we must open a process to then pipe it in the gromacs process 

30 if user_input: 

31 # The -e option allows the interpretation of '\', which is critial for the make_ndx command 

32 # WARNING: Explicity split by whitespaces 

33 # WARNING: If not specified then breaklines are also removed, which is ciritcal for make_ndx 

34 user_input_process = Popen([ "echo", "-e", *user_input.split(' ') ], stdout=PIPE) 

35 

36 # Get the time at the moment we start to run the command 

37 # WARNING: We must truncate this value, because the file mtime is truncated as well 

38 # Otherwise it may happen that gromacs runs in less than 1 seconds and the start time is higher 

39 start_time = time().__trunc__() 

40 

41 # Set the gromacs process 

42 # Note that at this point the command is not yet run 

43 process = run([ GROMACS_EXECUTABLE, *command.split(), '-quiet' ], 

44 stdin = user_input_process.stdout if user_input else None, 

45 stdout = PIPE, stderr = PIPE if not show_error_logs else None) 

46 

47 # If error is to be shown the color it in grey 

48 # This is usually used to see progress logs 

49 if show_error_logs: print(GREY_HEADER, end='\r') 

50 

51 # Consume the gromacs process output thus running the command 

52 output_logs = process.stdout.decode() 

53 # Consume also error logs, but this will not run the command again 

54 error_logs = process.stderr.decode() if not show_error_logs else None 

55 

56 # End the grey coloring 

57 if show_error_logs: print(COLOR_END, end='\r') 

58 

59 # In case the expected output is set as 'auto' we must guess it from the command 

60 # Normally output comes after the '-o' option 

61 if expected_output_filepath == 'auto': 

62 command_splits = command.split() 

63 if '-o' not in command_splits: expected_output_filepath = None 

64 else: 

65 option_flag_index = command_splits.index('-o') 

66 expected_output_filepath = command_splits[option_flag_index + 1] 

67 

68 # If an output file was expected then check it was created 

69 if expected_output_filepath: 

70 # Make sure output exists 

71 output_exists = exists(expected_output_filepath) 

72 # Make sure output have been generated by the gromacs command and it is not an old output 

73 output_is_old = False 

74 if output_exists: 

75 output_time = getmtime(expected_output_filepath) 

76 output_is_old = output_time < start_time 

77 # If output was not generated then we report the problem 

78 if not output_exists or output_is_old: 

79 # If we are missing the expetced output then report it 

80 print(output_logs) 

81 print(error_logs) 

82 # Recreate the exact command 

83 final_command = f'{GROMACS_EXECUTABLE} {command}' 

84 if user_input: final_command += f' (with user input "{user_input}")' 

85 raise SystemExit(f'Something went wrong with Gromacs while running "{GROMACS_EXECUTABLE} {command}"') 

86 

87 # If all was good then show final logs but only if it was requested 

88 if show_output_logs: print(output_logs) 

89 

90 # Return outputs 

91 return output_logs, error_logs 

92 

93 

94# Get the first frame from a trajectory 

95def get_first_frame (input_structure_filename : str, input_trajectory_filename : str, output_frame_filename : str): 

96 # Run Gromacs 

97 run_gromacs(f'trjconv -s {input_structure_filename} -f {input_trajectory_filename} \ 

98 -o {output_frame_filename} -dump 0', user_input = 'System') 

99 

100# Set function supported formats 

101get_first_frame.format_sets = [ 

102 { 

103 'inputs': { 

104 'input_structure_filename': {'tpr', 'pdb', 'gro'}, 

105 'input_trajectory_filename': {'xtc', 'trr'} 

106 }, 

107 'outputs': { 

108 'output_frame_filename': {'pdb', 'gro'} 

109 } 

110 }, 

111 { 

112 'inputs': { 

113 'input_structure_filename': None, 

114 'input_trajectory_filename': {'xtc', 'trr'} 

115 }, 

116 'outputs': { 

117 'output_frame_filename': {'xtc', 'trr'} 

118 } 

119 }, 

120 { 

121 'inputs': { 

122 'input_structure_filename': None, 

123 'input_trajectory_filename': {'pdb'} 

124 }, 

125 'outputs': { 

126 'output_frame_filename': {'pdb', 'xtc', 'trr'} 

127 } 

128 }, 

129 { 

130 'inputs': { 

131 'input_structure_filename': None, 

132 'input_trajectory_filename': {'gro'} 

133 }, 

134 'outputs': { 

135 'output_frame_filename': {'gro', 'xtc', 'trr'} 

136 } 

137 } 

138] 

139 

140# Get the structure using the first frame getter function 

141def get_structure (input_structure_filename : str, input_trajectory_filename : str, output_structure_filename : str): 

142 get_first_frame(input_structure_filename, input_trajectory_filename, output_structure_filename) 

143get_structure.format_sets = [ 

144 { 

145 'inputs': { 

146 'input_structure_filename': {'tpr', 'pdb', 'gro'}, 

147 'input_trajectory_filename': {'xtc', 'trr'} 

148 }, 

149 'outputs': { 

150 'output_structure_filename': {'pdb', 'gro'} 

151 } 

152 } 

153] 

154 

155# Convert the structure using the first frame getter function (no trajectory is required) 

156def get_structure_alone (input_structure_filename : str, output_structure_filename : str): 

157 get_first_frame(input_structure_filename, input_structure_filename, output_structure_filename) 

158get_structure_alone.format_sets = [ 

159 { 

160 'inputs': { 

161 'input_structure_filename': {'pdb', 'gro'}, 

162 }, 

163 'outputs': { 

164 'output_structure_filename': {'pdb', 'gro'} 

165 } 

166 } 

167] 

168 

169# Get gromacs supported trajectories merged and converted to a different format 

170def merge_and_convert_trajectories (input_trajectory_filenames : list[str], output_trajectory_filename : str): 

171 # Get trajectory formats 

172 sample_trajectory = input_trajectory_filenames[0] 

173 sample_trajectory_file = File(sample_trajectory) 

174 input_trajectories_format = sample_trajectory_file.format 

175 output_trajectory_file = File(output_trajectory_filename) 

176 output_trajectory_format = output_trajectory_file.format 

177 auxiliar_single_trajectory_filename = '.single_trajectory.' + input_trajectories_format 

178 # If we have multiple trajectories then join them 

179 if len(input_trajectory_filenames) > 1: 

180 single_trajectory_filename = auxiliar_single_trajectory_filename 

181 run_gromacs(f'trjcat -f {" ".join(input_trajectory_filenames)} \ 

182 -o {single_trajectory_filename}') 

183 else: 

184 single_trajectory_filename = sample_trajectory 

185 # In case input and output formats are different we must convert the trajectory 

186 if input_trajectories_format != output_trajectory_format: 

187 run_gromacs(f'trjconv -f {single_trajectory_filename} \ 

188 -o {output_trajectory_filename}') 

189 else: 

190 copyfile(single_trajectory_filename, output_trajectory_filename) 

191 # Remove residual files 

192 if exists(auxiliar_single_trajectory_filename): 

193 remove(auxiliar_single_trajectory_filename) 

194 

195merge_and_convert_trajectories.format_sets = [ 

196 { 

197 'inputs': { 

198 'input_trajectory_filenames': {'xtc', 'trr'} 

199 }, 

200 'outputs': { 

201 'output_trajectory_filename': {'xtc', 'trr'} 

202 } 

203 }, 

204 { 

205 'inputs': { 

206 'input_trajectory_filenames': {'pdb'} 

207 }, 

208 'outputs': { 

209 'output_trajectory_filename': {'pdb', 'xtc', 'trr'} 

210 } 

211 }, 

212 { 

213 'inputs': { 

214 'input_trajectory_filenames': {'gro'} 

215 }, 

216 'outputs': { 

217 'output_trajectory_filename': {'gro', 'xtc', 'trr'} 

218 } 

219 } 

220] 

221 

222# Get specific frames from a trajectory 

223def get_trajectory_subset ( 

224 input_trajectory_filename : str, 

225 output_trajectory_filename : str, 

226 start : int = 0, 

227 end : int = None, 

228 step : int = 1, 

229 frames : list[int] = [], 

230 skip : list[int] = [] 

231): 

232 # Set a list with frame indices from 

233 output_frames = frames if frames and len(frames) > 0 else [ frame for frame in range(start, end, step) if frame not in skip ] 

234 

235 # Generate the ndx file to target the desired frames 

236 auxiliar_ndx_filename = '.frames.ndx' 

237 generate_frames_ndx(output_frames, auxiliar_ndx_filename) 

238 

239 # Now run gromacs trjconv command in order to extract the desired frames 

240 run_gromacs(f'trjconv -f {input_trajectory_filename} -o {output_trajectory_filename} \ 

241 -fr {auxiliar_ndx_filename}', user_input = 'System') 

242 

243 # Cleanup the auxiliar ndx file 

244 remove(auxiliar_ndx_filename) 

245 

246 

247get_trajectory_subset.format_sets = [ 

248 { 

249 'inputs': { 

250 'input_trajectory_filename': {'xtc', 'trr'} 

251 }, 

252 'outputs': { 

253 'output_trajectory_filename': {'xtc', 'trr'} 

254 } 

255 } 

256] 

257 

258# Filter trajectory atoms 

259def filter_structure ( 

260 input_structure_file : 'File', 

261 output_structure_file : 'File', 

262 input_selection : 'Selection' 

263): 

264 # Generate a ndx file with the desired selection 

265 filter_selection_name = 'filter' 

266 filter_index_content = input_selection.to_ndx(selection_name=filter_selection_name) 

267 filter_index_filename = '.filter.ndx' 

268 with open(filter_index_filename, 'w') as file: 

269 file.write(filter_index_content) 

270 

271 # Filter the structure 

272 run_gromacs(f'editconf -f {input_structure_file.path} -o {output_structure_file.path} \ 

273 -n {filter_index_filename}', user_input = filter_selection_name) 

274 

275 # Cleanup the index file 

276 remove(filter_index_filename) 

277 

278filter_structure.format_sets = [ 

279 { 

280 'inputs': { 

281 'input_structure_file': {'pdb', 'gro'}, 

282 }, 

283 'outputs': { 

284 'output_structure_file': {'pdb', 'gro'} 

285 } 

286 } 

287] 

288 

289# Filter trajectory atoms 

290def filter_trajectory ( 

291 input_structure_file : 'File', 

292 input_trajectory_file : 'File', 

293 output_trajectory_file : 'File', 

294 input_selection : 'Selection' 

295): 

296 # Generate a ndx file with the desired selection 

297 filter_selection_name = 'filter' 

298 filter_index_content = input_selection.to_ndx(selection_name=filter_selection_name) 

299 filter_index_filename = '.filter.ndx' 

300 with open(filter_index_filename, 'w') as file: 

301 file.write(filter_index_content) 

302 

303 # Filter the trajectory 

304 # Now run gromacs trjconv command in order to extract the desired frames 

305 run_gromacs(f'trjconv -s {input_structure_file.path} -f {input_trajectory_file.path} \ 

306 -o {output_trajectory_file.path} -n {filter_index_filename}', 

307 user_input = filter_selection_name) 

308 

309 # Cleanup the index file 

310 remove(filter_index_filename) 

311 

312filter_trajectory.format_sets = [ 

313 { 

314 'inputs': { 

315 'input_structure_file': {'tpr', 'pdb', 'gro'}, 

316 'input_trajectory_file': {'xtc', 'trr'} 

317 }, 

318 'outputs': { 

319 'output_trajectory_file': {'xtc', 'trr'} 

320 } 

321 } 

322] 

323 

324# Set a regular expression to further mine data from gromacs logs 

325GROMACS_SYSTEM_ATOMS_REGEX = r'System\) has[ ]+([0-9]*) elements' 

326 

327# Mine system atoms count from gromacs logs 

328def mine_system_atoms_count (logs : str) -> int: 

329 system_atoms_match = search(GROMACS_SYSTEM_ATOMS_REGEX, logs) 

330 if not system_atoms_match: 

331 print(logs) 

332 raise ValueError('Failed to mine Gromacs error logs') 

333 return int(system_atoms_match[1]) 

334 

335# Count TPR atoms 

336def get_tpr_atom_count (tpr_filepath : str) -> int: 

337 # Make sure the filepath is valid 

338 if not exists(tpr_filepath): 

339 raise ValueError('Trying to count atoms from a topology which does not exist') 

340 # Run Gromacs only to see the number of atoms in the TPR 

341 output_logs, error_logs = run_gromacs(f'convert-tpr -s {tpr_filepath}', user_input = "whatever") 

342 # Mine the number of atoms in the system from the logs 

343 atom_count = mine_system_atoms_count(error_logs) 

344 return atom_count 

345 

346# Read and parse a tpr using the MDDB-specific tool from gromacs 

347def read_and_parse_tpr (tpr_filepath : str) -> dict: 

348 expected_output_filepath = 'siminfo.json' 

349 run_gromacs(f'dump -s {tpr_filepath} --json', expected_output_filepath = expected_output_filepath) 

350 parsed_tpr = load_json(expected_output_filepath) 

351 #remove(expected_output_filepath) 

352 return parsed_tpr 

353 

354# Read a tpr file by converting it to ASCII 

355def get_tpr_content (tpr_filepath : str) -> tuple[str, str]: 

356 # Read the tpr file making a 'dump' 

357 return run_gromacs(f'dump -s {tpr_filepath}') 

358 

359# Regular expresion to mine atom charges 

360GROMACS_TPR_ATOM_CHARGES_REGEX = r"q=([0-9e+-. ]*)," 

361 

362# Get tpr atom charges 

363# This works for the new tpr format (tested in 122) 

364def get_tpr_charges (tpr_filepath : str) -> list[float]: 

365 # Read the TPR 

366 tpr_content, tpr_error_logs = get_tpr_content(tpr_filepath) 

367 # Mine the atomic charges 

368 charges = [] 

369 # Iterate tpr content lines 

370 for line in tpr_content.split('\n'): 

371 # Skip everything which is not atomic charges data 

372 if line[0:16] != ' atom': continue 

373 # Parse the line to get only charges 

374 match = search(GROMACS_TPR_ATOM_CHARGES_REGEX, line) 

375 if match: charges.append(float(match[1])) 

376 # If we successfully got atom charges then return them 

377 if len(charges) > 0: return charges 

378 # If there are no charges at the end then something went wrong 

379 print(tpr_error_logs) 

380 raise RuntimeError(f'Charges extraction from tpr file "{tpr_filepath}" has failed') 

381 

382# Regular expresion to mine atom bonds 

383GROMACS_TPR_ATOM_BONDS_REGEX = r"^\s*([0-9]*) type=[0-9]* \((BONDS|CONSTR|CONNBONDS)\)\s*([0-9]*)\s*([0-9]*)$" 

384# Set a regular expression for SETTLE bonds, used for rigid waters 

385# ---- From the paper ------------------------------------------------------------------------------------ 

386# The SETTLE can be applied to a four-point water model like TIP4P5 which has the fourth point with 

387# a certain charge and no mass if the force acting on the fourth point is distributed onto the other three 

388# points with masses in a reasonable manner. 

389# S. Miyamoto and P.A. Kollman, “SETTLE: An analytical version of the SHAKE and RATTLE algorithms for rigid 

390# water models,” J. Comp. Chem., 13 952–962 (1992) 

391# -------------------------------------------------------------------------------------------------------- 

392# So it may happen that we encounter SETTLE with 4 atoms 

393# This is not yet supported, but at least we check if this is happening to raise an error when found 

394GROMACS_TPR_SETTLE_REGEX = r"^\s*([0-9]*) type=[0-9]* \(SETTLE\)\s*([0-9]*)\s*([0-9]*)\s*([0-9]*)\s*([0-9]*)$" 

395 

396# Get tpr atom bonds 

397def get_tpr_bonds (tpr_filepath : str) -> list[ tuple[int, int] ]: 

398 # Read and parse the TPR 

399 parsed_tpr = read_and_parse_tpr(tpr_filepath) 

400 # Get the bonds only 

401 tpr_bonds = parsed_tpr['gromacs-topology']['bond']['value'] 

402 return tpr_bonds 

403 

404# Filter topology atoms 

405# DANI: Note that a TPR file is not a structure but a topology 

406# DANI: However it is important that the argument is called 'structure' for the format finder 

407def filter_tpr ( 

408 input_structure_file : 'File', 

409 output_structure_file : 'File', 

410 input_selection : 'Selection' 

411): 

412 # Generate a ndx file with the desired selection 

413 filter_selection_name = 'filter' 

414 filter_index_content = input_selection.to_ndx(selection_name=filter_selection_name) 

415 filter_index_filename = '.filter.ndx' 

416 with open(filter_index_filename, 'w') as file: 

417 file.write(filter_index_content) 

418 

419 # Filter the tpr 

420 run_gromacs(f'convert-tpr -s {input_structure_file.path} -o {output_structure_file.path} \ 

421 -n {filter_index_filename}', user_input = filter_selection_name) 

422 

423 # Cleanup the index file 

424 remove(filter_index_filename) 

425 

426filter_tpr.format_sets = [ 

427 { 

428 'inputs': { 

429 'input_structure_file': {'tpr'}, 

430 }, 

431 'outputs': { 

432 'output_structure_file': {'tpr'} 

433 } 

434 } 

435] 

436 

437# Join xtc files 

438# This is a minimal implementation of 'gmx trjcat' used in loops 

439def merge_xtc_files (current_file : str, new_file : str): 

440 # If the current file does nt exist then set the new file as the current file 

441 if not exists(current_file): 

442 rename(new_file, current_file) 

443 return 

444 # Run trjcat 

445 run_gromacs(f'trjcat -f {new_file.path} {current_file} -o {current_file.path}') 

446 

447# Generate a ndx file with a selection of frames 

448def generate_frames_ndx (frames : list[int], filename : str): 

449 # Add a header  

450 content = '[ frames ]\n' 

451 count = 0 

452 for frame in frames: 

453 # Add a breakline each 15 indices 

454 count += 1 

455 if count == 15: 

456 content += '\n' 

457 count = 0 

458 # Add a space between indices 

459 # Atom indices go from 0 to n-1 

460 # Add +1 to the index since gromacs counts from 1 to n 

461 content += str(frame + 1) + ' ' 

462 content += '\n' 

463 # Write the file 

464 with open(filename, 'w') as file: 

465 file.write(content) 

466 

467# DANI: No se usa, pero me costó un rato ponerla a punto así que la conservo 

468# Set a function to read and parse xpm files with a single matrix 

469# Inspired in https://gromacswrapper.readthedocs.io/en/latest/_modules/gromacs/fileformats/xpm.html#XPM 

470def parse_xpm (filename : str) -> list[ list[float] ]: 

471 with open(filename) as file: 

472 # First lines include metadata such as the title, description and legend 

473 # Read lines until we find the start of the array 

474 metadata = [file.readline()] 

475 while not metadata[-1].startswith("static char *gromacs_xpm[]"): 

476 metadata.append(file.readline()) 

477 # The next line will contain the dimensions of the matrix 

478 # e.g. "7 7 14 1", 

479 dimensions = file.readline().replace('"','').replace(',','').split() 

480 x_dimension, y_dimension, entity_count, x_stride = [ int(i) for i in dimensions ] 

481 # Next lines contain every entity definition 

482 # Every entity has the following: 

483 # - A letter which is used to refer this entity later in the matrix 

484 # - A color in #XXXXXX format 

485 # - The actual value of the entity 

486 # e.g. "A c #FFFFFF " /* "0" */, 

487 # e.g. "B c #EBEBFF " /* "1" */, 

488 entities = {} 

489 for i in range(entity_count): 

490 line = file.readline() 

491 entity_id = line[1] 

492 entity_color = line[6:13] 

493 entity_value = line[18:].split('"')[1] 

494 entities[entity_id] = { 'value': entity_value, 'color': entity_color } 

495 # Next lines are the matrix axis values 

496 x_axis = [] 

497 y_axis = [] 

498 # Every line has a maximum of 80 labels 

499 x_lines = math.ceil(x_dimension / 80) 

500 for l in range(x_lines): 

501 line = file.readline()[12:-3].split() 

502 x_axis += [ int(v) for v in line ] 

503 y_lines = math.ceil(y_dimension / 80) 

504 for l in range(y_lines): 

505 line = file.readline()[12:-3].split() 

506 y_axis += [ int(v) for v in line ] 

507 # Next lines are the matrix rows 

508 matrix = [] 

509 for l in range(y_dimension): 

510 line = file.readline()[1:1+x_dimension] 

511 row = [ letter for letter in line ] 

512 matrix.append(row) 

513 # Check the final matrix size is as expected 

514 if len(matrix) != y_dimension: 

515 raise ValueError('Different number of rows than expected') 

516 sample_row = matrix[-1] 

517 if len(sample_row) != x_dimension: 

518 raise ValueError('Different number of columns than expected') 

519 # Return the output 

520 return { 'entities': entities, 'x_axis': x_axis, 'y_axis': y_axis, 'matrix': matrix } 

521 

522# Filter atoms in a pdb file 

523# This method conserves maximum resolution and chains 

524def pdb_filter ( 

525 input_pdb_filepath : str, 

526 output_pdb_filepath : str, 

527 index_filepath : str, 

528 filter_group_name : str 

529): 

530 # Filter the PDB 

531 run_gromacs(f'editconf -f {input_pdb_filepath} -o {output_pdb_filepath} \ 

532 -n {index_filepath}', user_input = filter_group_name) 

533 

534# Filter atoms in a xtc file 

535# Note that here we do not hide the stderr 

536# This is because it shows the progress 

537# Instead we color the output grey 

538def xtc_filter( 

539 structure_filepath : str, 

540 input_trajectory_filepath : str, 

541 output_trajectory_filepath : str, 

542 index_filepath : str, 

543 filter_group_name : str 

544): 

545 # Filter the trajectory 

546 run_gromacs(f'trjconv -s {structure_filepath} -f {input_trajectory_filepath} \ 

547 -o {output_trajectory_filepath} -n {index_filepath}', user_input = filter_group_name) 

548 

549# Filter atoms in both a pdb and a xtc file 

550def tpr_filter( 

551 input_tpr_filepath : str, 

552 output_tpr_filepath : str, 

553 index_filepath : str, 

554 filter_group_name : str 

555): 

556 # Filter the topology 

557 run_gromacs(f'convert-tpr -s {input_tpr_filepath} -o {output_tpr_filepath} \ 

558 -n {index_filepath}', user_input = filter_group_name) 

559 

560# Create a .ndx file from a complex mask 

561# e.g. no water and no ions -> !"Water"&!"Ion" 

562# This will return the group name to be further used 

563def make_index (input_structure_file : 'File', output_index_file : 'File', mask : str) -> str: 

564 # Make sure the  

565 #if output_index_file.exists: output_index_file.remove() 

566 # Run Gromacs 

567 run_gromacs(f'make_ndx -f {input_structure_file.path} -o {output_index_file.path}', 

568 user_input = f'{mask} \nq') 

569 # The group name is automatically assigned by gromacs 

570 # It equals the mask but removing any " symbols and with a few additional changes 

571 # WARNING: Not all mask symbols are supported here, beware of complex masks 

572 group_name = mask.replace('"','').replace('&','_&_').replace('|','_') 

573 # Check if the group was created 

574 content = read_ndx(output_index_file) 

575 created = group_name in content 

576 return group_name, created 

577 

578# Read a .ndx file 

579def read_ndx (input_index_file : 'File') -> dict: 

580 # Make sure the file exists 

581 if not input_index_file.exists: 

582 raise RuntimeError(f'Index file {input_index_file.path} does not exist') 

583 # Read the ndx file 

584 ndx_content = None 

585 with open(input_index_file.path, 'r') as file: 

586 ndx_content = file.readlines() 

587 # Parse its contents 

588 groups = {} 

589 current_group_name = None 

590 current_group_indices = None 

591 for line in ndx_content: 

592 # If it is the header of a group of indices 

593 if line[0] == '[': 

594 # Add current group to the final dict 

595 if current_group_name != None: 

596 groups[current_group_name] = current_group_indices 

597 # Set the name and the list of indices for the next group 

598 current_group_name = line[2:-3] 

599 current_group_indices = [] 

600 continue 

601 # If it is a line of indices then parse it to actual numbers 

602 # Substract 1 since gromacs indices are 1-based while we want them 0-based 

603 current_group_indices += [ int(index) - 1 for index in line.split() ] 

604 # Add last group to the final dict 

605 groups[current_group_name] = current_group_indices 

606 # Finally return the parsed content 

607 return groups 

608 

609# Set a function to count atoms in a gromacs supported file 

610ATOMS_LINE = r'\n# Atoms\s*([0-9]*)\n' 

611def get_atom_count (mysterious_file : 'File') -> int: 

612 output_logs, error_logs = run_gromacs(f'check -f {mysterious_file.path}') 

613 search_results = search(ATOMS_LINE, error_logs) 

614 if not search_results: raise ValueError('Failed to mine atoms') 

615 return int(search_results[1])