Coverage for mddb_workflow / utils / gmx_spells.py: 53%

230 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1from os import remove, rename 

2from os.path import exists, getmtime 

3from shutil import copyfile 

4from subprocess import run, PIPE, Popen 

5from re import search 

6from time import time 

7 

8from mddb_workflow.utils.auxiliar import load_json, ToolError 

9from mddb_workflow.utils.constants import GROMACS_EXECUTABLE, GREY_HEADER, COLOR_END 

10from mddb_workflow.utils.file import File 

11from mddb_workflow.utils.type_hints import * 

12from mddb_workflow.tools.fix_gromacs_masses import fix_gromacs_masses 

13 

14 

15def run_gromacs( 

16 command : str, 

17 user_input : Optional[str] = None, 

18 expected_output_filepath : Optional[str] = 'auto', 

19 show_output_logs : bool = False, 

20 show_error_logs : bool = False) -> tuple[str, str]: 

21 """ Set a function to call gromacs in a more confortable and standarized way: 

22 - Standard gromacs executable, which may be provided by the user 

23 - Gromacs mass fixes by using a custom atommass.dat file with extended atom names 

24 - Hidden unnecessary output logs and grey-colored necessary ones 

25 - Missing output checks 

26 Then return both output and error logs. """ 

27 

28 # Run a fix for gromacs if not done before 

29 # Note that this is run always at the moment the code is read, no matter the command or calling origin 

30 fix_gromacs_masses() 

31 

32 # In case we have user input we must open a process to then pipe it in the gromacs process 

33 if user_input: 

34 # The -e option allows the interpretation of '\', which is critial for the make_ndx command 

35 # WARNING: Explicity split by whitespaces 

36 # WARNING: If not specified then breaklines are also removed, which is ciritcal for make_ndx 

37 user_input_process = Popen([ "echo", "-e", *user_input.split(' ') ], stdout=PIPE) 

38 

39 # Get the time at the moment we start to run the command 

40 # WARNING: We must truncate this value, because the file mtime is truncated as well 

41 # Otherwise it may happen that gromacs runs in less than 1 seconds and the start time is higher 

42 start_time = time().__trunc__() 

43 

44 # Set the gromacs process 

45 # Note that at this point the command is not yet run 

46 process = run([ GROMACS_EXECUTABLE, *command.split(), '-quiet' ], 

47 stdin = user_input_process.stdout if user_input else None, 

48 stdout = PIPE, stderr = PIPE if not show_error_logs else None) 

49 

50 # If error is to be shown the color it in grey 

51 # This is usually used to see progress logs 

52 if show_error_logs: print(GREY_HEADER, end='\r') 

53 

54 # Consume the gromacs process output thus running the command 

55 output_logs = process.stdout.decode() 

56 # Consume also error logs, but this will not run the command again 

57 error_logs = process.stderr.decode() if not show_error_logs else None 

58 

59 # End the grey coloring 

60 if show_error_logs: print(COLOR_END, end='\r') 

61 

62 # In case the expected output is set as 'auto' we must guess it from the command 

63 # Normally output comes after the '-o' option 

64 if expected_output_filepath == 'auto': 

65 command_splits = command.split() 

66 if '-o' not in command_splits: expected_output_filepath = None 

67 else: 

68 option_flag_index = command_splits.index('-o') 

69 expected_output_filepath = command_splits[option_flag_index + 1] 

70 

71 # If an output file was expected then check it was created 

72 if expected_output_filepath: 

73 # Make sure output exists 

74 output_exists = exists(expected_output_filepath) 

75 # Make sure output have been generated by the gromacs command and it is not an old output 

76 output_is_old = False 

77 if output_exists: 

78 output_time = getmtime(expected_output_filepath) 

79 output_is_old = output_time < start_time 

80 # If output was not generated then we report the problem 

81 if not output_exists or output_is_old: 

82 # If we are missing the expetced output then report it 

83 print(output_logs) 

84 print(error_logs) 

85 # Recreate the exact command 

86 final_command = f'{GROMACS_EXECUTABLE} {command}' 

87 if user_input: final_command += f' (with user input "{user_input}")' 

88 raise ToolError(f'Something went wrong with Gromacs while running "{GROMACS_EXECUTABLE} {command}"') 

89 

90 # If all was good then show final logs but only if it was requested 

91 if show_output_logs: print(output_logs) 

92 

93 # Return outputs 

94 return output_logs, error_logs 

95 

96 

97def get_first_frame (input_structure_filename : str, input_trajectory_filename : str, output_frame_filename : str): 

98 """ Get the first frame from a trajectory. """ 

99 # Run Gromacs 

100 run_gromacs(f'trjconv -s {input_structure_filename} -f {input_trajectory_filename} \ 

101 -o {output_frame_filename} -dump 0', user_input = 'System') 

102 

103# Set function supported formats 

104get_first_frame.format_sets = [ 

105 { 

106 'inputs': { 

107 'input_structure_filename': {'tpr', 'pdb', 'gro'}, 

108 'input_trajectory_filename': {'xtc', 'trr'} 

109 }, 

110 'outputs': { 

111 'output_frame_filename': {'pdb', 'gro'} 

112 } 

113 }, 

114 { 

115 'inputs': { 

116 'input_structure_filename': None, 

117 'input_trajectory_filename': {'xtc', 'trr'} 

118 }, 

119 'outputs': { 

120 'output_frame_filename': {'xtc', 'trr'} 

121 } 

122 }, 

123 { 

124 'inputs': { 

125 'input_structure_filename': None, 

126 'input_trajectory_filename': {'pdb'} 

127 }, 

128 'outputs': { 

129 'output_frame_filename': {'pdb', 'xtc', 'trr'} 

130 } 

131 }, 

132 { 

133 'inputs': { 

134 'input_structure_filename': None, 

135 'input_trajectory_filename': {'gro'} 

136 }, 

137 'outputs': { 

138 'output_frame_filename': {'gro', 'xtc', 'trr'} 

139 } 

140 } 

141] 

142 

143 

144def get_structure (input_structure_filename : str, input_trajectory_filename : str, output_structure_filename : str): 

145 """ Get the structure using the first frame getter function. """ 

146 get_first_frame(input_structure_filename, input_trajectory_filename, output_structure_filename) 

147get_structure.format_sets = [ 

148 { 

149 'inputs': { 

150 'input_structure_filename': {'tpr', 'pdb', 'gro'}, 

151 'input_trajectory_filename': {'xtc', 'trr'} 

152 }, 

153 'outputs': { 

154 'output_structure_filename': {'pdb', 'gro'} 

155 } 

156 } 

157] 

158 

159 

160def get_structure_alone (input_structure_filename : str, output_structure_filename : str): 

161 """ Convert the structure using the first frame getter function (no trajectory is required). """ 

162 get_first_frame(input_structure_filename, input_structure_filename, output_structure_filename) 

163get_structure_alone.format_sets = [ 

164 { 

165 'inputs': { 

166 'input_structure_filename': {'pdb', 'gro'}, 

167 }, 

168 'outputs': { 

169 'output_structure_filename': {'pdb', 'gro'} 

170 } 

171 } 

172] 

173 

174 

175def merge_and_convert_trajectories (input_trajectory_filenames : list[str], output_trajectory_filename : str): 

176 """ Get gromacs supported trajectories merged and converted to a different format. """ 

177 # Get trajectory formats 

178 sample_trajectory = input_trajectory_filenames[0] 

179 sample_trajectory_file = File(sample_trajectory) 

180 input_trajectories_format = sample_trajectory_file.format 

181 output_trajectory_file = File(output_trajectory_filename) 

182 output_trajectory_format = output_trajectory_file.format 

183 auxiliar_single_trajectory_filename = '.single_trajectory.' + input_trajectories_format 

184 # If we have multiple trajectories then join them 

185 if len(input_trajectory_filenames) > 1: 

186 single_trajectory_filename = auxiliar_single_trajectory_filename 

187 run_gromacs(f'trjcat -f {" ".join(input_trajectory_filenames)} \ 

188 -o {single_trajectory_filename}') 

189 else: 

190 single_trajectory_filename = sample_trajectory 

191 # In case input and output formats are different we must convert the trajectory 

192 if input_trajectories_format != output_trajectory_format: 

193 run_gromacs(f'trjconv -f {single_trajectory_filename} \ 

194 -o {output_trajectory_filename}') 

195 else: 

196 copyfile(single_trajectory_filename, output_trajectory_filename) 

197 # Remove residual files 

198 if exists(auxiliar_single_trajectory_filename): 

199 remove(auxiliar_single_trajectory_filename) 

200 

201merge_and_convert_trajectories.format_sets = [ 

202 { 

203 'inputs': { 

204 'input_trajectory_filenames': {'xtc', 'trr'} 

205 }, 

206 'outputs': { 

207 'output_trajectory_filename': {'xtc', 'trr'} 

208 } 

209 }, 

210 { 

211 'inputs': { 

212 'input_trajectory_filenames': {'pdb'} 

213 }, 

214 'outputs': { 

215 'output_trajectory_filename': {'pdb', 'xtc', 'trr'} 

216 } 

217 }, 

218 { 

219 'inputs': { 

220 'input_trajectory_filenames': {'gro'} 

221 }, 

222 'outputs': { 

223 'output_trajectory_filename': {'gro', 'xtc', 'trr'} 

224 } 

225 } 

226] 

227 

228 

229def get_trajectory_subset ( 

230 input_trajectory_filename : str, 

231 output_trajectory_filename : str, 

232 start : int = 0, 

233 end : int = None, 

234 step : int = 1, 

235 frames : list[int] = [], 

236 skip : list[int] = [] 

237): 

238 """ Get specific frames from a trajectory. """ 

239 # Set a list with frame indices from 

240 output_frames = frames if frames and len(frames) > 0 else [ frame for frame in range(start, end, step) if frame not in skip ] 

241 

242 # Generate the ndx file to target the desired frames 

243 auxiliar_ndx_filename = '.frames.ndx' 

244 generate_frames_ndx(output_frames, auxiliar_ndx_filename) 

245 

246 # Now run gromacs trjconv command in order to extract the desired frames 

247 run_gromacs(f'trjconv -f {input_trajectory_filename} -o {output_trajectory_filename} \ 

248 -fr {auxiliar_ndx_filename}', user_input = 'System') 

249 

250 # Cleanup the auxiliar ndx file 

251 remove(auxiliar_ndx_filename) 

252 

253 

254get_trajectory_subset.format_sets = [ 

255 { 

256 'inputs': { 

257 'input_trajectory_filename': {'xtc', 'trr'} 

258 }, 

259 'outputs': { 

260 'output_trajectory_filename': {'xtc', 'trr'} 

261 } 

262 } 

263] 

264 

265 

266def filter_structure ( 

267 input_structure_file : 'File', 

268 output_structure_file : 'File', 

269 input_selection : 'Selection' 

270): 

271 """ Filter trajectory atoms. """ 

272 # Generate a ndx file with the desired selection 

273 filter_selection_name = 'filter' 

274 filter_index_content = input_selection.to_ndx(selection_name=filter_selection_name) 

275 filter_index_filename = '.filter.ndx' 

276 with open(filter_index_filename, 'w') as file: 

277 file.write(filter_index_content) 

278 

279 # Filter the structure 

280 run_gromacs(f'editconf -f {input_structure_file.path} -o {output_structure_file.path} \ 

281 -n {filter_index_filename}', user_input = filter_selection_name) 

282 

283 # Cleanup the index file 

284 remove(filter_index_filename) 

285 

286filter_structure.format_sets = [ 

287 { 

288 'inputs': { 

289 'input_structure_file': {'pdb', 'gro'}, 

290 }, 

291 'outputs': { 

292 'output_structure_file': {'pdb', 'gro'} 

293 } 

294 } 

295] 

296 

297 

298def filter_trajectory ( 

299 input_structure_file : 'File', 

300 input_trajectory_file : 'File', 

301 output_trajectory_file : 'File', 

302 input_selection : 'Selection' 

303): 

304 """ Filter trajectory atoms. """ 

305 # Generate a ndx file with the desired selection 

306 filter_selection_name = 'filter' 

307 filter_index_content = input_selection.to_ndx(selection_name=filter_selection_name) 

308 filter_index_filename = '.filter.ndx' 

309 with open(filter_index_filename, 'w') as file: 

310 file.write(filter_index_content) 

311 

312 # Filter the trajectory 

313 # Now run gromacs trjconv command in order to extract the desired frames 

314 run_gromacs(f'trjconv -s {input_structure_file.path} -f {input_trajectory_file.path} \ 

315 -o {output_trajectory_file.path} -n {filter_index_filename}', 

316 user_input = filter_selection_name) 

317 

318 # Cleanup the index file 

319 remove(filter_index_filename) 

320 

321filter_trajectory.format_sets = [ 

322 { 

323 'inputs': { 

324 'input_structure_file': {'tpr', 'pdb', 'gro'}, 

325 'input_trajectory_file': {'xtc', 'trr'} 

326 }, 

327 'outputs': { 

328 'output_trajectory_file': {'xtc', 'trr'} 

329 } 

330 } 

331] 

332 

333# Set a regular expression to further mine data from gromacs logs 

334GROMACS_SYSTEM_ATOMS_REGEX = r'System\) has[ ]+([0-9]*) elements' 

335 

336 

337def mine_system_atoms_count (logs : str) -> int: 

338 """ Mine system atoms count from gromacs logs. """ 

339 system_atoms_match = search(GROMACS_SYSTEM_ATOMS_REGEX, logs) 

340 if not system_atoms_match: 

341 print(logs) 

342 raise ValueError('Failed to mine Gromacs error logs') 

343 return int(system_atoms_match[1]) 

344 

345 

346def get_tpr_atom_count (tpr_filepath : str) -> int: 

347 """ Count TPR atoms. """ 

348 # Make sure the filepath is valid 

349 if not exists(tpr_filepath): 

350 raise ValueError('Trying to count atoms from a topology which does not exist') 

351 # Run Gromacs only to see the number of atoms in the TPR 

352 output_logs, error_logs = run_gromacs(f'convert-tpr -s {tpr_filepath}', user_input = "whatever") 

353 # Mine the number of atoms in the system from the logs 

354 atom_count = mine_system_atoms_count(error_logs) 

355 return atom_count 

356 

357 

358def read_and_parse_tpr (tpr_filepath : str) -> dict: 

359 """ Read and parse a tpr using the MDDB-specific tool from gromacs. """ 

360 expected_output_filepath = 'siminfo.json' 

361 run_gromacs(f'dump -s {tpr_filepath} --json', expected_output_filepath = expected_output_filepath) 

362 parsed_tpr = load_json(expected_output_filepath) 

363 remove(expected_output_filepath) 

364 return parsed_tpr 

365 

366 

367def get_tpr_content (tpr_filepath : str) -> tuple[str, str]: 

368 """ Read a tpr file by converting it to ASCII. """ 

369 # Read the tpr file making a 'dump' 

370 return run_gromacs(f'dump -s {tpr_filepath}') 

371 

372# Regular expresion to mine atom charges 

373GROMACS_TPR_ATOM_CHARGES_REGEX = r"q=([0-9e+-. ]*)," 

374 

375 

376def get_tpr_charges (tpr_filepath : str) -> list[float]: 

377 """ Get tpr atom charges. 

378 This works for the new tpr format (tested in 122). """ 

379 # Read the TPR 

380 tpr_content, tpr_error_logs = get_tpr_content(tpr_filepath) 

381 # Mine the atomic charges 

382 charges = [] 

383 # Iterate tpr content lines 

384 for line in tpr_content.split('\n'): 

385 # Skip everything which is not atomic charges data 

386 if line[0:16] != ' atom': continue 

387 # Parse the line to get only charges 

388 match = search(GROMACS_TPR_ATOM_CHARGES_REGEX, line) 

389 if match: charges.append(float(match[1])) 

390 # If we successfully got atom charges then return them 

391 if len(charges) > 0: return charges 

392 # If there are no charges at the end then something went wrong 

393 print(tpr_error_logs) 

394 raise RuntimeError(f'Charges extraction from tpr file "{tpr_filepath}" has failed') 

395 

396# Regular expresion to mine atom bonds 

397GROMACS_TPR_ATOM_BONDS_REGEX = r"^\s*([0-9]*) type=[0-9]* \((BONDS|CONSTR|CONNBONDS)\)\s*([0-9]*)\s*([0-9]*)$" 

398# Set a regular expression for SETTLE bonds, used for rigid waters 

399# ---- From the paper ------------------------------------------------------------------------------------ 

400# The SETTLE can be applied to a four-point water model like TIP4P5 which has the fourth point with 

401# a certain charge and no mass if the force acting on the fourth point is distributed onto the other three 

402# points with masses in a reasonable manner. 

403# S. Miyamoto and P.A. Kollman, “SETTLE: An analytical version of the SHAKE and RATTLE algorithms for rigid 

404# water models,” J. Comp. Chem., 13 952–962 (1992) 

405# -------------------------------------------------------------------------------------------------------- 

406# So it may happen that we encounter SETTLE with 4 atoms 

407# This is not yet supported, but at least we check if this is happening to raise an error when found 

408GROMACS_TPR_SETTLE_REGEX = r"^\s*([0-9]*) type=[0-9]* \(SETTLE\)\s*([0-9]*)\s*([0-9]*)\s*([0-9]*)\s*([0-9]*)$" 

409 

410 

411def get_tpr_bonds (tpr_filepath : str) -> list[ tuple[int, int] ]: 

412 """ Get tpr atom bonds. """ 

413 # Read and parse the TPR 

414 parsed_tpr = read_and_parse_tpr(tpr_filepath) 

415 topology = parsed_tpr['gromacs-topology'] 

416 # Get the actual bonds 

417 tpr_bonds = topology['bond']['value'] 

418 # Get VSITEN bond-like annotations 

419 # These are not actual bonds, but some CG models such as Martini use them as such 

420 # If not consider, some beads may remain non-bonded to anything thus breaking our logic 

421 # DANI: I check for the field to exist just to support old versions of the tool 

422 virtuals = topology.get('virtual_site_n', None) 

423 if virtuals: tpr_bonds += virtuals['value'] 

424 return tpr_bonds 

425 

426 

427def filter_tpr ( 

428 input_structure_file : 'File', 

429 output_structure_file : 'File', 

430 input_selection : 'Selection' 

431): 

432 """ Filter topology atoms. 

433 DANI: Note that a TPR file is not a structure but a topology 

434 DANI: However it is important that the argument is called 'structure' for the format finder""" 

435 # Generate a ndx file with the desired selection 

436 filter_selection_name = 'filter' 

437 filter_index_content = input_selection.to_ndx(selection_name=filter_selection_name) 

438 filter_index_filename = '.filter.ndx' 

439 with open(filter_index_filename, 'w') as file: 

440 file.write(filter_index_content) 

441 

442 # Filter the tpr 

443 run_gromacs(f'convert-tpr -s {input_structure_file.path} -o {output_structure_file.path} \ 

444 -n {filter_index_filename}', user_input = filter_selection_name) 

445 

446 # Cleanup the index file 

447 remove(filter_index_filename) 

448 

449filter_tpr.format_sets = [ 

450 { 

451 'inputs': { 

452 'input_structure_file': {'tpr'}, 

453 }, 

454 'outputs': { 

455 'output_structure_file': {'tpr'} 

456 } 

457 } 

458] 

459 

460 

461def merge_xtc_files (current_file : str, new_file : str): 

462 """ Join xtc files. 

463 This is a minimal implementation of 'gmx trjcat' used in loops. """ 

464 # If the current file does nt exist then set the new file as the current file 

465 if not exists(current_file): 

466 rename(new_file, current_file) 

467 return 

468 # Run trjcat 

469 run_gromacs(f'trjcat -f {new_file.path} {current_file} -o {current_file.path}') 

470 

471 

472def generate_frames_ndx (frames : list[int], filename : str): 

473 """ Generate a ndx file with a selection of frames. """ 

474 # Add a header 

475 content = '[ frames ]\n' 

476 count = 0 

477 for frame in frames: 

478 # Add a breakline each 15 indices 

479 count += 1 

480 if count == 15: 

481 content += '\n' 

482 count = 0 

483 # Add a space between indices 

484 # Atom indices go from 0 to n-1 

485 # Add +1 to the index since gromacs counts from 1 to n 

486 content += str(frame + 1) + ' ' 

487 content += '\n' 

488 # Write the file 

489 with open(filename, 'w') as file: 

490 file.write(content) 

491 

492# DANI: No se usa, pero me costó un rato ponerla a punto así que la conservo 

493# Set a function to read and parse xpm files with a single matrix 

494# Inspired in https://gromacswrapper.readthedocs.io/en/latest/_modules/gromacs/fileformats/xpm.html#XPM 

495def parse_xpm (filename : str) -> list[ list[float] ]: 

496 with open(filename) as file: 

497 # First lines include metadata such as the title, description and legend 

498 # Read lines until we find the start of the array 

499 metadata = [file.readline()] 

500 while not metadata[-1].startswith("static char *gromacs_xpm[]"): 

501 metadata.append(file.readline()) 

502 # The next line will contain the dimensions of the matrix 

503 # e.g. "7 7 14 1", 

504 dimensions = file.readline().replace('"','').replace(',','').split() 

505 x_dimension, y_dimension, entity_count, x_stride = [ int(i) for i in dimensions ] 

506 # Next lines contain every entity definition 

507 # Every entity has the following: 

508 # - A letter which is used to refer this entity later in the matrix 

509 # - A color in #XXXXXX format 

510 # - The actual value of the entity 

511 # e.g. "A c #FFFFFF " /* "0" */, 

512 # e.g. "B c #EBEBFF " /* "1" */, 

513 entities = {} 

514 for i in range(entity_count): 

515 line = file.readline() 

516 entity_id = line[1] 

517 entity_color = line[6:13] 

518 entity_value = line[18:].split('"')[1] 

519 entities[entity_id] = { 'value': entity_value, 'color': entity_color } 

520 # Next lines are the matrix axis values 

521 x_axis = [] 

522 y_axis = [] 

523 # Every line has a maximum of 80 labels 

524 x_lines = math.ceil(x_dimension / 80) # type: ignore 

525 for l in range(x_lines): 

526 line = file.readline()[12:-3].split() 

527 x_axis += [ int(v) for v in line ] 

528 y_lines = math.ceil(y_dimension / 80) # type: ignore 

529 for l in range(y_lines): 

530 line = file.readline()[12:-3].split() 

531 y_axis += [ int(v) for v in line ] 

532 # Next lines are the matrix rows 

533 matrix = [] 

534 for l in range(y_dimension): 

535 line = file.readline()[1:1+x_dimension] 

536 row = [ letter for letter in line ] 

537 matrix.append(row) 

538 # Check the final matrix size is as expected 

539 if len(matrix) != y_dimension: 

540 raise ValueError('Different number of rows than expected') 

541 sample_row = matrix[-1] 

542 if len(sample_row) != x_dimension: 

543 raise ValueError('Different number of columns than expected') 

544 # Return the output 

545 return { 'entities': entities, 'x_axis': x_axis, 'y_axis': y_axis, 'matrix': matrix } 

546 

547 

548def pdb_filter ( 

549 input_pdb_filepath : str, 

550 output_pdb_filepath : str, 

551 index_filepath : str, 

552 filter_group_name : str 

553): 

554 """ Filter atoms in a pdb file. 

555 This method conserves maximum resolution and chains. """ 

556 # Filter the PDB 

557 run_gromacs(f'editconf -f {input_pdb_filepath} -o {output_pdb_filepath} \ 

558 -n {index_filepath}', user_input = filter_group_name) 

559 

560 

561def xtc_filter( 

562 structure_filepath : str, 

563 input_trajectory_filepath : str, 

564 output_trajectory_filepath : str, 

565 index_filepath : str, 

566 filter_group_name : str 

567): 

568 """ Filter atoms in a xtc file. 

569 Note that here we do not hide the stderr. 

570 This is because it shows the progress. 

571 Instead we color the output grey. """ 

572 # Filter the trajectory 

573 run_gromacs(f'trjconv -s {structure_filepath} -f {input_trajectory_filepath} \ 

574 -o {output_trajectory_filepath} -n {index_filepath}', user_input = filter_group_name) 

575 

576 

577def tpr_filter( 

578 input_tpr_filepath : str, 

579 output_tpr_filepath : str, 

580 index_filepath : str, 

581 filter_group_name : str 

582): 

583 """ Filter atoms in both a pdb and a xtc file. """ 

584 # Filter the topology 

585 run_gromacs(f'convert-tpr -s {input_tpr_filepath} -o {output_tpr_filepath} \ 

586 -n {index_filepath}', user_input = filter_group_name) 

587 

588 

589def make_index (input_structure_file : 'File', output_index_file : 'File', mask : str) -> str: 

590 """ Create a .ndx file from a complex mask. 

591 e.g. no water and no ions -> !"Water"&!"Ion". 

592 This will return the group name to be further used. """ 

593 # Make sure the 

594 #if output_index_file.exists: output_index_file.remove() 

595 # Run Gromacs 

596 run_gromacs(f'make_ndx -f {input_structure_file.path} -o {output_index_file.path}', 

597 user_input = f'{mask} \nq') 

598 # The group name is automatically assigned by gromacs 

599 # It equals the mask but removing any " symbols and with a few additional changes 

600 # WARNING: Not all mask symbols are supported here, beware of complex masks 

601 group_name = mask.replace('"','').replace('&','_&_').replace('|','_') 

602 # Check if the group was created 

603 content = read_ndx(output_index_file) 

604 created = group_name in content 

605 return group_name, created 

606 

607 

608def read_ndx (input_index_file : 'File') -> dict: 

609 """ Read a .ndx file """ 

610 # Make sure the file exists 

611 if not input_index_file.exists: 

612 raise RuntimeError(f'Index file {input_index_file.path} does not exist') 

613 # Read the ndx file 

614 ndx_content = None 

615 with open(input_index_file.path, 'r') as file: 

616 ndx_content = file.readlines() 

617 # Parse its contents 

618 groups = {} 

619 current_group_name = None 

620 current_group_indices = None 

621 for line in ndx_content: 

622 # If it is the header of a group of indices 

623 if line[0] == '[': 

624 # Add current group to the final dict 

625 if current_group_name != None: 

626 groups[current_group_name] = current_group_indices 

627 # Set the name and the list of indices for the next group 

628 current_group_name = line[2:-3] 

629 current_group_indices = [] 

630 continue 

631 # If it is a line of indices then parse it to actual numbers 

632 # Substract 1 since gromacs indices are 1-based while we want them 0-based 

633 current_group_indices += [ int(index) - 1 for index in line.split() ] 

634 # Add last group to the final dict 

635 groups[current_group_name] = current_group_indices 

636 # Finally return the parsed content 

637 return groups 

638 

639 

640ATOMS_LINE = r'\n# Atoms\s*([0-9]*)\n' 

641def get_atom_count (mysterious_file : 'File') -> int: 

642 """ Count atoms in a gromacs supported file. """ 

643 output_logs, error_logs = run_gromacs(f'check -f {mysterious_file.path}') 

644 search_results = search(ATOMS_LINE, error_logs) 

645 if not search_results: raise ValueError('Failed to mine atoms') 

646 return int(search_results[1])