Coverage for mddb_workflow / console.py: 77%

320 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1from os.path import exists 

2from shutil import copyfile 

3from subprocess import call 

4from argparse import ArgumentParser, RawTextHelpFormatter, Action, _SubParsersAction 

5from textwrap import wrap, dedent 

6import re 

7 

8from mddb_workflow.mwf import workflow, Project, requestables, DEPENDENCY_FLAGS 

9from mddb_workflow.utils.structures import Structure 

10from mddb_workflow.utils.file import File 

11from mddb_workflow.utils.filters import filter_atoms 

12from mddb_workflow.utils.subsets import get_trajectory_subset 

13from mddb_workflow.utils.constants import * 

14from mddb_workflow.utils.auxiliar import InputError 

15from mddb_workflow.utils.nassa_file import generate_nassa_config 

16from mddb_workflow.tools.conversions import convert 

17from mddb_workflow.tools.check_inputs import TRAJECTORY_SUPPORTED_FORMATS, TOPOLOGY_SUPPORTED_FORMATS, STRUCTURE_SUPPORTED_FORMATS 

18from mddb_workflow.analyses.nassa import workflow_nassa 

19from mddb_workflow.core.dataset import Dataset 

20 

# Names of the parameters accepted by Project.__init__, used by main() to split
# the parsed console arguments between the Project constructor and the workflow call.
# NOTE: co_varnames lists the function parameters first and then its local variables,
# so slice it to the parameter count; otherwise a local variable of Project.__init__
# sharing a name with a workflow argument would misroute that argument to Project.
_project_init_code = Project.__init__.__code__
expected_project_args = set(
    _project_init_code.co_varnames[:_project_init_code.co_argcount + _project_init_code.co_kwonlyargcount])

# Documentation URLs referenced from several help texts below
test_docs_url = 'https://mddb-workflow.readthedocs.io/en/latest/usage.html#tests-and-other-checking-processes'
task_docs_url = 'https://mddb-workflow.readthedocs.io/en/latest/tasks.html'

25 

26 

class CustomHelpFormatter(RawTextHelpFormatter):
    """Custom formatter for argparse help text with better organization and spacing."""

    def __init__(self, prog, indent_increment=2, max_help_position=6, width=None):
        super().__init__(prog, indent_increment, max_help_position, width)

    def _get_help_string(self, action):
        """Return the action help text, appending a 'Default:' note when meaningful.

        The note is skipped when the help already interpolates %(default), and for
        suppressed, falsy or '.' defaults.
        """
        import argparse
        # Guard against arguments declared without a help text: action.help is
        # None in that case and the substring test below would raise TypeError
        help = action.help or ''
        if '%(default)' not in help:
            if action.default is not argparse.SUPPRESS and \
               action.default and action.default != '.':
                defaulting_nargs = [argparse.OPTIONAL, argparse.ZERO_OR_MORE]
                if action.option_strings or action.nargs in defaulting_nargs:
                    help += '\nDefault: %(default)s'
        return help

    def _split_lines(self, text, width):
        """Wrap help text line by line, keeping URLs unwrapped and dropping blank lines."""
        lines = []
        for line in text.splitlines():
            if line.strip() != '':
                if line.startswith('https'):
                    # Never wrap URLs: a broken URL cannot be clicked/copied
                    lines.append(line)
                else:
                    lines.extend(wrap(line, width, break_long_words=False, replace_whitespace=False))
        return lines

    def _format_usage(self, usage, actions, groups, prefix):
        """Compact the usage line, merging the -i/-e/-ow entries for the 'run' subcommand."""
        essential_usage = super()._format_usage(usage, actions, groups, prefix)
        # Only for mwf run
        if 'run' in self._prog:
            # Combine the arguments for -i, -e, -ow into a single usage entry
            lines = essential_usage.split('\n')
            filtered_lines = []
            for line in lines:
                if line.strip().startswith("[-i "):
                    line = line.replace("[-i", "[-i/-e/-ow")
                    filtered_lines.append(line)
                elif line.strip().startswith("[-e") or line.strip().startswith("[-ow"):
                    # Dropped: already represented by the combined -i/-e/-ow entry
                    continue
                else:
                    filtered_lines.append(line)
            essential_usage = '\n'.join(filtered_lines)
        return essential_usage

    def _format_action_invocation(self, action):
        """Format the display of options with choices more cleanly."""
        if not action.option_strings:
            # This is a positional argument
            return super()._format_action_invocation(action)
        # For options with choices, format them nicely
        opts = ', '.join(action.option_strings)
        # Special case for include, exclude, and overwrite: their (long) choices
        # are listed in the group description instead, so show a short metavar
        if action.dest in ['include', 'exclude', 'overwrite']:
            metavar = 'TASKS'
            return f"{opts} {metavar}"
        if action.nargs == 0:
            # Boolean flag
            return opts
        else:
            # Format with metavar or choices
            metavar = self._format_args(action, action.dest.upper())
            if action.choices:
                choice_str = '{' + ','.join(str(c) for c in action.choices) + '}'
                return f"{opts} [{choice_str}]"
            else:
                return f"{opts} {metavar}"

100 

class CustomArgumentParser(ArgumentParser):
    """ArgumentParser that reports errors in terms of the active subcommand."""

    def error(self, message):
        """Print an error message (plus the relevant usage text) and exit with code 2.

        When the command line names a known subcommand, the error and usage shown
        belong to that subparser instead of the main parser.
        """
        import sys
        # Figure out whether the first CLI token names a known subcommand
        if getattr(self, '_subparsers', None) is not None:
            known_subcommands = {
                name
                for action in self._subparsers._actions
                if isinstance(action, _SubParsersAction)
                for name in action.choices
            }
            if len(sys.argv) > 1 and sys.argv[1] in known_subcommands:
                self.subcommand = sys.argv[1]
        active = getattr(self, 'subcommand', None)
        if active:
            # Report the error in the context of the subcommand and show its usage
            self._print_message(f"{self.prog} {active}: error: {message}\n", sys.stderr)
            for action in self._subparsers._actions:
                if isinstance(action, _SubParsersAction):
                    for name, subparser in action.choices.items():
                        if name == active:
                            subparser.print_usage()
                            break
        else:
            # Fall back to the default error report of the main parser
            self.print_usage(sys.stderr)
            self._print_message(f"{self.prog}: error: {message}\n", sys.stderr)
        sys.exit(2)

130 

131 

def parse_docstring_for_help(docstring):
    """Extract per-argument help texts from a Google-style docstring.

    Returns a mapping from argument name to its cleaned multi-line help text,
    or an empty dict when there is no docstring or no ``Args:`` section.
    """
    if not docstring:
        return {}

    text = dedent(docstring)
    # The Args section runs until the first blank line (or the end of the docstring)
    section = re.search(r'Args:\n(.*?)(?=\n\n|\Z)', text, re.S)
    if section is None:
        return {}

    # Each entry: argument name, parenthesized type, then a description that may
    # span several lines (up to the next "name (" header or the end of section)
    entry_pattern = r'^\s*([a-zA-Z0-9_]+)\s*\(.*?\):\s*(.*?)(?=\n\s*[a-zA-Z0-9_]+\s*\(|\Z)'
    entries = re.findall(entry_pattern, section.group(1), re.S | re.M)

    help_dict = {}
    for name, raw_text in entries:
        # Normalize each description line; list items keep a single-space indent
        cleaned_lines = [
            (' ' + line.strip()) if line.lstrip().startswith('-') else line.strip()
            for line in raw_text.strip().split('\n')
        ]
        help_dict[name] = '\n'.join(cleaned_lines)
    return help_dict

159 

160 

def pretty_list(availables: list[str]) -> str:
    """Render the available checkings / failures as a bullet list with a docs link."""
    parts = ['\n']
    for flag in availables:
        nice_name = NICE_NAMES.get(flag)
        if not nice_name:
            raise Exception(f'Flag "{flag}" has not a defined nice name')
        parts.append(f'\n - {flag}: {nice_name}')
    parts.append(f'\nTo know more about each test please visit:\n{test_docs_url}')
    return ''.join(parts)

171 

172 

class custom (Action):
    """Argparse action combining nargs='*' with a const fallback.

    Needed because argparse does not support const together with nargs='*':
    https://stackoverflow.com/questions/72803090/argparse-how-to-create-the-equivalent-of-const-with-nargs
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Store the given values, falling back to const when none were passed.

        Argument not passed at all -> the declared default applies.
        Argument passed with no values -> const.
        Argument passed with values -> those values.
        """
        chosen = values if values else self.const
        setattr(namespace, self.dest, chosen)

191 

192 

def main() -> None:
    """Console entry point: parse the CLI arguments and dispatch the chosen subcommand.

    Each branch below reads the arguments declared by the corresponding subparser
    (defined at module level) and forwards them to the matching tool or workflow.
    """
    # Parse input arguments from the console
    args = parser.parse_args()
    # Remember the subcommand on the parser so CustomArgumentParser.error can report it in context
    if hasattr(args, 'subcommand') and args.subcommand:
        parser.subcommand = args.subcommand
    # Apply common arguments as necessary
    if hasattr(args, 'no_symlinks') and args.no_symlinks:
        GLOBALS['no_symlinks'] = True
    if hasattr(args, 'no_colors') and args.no_colors:
        GLOBALS['no_colors'] = True
    # Find which subcommand was called
    subcommand = args.subcommand
    # If there is not subcommand then print help
    if not subcommand:
        parser.print_help()
    # If user wants to run the workflow
    elif subcommand == "run":
        # Get all parsed arguments as a dictionary
        dict_args = vars(args)
        # Remove arguments not related to this subcommand
        del dict_args['subcommand']
        # Remove common arguments from the dict as well
        common_args = [action.dest for action in common_parser._actions]
        for arg in common_args:
            del dict_args[arg]
        # Find out which arguments are for the Project class and which ones are for the workflow
        project_args = {}
        workflow_args = {}
        for k, v in dict_args.items():
            if k in expected_project_args:
                project_args[k] = v
            else:
                workflow_args[k] = v
        # Call the actual main function
        workflow(project_parameters=project_args, **workflow_args)
    # If user wants to setup the inputs
    elif subcommand == "inputs":
        # Make a copy of the template in the local directory if there is not an inputs file yet
        if exists(DEFAULT_INPUTS_FILENAME):
            print(f"File {DEFAULT_INPUTS_FILENAME} already exists")
        else:
            copyfile(INPUTS_TEMPLATE_FILEPATH, DEFAULT_INPUTS_FILENAME)
            print(f"File {DEFAULT_INPUTS_FILENAME} has been generated")
        # Set the editor to be used to modify the inputs file
        editor_command = args.editor
        if editor_command:
            # 'none' means the user explicitly wants no editor opened
            if editor_command == 'none': return
            return call([editor_command, DEFAULT_INPUTS_FILENAME])
        # If no editor argument is passed then ask the user for one
        print("Choose your preferred editor:")
        available_editors = list(AVAILABLE_TEXT_EDITORS.keys())
        for i, editor_name in enumerate(available_editors, 1):
            print(f"{i}. {editor_name}")
        print("*. exit")
        try:
            choice = int(input("Number: ").strip())
            # Out-of-range numbers are treated like any other invalid input
            if not (1 <= choice <= len(available_editors)): raise ValueError
            editor_name = available_editors[choice - 1]
            editor_command = AVAILABLE_TEXT_EDITORS[editor_name]
            # Open a text editor for the user
            print(f"{editor_name} was selected")
            call([editor_command, DEFAULT_INPUTS_FILENAME])
        except ValueError:
            print("No editor was selected")

    # In case the convert tool was called
    elif subcommand == 'convert':
        # If no input arguments are passed print help
        if args.input_structure is None and args.input_trajectories is None:
            convert_parser.print_help()
            return
        if args.input_trajectories is None:
            args.input_trajectories = []
        # Run the convert command
        convert(
            input_structure_filepath=args.input_structure,
            output_structure_filepath=args.output_structure,
            input_trajectory_filepaths=args.input_trajectories,
            output_trajectory_filepath=args.output_trajectory,
        )

    # In case the filter tool was called
    elif subcommand == 'filter':
        input_structure_file = File(args.input_structure) if args.input_structure else None
        output_structure_file = File(args.output_structure) if args.output_structure else None
        input_trajectory_file = File(args.input_trajectory) if args.input_trajectory else None
        output_trajectory_file = File(args.output_trajectory) if args.output_trajectory else None
        # Run the atom filtering
        filter_atoms(
            input_structure_file=input_structure_file,
            output_structure_file=output_structure_file,
            input_trajectory_file=input_trajectory_file,
            output_trajectory_file=output_trajectory_file,
            selection_string=args.selection_string,
            selection_syntax=args.selection_syntax
        )
        print('There you have it :)')

    # In case the subset tool was called
    elif subcommand == 'subset':
        # When no output is given the input trajectory is overwritten
        output_trajectory = args.output_trajectory if args.output_trajectory else args.input_trajectory
        get_trajectory_subset(
            input_structure_file=File(args.input_structure),
            input_trajectory_file=File(args.input_trajectory),
            output_trajectory_file=File(output_trajectory),
            start=args.start,
            end=args.end,
            step=args.step,
            skip=args.skip,
            frames=args.frames
        )
        print('All done :)')

    # In case the chainer tool was called
    elif subcommand == 'chainer':
        # Parse the structure
        structure = Structure.from_pdb_file(args.input_structure)
        # Select atoms according to inputs
        selection = structure.select(args.selection_string, args.selection_syntax) if args.selection_string else structure.select_all()
        if not selection: raise InputError(f'Empty selection {selection}')
        # Run the chainer logic
        structure.chainer(selection, args.letter, args.whole_fragments)
        # Generate the output file from the modified structure
        structure.generate_pdb_file(args.output_structure)
        print(f'Changes written to {args.output_structure}')
    # If user wants to run over a whole dataset of projects
    elif subcommand == 'dataset':
        if not hasattr(args, 'dataset_subcommand') or not args.dataset_subcommand:
            dataset_parser.print_help()
            return

        dataset = Dataset(dataset_yaml_path=args.dataset_yaml)

        if args.dataset_subcommand == 'run':
            dataset.launch_workflow(
                include_groups=args.include_groups,
                exclude_groups=args.exclude_groups,
                n_jobs=args.n_jobs,
                slurm=args.slurm,
                job_template=args.job_template,
                debug=args.debug
            )
        elif args.dataset_subcommand == 'groups':
            dataset.show_groups(cmd=True)
    # If user wants to run the NASSA analysis
    elif subcommand == "nassa":
        # If no input arguments are passed print help
        # NOTE(review): make_config defaults to None (not False), so the 'is False'
        # comparison below never matches -- confirm the intended condition
        if args.config is None and args.make_config is False:
            nassa_parser.print_help()
            print('Please provide a configuration file or make one with the -m flag')
            return
        # If the user wants to make a configuration file
        if args.make_config:
            # NOTE(review): with nargs='*' make_config is a list or None, never the
            # literal True, so only the == [] comparison can match here
            if args.make_config is True or args.make_config == []:
                # Make a copy of the template in the local directory if there is not an inputs file yet
                if not exists(DEFAULT_NASSA_CONFIG_FILENAME):
                    copyfile(NASSA_TEMPLATE_FILEPATH, DEFAULT_NASSA_CONFIG_FILENAME)
                # Open a text editor for the user
                call(["vim", DEFAULT_NASSA_CONFIG_FILENAME])
                print('Configuration file created as nassa.json\nNow you can run the analysis with the -c flag.')
                return
            # If the user provides a path to the files
            else:
                generate_nassa_config(
                    args.make_config,
                    args.seq_path,
                    args.output,
                    args.unit_len,
                    args.n_sequences
                )
                print('Configuration file created as nassa.json\nNow you can run the analysis with the -c flag.')
                return
        # If the user wants to run the analysis. With the config file an analysis name must be provided, or the all flag must be set
        if args.config and args.analysis_names is None and args.all is False:
            nassa_parser.print_help()
            print('Please provide an analysis name to run:', ', '.join(NASSA_ANALYSES_LIST))
            return
        # If the user wants to run the helical parameters analysis we must check if the necessary files are provided (structure, topology and trajectory)
        if args.helical_parameters:
            # Also, it is necessary to provide the project directories. Each of the project directories must contain an independent MD
            if args.proj_directories is None:
                nassa_parser.print_help()
                print('Please provide a project directory to run the helical parameters analysis with the -pdirs flag')
                return
            if args.input_structure_filepath is None:
                raise InputError('Please provide a structure file to run the helical parameters analysis with the -stru flag')
            elif args.input_trajectory_filepath is None:
                raise InputError('Please provide a trajectory file to run the helical parameters analysis with the -traj flag')
            elif args.input_topology_filepath is None:
                raise InputError('Please provide a topology file to run the helical parameters analysis with the -top flag')
            # If the all flag is set, the user must provide the path to the sequences because it is necessary to create the nassa.yml and run the NASSA analysis
            if args.all:
                if not args.seq_path:
                    raise InputError('Please, if all option is selected provide the path to the sequences (--seq_path)')
            # If all the flags are correctly set, we can run the analysis
            workflow_nassa(
                config_file_path=None, # The configuration file is not needed in this case because we are going to run the helical parameters analysis so it will be created then
                analysis_names=args.analysis_names,
                overwrite=args.overwrite,
                overwrite_nassa=args.overwrite_nassa,
                helical_par=args.helical_parameters,
                proj_dirs=args.proj_directories,
                input_structure_file=args.input_structure_filepath,
                input_trajectory_file=args.input_trajectory_filepath,
                input_top_file=args.input_topology_filepath,
                all=args.all,
                unit_len=args.unit_len,
                n_sequences=args.n_sequences,
                seq_path=args.seq_path,
                md_directories=args.md_directories,
                trust=args.trust,
                mercy=args.mercy
            )
        # If the user wants to run the NASSA analysis with the config file already created and the analysis name provided
        else:
            # NOTE(review): dict_args is built but never used below -- TODO: ask Dani
            # whether this deletion is still needed (original note: "preguntar Dani")
            dict_args = vars(args)
            del dict_args['subcommand']
            # Call the actual main function
            workflow_nassa(
                config_file_path=args.config,
                analysis_names=args.analysis_names,
                make_config=args.make_config,
                output=args.output,
                working_directory=args.working_directory,
                overwrite=args.overwrite,
                overwrite_nassa=args.overwrite_nassa,
                n_sequences=args.n_sequences,
                unit_len=args.unit_len,
                all=args.all,
                md_directories=args.md_directories,
                trust=args.trust,
                mercy=args.mercy
            )

428 

429 

# Parser holding the arguments shared by every subcommand; each subparser
# below inherits these through parents=[common_parser].
common_parser = ArgumentParser(add_help=False)

# Symlinks are not allowed in all file systems. This flag makes the workflow
# copy files instead, which takes more time and disk but is always portable.
common_parser.add_argument(
    "-ns", "--no_symlinks", default=False, action='store_true',
    help="Do not use symlinks internally")
common_parser.add_argument(
    "-nc", "--no_colors", default=False, action='store_true',
    help="Do not use colors for logging")

439 

# Define console arguments to call the workflow
# CustomArgumentParser makes error messages subcommand-aware
parser = CustomArgumentParser(description="MDDB Workflow")
subparsers = parser.add_subparsers(help='Name of the subcommand to be used', dest="subcommand")

# Reuse the Args sections of the Project/workflow docstrings as argparse help texts
project_init_help = parse_docstring_for_help(Project.__init__.__doc__)
workflow_help = parse_docstring_for_help(workflow.__doc__)

446 

# Set the run subcommand
run_parser = subparsers.add_parser("run",
    help="Run the workflow",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser]
)

# Set optional arguments
# Each entry pairs the option flags with the add_argument keyword arguments
run_parser_input_group = run_parser.add_argument_group('INPUT OPTIONS')
run_parser_input_args = [
    # There is no default since many formats may be possible
    (['-top', '--input_topology_filepath'], {'default': None, 'help': project_init_help['input_topology_filepath']+f'\nSupported formats: {", ".join(TOPOLOGY_SUPPORTED_FORMATS)}.'}),
    (['-stru', '--input_structure_filepath'], {'default': None, 'help': project_init_help['input_structure_filepath']+f'\nSupported formats: {", ".join(STRUCTURE_SUPPORTED_FORMATS)}.'}),
    (['-traj', '--input_trajectory_filepaths'], {'default': None, 'nargs': '*', 'help': project_init_help['input_trajectory_filepaths']+f'\nSupported formats: {", ".join(TRAJECTORY_SUPPORTED_FORMATS)}.'}),
    (['-dir', '--working_directory'], {'default': '.', 'help': "Directory where the whole workflow is run."}),
    (['-mdir', '--md_directories'], {'default': None, 'nargs': '*', 'help': project_init_help['md_directories']}),
    (['-md', '--md_config'], {'action': 'append', 'default': None, 'nargs': '*', 'help': project_init_help['md_config']}),
    (['-proj', '--accession'], {'default': None, 'help': project_init_help['accession']}),
    (['-url', '--database_url'], {'default': DEFAULT_API_URL, 'help': project_init_help['database_url']}),
    (['-inp', '--inputs_filepath'], {'default': None, 'help': "Path to inputs file"}),
    (['-fin', '--forced_inputs'], {'action': 'append', 'nargs': '*', 'default': None, 'help': project_init_help['forced_inputs']}),
    (['-pop', '--populations_filepath'], {'default': DEFAULT_POPULATIONS_FILENAME, 'help': project_init_help['populations_filepath']}),
    (['-tpro', '--transitions_filepath'], {'default': DEFAULT_TRANSITIONS_FILENAME, 'help': project_init_help['transitions_filepath']}),
    (['-ad', '--aiida_data_filepath'], {'default': None, 'help': project_init_help['aiida_data_filepath']}),
]
for flags, kwargs in run_parser_input_args:
    run_parser_input_group.add_argument(*flags, **kwargs)

474 

# Set a group for the workflow control options
# Each entry pairs the option flags with the add_argument keyword arguments
run_parser_workflow_group = run_parser.add_argument_group('WORKFLOW CONTROL OPTIONS')
run_parser_workflow_args = [
    (['-img', '--image'], {'action': 'store_true', 'help': project_init_help['image']}),
    (['-fit', '--fit'], {'action': 'store_true', 'help': project_init_help['fit']}),
    (['-trans', '--translation'], {'nargs': '*', 'default': [0, 0, 0], 'help': project_init_help['translation']}),
    (['-d', '--download'], {'action': 'store_true', 'help': workflow_help['download']}),
    (['-s', '--setup'], {'action': 'store_true', 'help': workflow_help['setup']}),
    # nargs='?' with const: a bare -smp samples 10 frames
    (['-smp', '--sample_trajectory'], {'type': int, 'nargs': '?', 'default': None, 'const': 10, 'metavar': 'N_FRAMES', 'help': project_init_help['sample_trajectory']}),
    (['-rcut', '--rmsd_cutoff'], {'type': float, 'default': DEFAULT_RMSD_CUTOFF, 'help': project_init_help['rmsd_cutoff']}),
    (['-icut', '--interaction_cutoff'], {'type': float, 'default': DEFAULT_INTERACTION_CUTOFF, 'help': project_init_help['interaction_cutoff']}),
    (['-iauto', '--interactions_auto'], {'type': str, 'nargs': '?', 'const': True, 'help': project_init_help['interactions_auto']}),
    (['-gb', '--guess_bonds'], {'action': 'store_true', 'help': project_init_help['guess_bonds']}),
    (['-ib', '--ignore_bonds'], {'action': 'store_true', 'help': project_init_help['ignore_bonds']}),
]
for flags, kwargs in run_parser_workflow_args:
    run_parser_workflow_group.add_argument(*flags, **kwargs)

492 

# Set a group for the selection options
# Each entry pairs the option flags with the add_argument keyword arguments
run_parser_selection_group = run_parser.add_argument_group('SELECTION OPTIONS')
run_parser_selection_args = [
    # nargs='?' with const: a bare -filt enables filtering with its default selection
    (['-filt', '--filter_selection'], {'nargs': '?', 'default': False, 'const': True, 'help': project_init_help['filter_selection']}),
    (['-pbc', '--pbc_selection'], {'default': None, 'help': project_init_help['pbc_selection']}),
    (['-cg', '--cg_selection'], {'default': None, 'help': project_init_help['cg_selection']}),
    (['-pcafit', '--pca_fit_selection'], {'default': PROTEIN_AND_NUCLEIC_BACKBONE, 'help': project_init_help['pca_fit_selection']}),
    (['-pcana', '--pca_analysis_selection'], {'default': PROTEIN_AND_NUCLEIC_BACKBONE, 'help': project_init_help['pca_analysis_selection']}),
]
for flags, kwargs in run_parser_selection_args:
    run_parser_selection_group.add_argument(*flags, **kwargs)

504 

# Set a group with all input checking options
# Both -t and -m use the 'custom' action: passed alone they apply to every checking
run_parser_checks_group = run_parser.add_argument_group('INPUT CHECKS OPTIONS', description=f"For more information about each check please visit:\n{test_docs_url}")
run_parser_checks_args = [
    (['-t', '--trust'], {'default': [], 'nargs': '*', 'action': custom, 'const': AVAILABLE_CHECKINGS, 'choices': AVAILABLE_CHECKINGS,
        'help': ("If passed, do not run the specified checking. Note that all checkings are skipped if passed alone. "
                 "Available checkings:" + pretty_list(AVAILABLE_CHECKINGS))}),
    (['-m', '--mercy'], {'default': [], 'nargs': '*', 'action': custom, 'const': AVAILABLE_FAILURES, 'choices': AVAILABLE_FAILURES,
        'help': ("If passed, do not kill the process when any of the specfied checkings fail and proceed with the workflow. "
                 "Note that all checkings are allowed to fail if the argument is passed alone. "
                 "Available checkings:" + pretty_list(AVAILABLE_FAILURES))}),
    (['-f', '--faith'], {'default': False, 'action': 'store_true',
        'help': ("Use this flag to force-skip all data processing thus asuming inputs are already processed.\n"
                 "WARNING: Do not use this flag if you don't know what you are doing.\n"
                 "This may lead to several silent errors.")}),
    (['-sl', '--ssleep'], {'default': False, 'action': 'store_true',
        'help': ("Use this flag to skip SSL certificate authentication.\n"
                 "WARNING: Do not use this flag if you don't trust the data source.")}),
]
for flags, kwargs in run_parser_checks_args:
    run_parser_checks_group.add_argument(*flags, **kwargs)

525 

# Set a list with the alias of all requestable dependencies
choices = sorted(list(requestables.keys()) + list(DEPENDENCY_FLAGS.keys()))
# Human-readable description of each preconfigured dependency group flag
task_groups = [
    "download: Check/download input files (already ran with analyses)",
    "setup: Process and test input files (already ran with analyses)",
    "meta: Run project and MD metadata analyses",
    "network: Run dependencies which require internet connection",
    "minimal: Run dependencies required by the web client to work",
    "interdeps: Run interactions and all its dependent analyses",
    "membs: Run all membrane-related analyses",
]
# Keep this list in sync with DEPENDENCY_FLAGS so the help text stays complete
assert len(DEPENDENCY_FLAGS.keys()) == len(task_groups), "The number of dependency flags and task groups must be the same"

run_parser_analysis_group = run_parser.add_argument_group('TASKS OPTIONS',
    description=f"Available tasks: {choices}\nFor more information about each task, please visit:\n{task_docs_url}")
run_parser_analysis_args = [
    (['-i', '--include'], {'nargs': '*', 'choices': choices,
        'help': ("Set the unique analyses or tools to be run. All other steps will be skipped.\n"
                 "There are also some additional flags to define a preconfigured group of dependencies:"
                 + '\n - ' + '\n - '.join(task_groups))}),
    (['-e', '--exclude'], {'nargs': '*', 'choices': choices, 'help': workflow_help['exclude']}),
    # 'custom' action: a bare -ow overwrites everything (const=True)
    (['-ow', '--overwrite'], {'type': str, 'nargs': '*', 'default': [], 'action': custom, 'const': True, 'choices': choices, 'help': workflow_help['overwrite']}),
]
for flags, kwargs in run_parser_analysis_args:
    run_parser_analysis_group.add_argument(*flags, **kwargs)

551 

552 

# Add a new command to aid in the inputs file setup
inputs_parser = subparsers.add_parser("inputs",
    help="Set the inputs file",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser]
)
# Choose the editor in advance ('none' skips opening any editor)
inputs_parser.add_argument(
    "-ed", "--editor",
    choices=[*AVAILABLE_TEXT_EDITORS.values(), 'none'],
    help="Set the text editor to modify the inputs file")

564 

565 

# The convert command
convert_parser = subparsers.add_parser("convert",
    help="Convert a structure and/or several trajectories to other formats\n" +
    "If several input trajectories are passed they will be merged previously",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser])
# Each entry pairs the option flags with the add_argument keyword arguments
convert_parser_args = [
    (['-is', '--input_structure'], {'help': "Path to input structure file"}),
    (['-os', '--output_structure'], {'help': "Path to output structure file"}),
    (['-it', '--input_trajectories'], {'nargs': '*', 'help': "Path to input trajectory file or same format files."}),
    (['-ot', '--output_trajectory'], {'help': "Path to output trajectory file"}),
]
for flags, kwargs in convert_parser_args:
    convert_parser.add_argument(*flags, **kwargs)

580 

581 

# The filter command
filter_parser = subparsers.add_parser("filter",
    help="Filter atoms in a structure and/or a trajectory\n",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser])
# Each entry pairs the option flags with the add_argument keyword arguments
filter_parser_args = [
    (["-is", "--input_structure"], {'required': True, 'help': "Path to input structure file"}),
    (["-os", "--output_structure"], {'help': "Path to output structure file"}),
    (["-it", "--input_trajectory"], {'help': "Path to input trajectory file"}),
    (["-ot", "--output_trajectory"], {'help': "Path to output trajectory file"}),
    (["-sel", "--selection_string"], {'help': "Atom selection"}),
    (["-syn", "--selection_syntax"], {'default': 'vmd', 'help': "Atom selection syntax (vmd by default)"}),
]
for flags, kwargs in filter_parser_args:
    filter_parser.add_argument(*flags, **kwargs)

597 

598 

# The subset command
subset_parser = subparsers.add_parser("subset",
    help="Get a subset of frames from the current trajectory",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser])
# Each entry pairs the option flags with the add_argument keyword arguments
subset_parser_args = [
    (["-is", "--input_structure"], {'required': True, 'help': "Path to input structure file"}),
    (["-it", "--input_trajectory"], {'required': True, 'help': "Path to input trajectory file"}),
    (["-ot", "--output_trajectory"], {'help': "Path to output trajectory file"}),
    (["-start", "--start"], {'type': int, 'default': 0, 'help': "Start frame (0-based)"}),
    (["-end", "--end"], {'type': int, 'default': None, 'help': "End frame (0-based)"}),
    (["-step", "--step"], {'type': int, 'default': 1, 'help': "Frame step"}),
    (["-skip", "--skip"], {'nargs': '*', 'type': int, 'default': [], 'help': "Frames to be skipped (0-based)"}),
    (["-fr", "--frames"], {'nargs': '*', 'type': int, 'default': [], 'help': "Frames to be returned (0-based). Input frame order is ignored as original frame order is conserved."}),
]
for flags, kwargs in subset_parser_args:
    subset_parser.add_argument(*flags, **kwargs)

616 

617 

# The chainer command
chainer_parser = subparsers.add_parser("chainer",
    help="Edit structure (pdb) chains",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser])

def _str_to_bool(value):
    """Convert a CLI string into an actual boolean.

    Plain type=bool is a classic argparse pitfall: bool('False') is True, so
    any non-empty value used to enable the option. argparse converts the
    ValueError raised here into a proper 'invalid value' error message.
    """
    lowered = str(value).strip().lower()
    if lowered in ('true', '1', 'yes', 'y'):
        return True
    if lowered in ('false', '0', 'no', 'n'):
        return False
    raise ValueError(f"expected a boolean value, got '{value}'")

# Each entry pairs the option flags with the add_argument keyword arguments
chainer_parser_args = [
    (["-is", "--input_structure"], {'required': True, 'help': "Path to input structure file"}),
    (["-os", "--output_structure"], {'default': 'chained.pdb', 'help': "Path to output structure file"}),
    (["-sel", "--selection_string"], {'help': "Atom selection (the whole structure by default)"}),
    (["-syn", "--selection_syntax"], {'default': 'vmd', 'choices': Structure.SUPPORTED_SELECTION_SYNTAXES, 'help': "Atom selection syntax (VMD syntax by default)"}),
    (["-let", "--letter"], {'help': "New chain letter (one letter per fragment by default)"}),
    # Was 'type': bool, which silently treated '-whfr False' as True
    (["-whfr", "--whole_fragments"], {'type': _str_to_bool, 'default': False, 'help': "Consider fragments beyond the atom selection. Otherwise a fragment could end up having multiple chains."}),
]
for flags, kwargs in chainer_parser_args:
    chainer_parser.add_argument(*flags, **kwargs)

633 

634 

# The NASSA commands
nassa_parser = subparsers.add_parser("nassa", formatter_class=CustomHelpFormatter,
    help="Run and set the configuration of the NASSA analysis",
    parents=[common_parser])

# General NASSA options, registered data-driven as with the other commands
_nassa_general_args = [
    (["-c", "--config"], {
        'help': "Configuration file for the NASSA analysis"}),
    (["-n", "--analysis_names"], {
        'nargs': '*', 'default': None,
        'help': "Name of the analysis to be run. It can be: " + ', '.join(NASSA_ANALYSES_LIST)}),
    (["-w", "--make_config"], {
        'nargs': '*', 'default': None,
        'help': "Make a configuration file for the NASSA analysis: makecfg.\nThe base path could be given as an argument. If not, an example of configuration file is created."}),
    (["-seq", "--seq_path"], {
        'type': str, 'const': False, 'action': custom,
        'help': "Set the base path of the sequences. If not given, the sequences are searched in the current directory."}),
    (["-o", "--output"], {
        'help': "Output path for the NASSA analysis"}),
    (["-dir", "--working_directory"], {
        'default': '.',
        'help': "Directory where the whole workflow is run. Current directory by default."}),
]
for _flags, _kwargs in _nassa_general_args:
    nassa_parser.add_argument(*_flags, **_kwargs)

# Overwrite options: force the listed output files to be regenerated
nassa_parser.add_argument(
    "-ow", "--overwrite",
    type=str,
    nargs='*',
    default=[],
    action=custom,
    const=True,
    help="Set the output files to be overwritten thus re-running its corresponding analysis or tool")
nassa_parser.add_argument(
    "-own", "--overwrite_nassa",
    type=str,
    nargs='*',
    default=[],
    action=custom,
    const=True,
    help="Set the output files to be overwritten thus re-running its corresponding analysis or tool for the NASSA analysis")

# Analysis sizing and selection options
_nassa_run_args = [
    (["-nseq", "--n_sequences"], {
        'type': int,
        'help': "Number of sequences to be analyzed"}),
    (["-i", "--unit_len"], {
        'type': int, 'default': 6,
        'help': "Number of base pairs to be analyzed"}),
    (["-hp", "--helical_parameters"], {
        'action': 'store_true', 'default': False,
        'help': "Run the helical parameters analysis"}),
    (["-pdirs", "--proj_directories"], {
        'nargs': '*', 'default': None,
        'help': ("Path to the different project directories. Each directory is to contain an independent project.\n"
                 "Several output files will be generated in the same folder directory")}),
    (["-all", "--all"], {
        'action': 'store_true', 'default': False,
        'help': "Run all the helical parameters and NASSA analyses"}),
]
for _flags, _kwargs in _nassa_run_args:
    nassa_parser.add_argument(*_flags, **_kwargs)

# Input file path options (relative to the project or MD directories)
_nassa_input_args = [
    (["-stru", "--input_structure_filepath"], {
        'default': None,
        'help': ("Path to input structure file. It may be relative to the project or to each MD directory.\n"
                 "If this value is not passed then the standard structure file is used as input by default")}),
    (["-traj", "--input_trajectory_filepath"], {
        'nargs': '*', 'default': None,
        'help': ("Path to input trajectory file. It is relative to each MD directory.\n"
                 "If this value is not passed then the standard trajectory file path is used as input by default")}),
    # No default topology path since many formats may be possible
    (["-top", "--input_topology_filepath"], {
        'default': None,
        'help': "Path to input topology file. It is relative to the project directory."}),
    (["-mdir", "--md_directories"], {
        'nargs': '*', 'default': None,
        'help': ("Path to the different MD directories. Each directory is to contain an independent trajectory and structure.\n"
                 "Several output files will be generated in every MD directory")}),
]
for _flags, _kwargs in _nassa_input_args:
    nassa_parser.add_argument(*_flags, **_kwargs)

# Checking control: skip checkings entirely (-t) or tolerate their failures (-m)
nassa_parser.add_argument(
    "-t", "--trust",
    type=str,
    nargs='*',
    default=[],
    action=custom,
    const=AVAILABLE_CHECKINGS,
    choices=AVAILABLE_CHECKINGS,
    help="If passed, do not run the specified checking. Note that all checkings are skipped if passed alone. Available checkings:" + pretty_list(AVAILABLE_CHECKINGS)
)
nassa_parser.add_argument(
    "-m", "--mercy",
    type=str,
    nargs='*',
    default=[],
    action=custom,
    const=AVAILABLE_FAILURES,
    choices=AVAILABLE_FAILURES,
    help=("If passed, do not kill the process when any of the specified checkings fail and proceed with the workflow.\n"
          "Note that all checkings are allowed to fail if the argument is passed alone. Available checkings:" + pretty_list(AVAILABLE_FAILURES))
)

# Duplicate subunit handling
nassa_parser.add_argument(
    "-dup", "--duplicates",
    default=False,
    action='store_true',
    help="If passed, merge duplicate subunits in the sequences when there is more than one. Otherwise only the last one will be selected"
)

757 

# Dataset subcommand
dataset_parser = subparsers.add_parser("dataset", formatter_class=CustomHelpFormatter,
    help="Manage and process a dataset of MDDB projects.")
dataset_subparsers = dataset_parser.add_subparsers(dest='dataset_subcommand', help='Dataset subcommands')

# Dataset run subcommand
dataset_run_parser = dataset_subparsers.add_parser("run", formatter_class=CustomHelpFormatter,
    help="Run the workflow for a dataset of MDDB projects.",
    parents=[common_parser])
_dataset_run_args = [
    (["dataset_yaml"], {'help': "Path to the dataset YAML file."}),
    (["-ig", "--include-groups"], {'nargs': '*', 'type': int, 'default': [], 'help': "List of group IDs to be run."}),
    (["-eg", "--exclude-groups"], {'nargs': '*', 'type': int, 'default': [], 'help': "List of group IDs to be excluded."}),
    (["-n", "--n_jobs"], {'type': int, 'default': 0, 'help': "Number of jobs to run."}),
    (["--slurm"], {'action': 'store_true', 'help': "Submit the workflow to SLURM."}),
    (["-jt", "--job-template"], {'help': "Path to the SLURM job template file. Required if --slurm is used."}),
    (["--debug"], {'action': 'store_true', 'help': "Only print the commands without executing them."}),
]
for _flags, _kwargs in _dataset_run_args:
    dataset_run_parser.add_argument(*_flags, **_kwargs)

774 

775# Dataset status subcommand 

776dataset_status_parser = dataset_subparsers.add_parser("groups", formatter_class=CustomHelpFormatter, 

777 help="Show the status of projects in a dataset, grouped by their last log message.", 

778 parents=[common_parser]) 

779dataset_status_parser.add_argument("dataset_yaml", help="Path to the dataset YAML file.")