Coverage for mddb_workflow / console.py: 77%

320 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1from os.path import exists 

2from shutil import copyfile 

3from subprocess import call 

4from argparse import ArgumentParser, RawTextHelpFormatter, Action, _SubParsersAction 

5from textwrap import wrap, dedent 

6import re 

7 

8from mddb_workflow.mwf import workflow, Project, requestables, DEPENDENCY_FLAGS 

9from mddb_workflow.utils.structures import Structure 

10from mddb_workflow.utils.file import File 

11from mddb_workflow.utils.filters import filter_atoms 

12from mddb_workflow.utils.subsets import get_trajectory_subset 

13from mddb_workflow.utils.constants import * 

14from mddb_workflow.utils.auxiliar import InputError 

15from mddb_workflow.utils.nassa_file import generate_nassa_config 

16from mddb_workflow.tools.conversions import convert 

17from mddb_workflow.tools.check_inputs import TRAJECTORY_SUPPORTED_FORMATS, TOPOLOGY_SUPPORTED_FORMATS, STRUCTURE_SUPPORTED_FORMATS 

18from mddb_workflow.analyses.nassa import workflow_nassa 

19from mddb_workflow.core.dataset import Dataset 

20 

# Names of the parameters accepted by Project.__init__, used by main() to split
# the parsed console arguments between the Project constructor and the workflow call.
# NOTE: co_varnames lists the function parameters first and then its local variables,
# so slice it to the parameter count; otherwise a local variable of Project.__init__
# sharing a name with a workflow argument would misroute that argument to Project.
_project_init_code = Project.__init__.__code__
expected_project_args = set(
    _project_init_code.co_varnames[:_project_init_code.co_argcount + _project_init_code.co_kwonlyargcount])

# Documentation URLs referenced from several help texts below
test_docs_url = 'https://mddb-workflow.readthedocs.io/en/latest/usage.html#tests-and-other-checking-processes'
task_docs_url = 'https://mddb-workflow.readthedocs.io/en/latest/tasks.html'

25 

26 

class CustomHelpFormatter(RawTextHelpFormatter):
    """Custom formatter for argparse help text with better organization and spacing."""

    def __init__(self, prog, indent_increment=2, max_help_position=6, width=None):
        super().__init__(prog, indent_increment, max_help_position, width)

    def _get_help_string(self, action):
        """Return the action help text, appending a 'Default:' note when meaningful.

        The note is skipped when the help already interpolates %(default), and for
        suppressed, falsy or '.' defaults.
        """
        import argparse
        # Guard against arguments declared without a help text: action.help is
        # None in that case and the substring test below would raise TypeError
        help = action.help or ''
        if '%(default)' not in help:
            if action.default is not argparse.SUPPRESS and \
               action.default and action.default != '.':
                defaulting_nargs = [argparse.OPTIONAL, argparse.ZERO_OR_MORE]
                if action.option_strings or action.nargs in defaulting_nargs:
                    help += '\nDefault: %(default)s'
        return help

    def _split_lines(self, text, width):
        """Wrap help text line by line, keeping URLs unwrapped and dropping blank lines."""
        lines = []
        for line in text.splitlines():
            if line.strip() != '':
                if line.startswith('https'):
                    # Never wrap URLs: a broken URL cannot be clicked/copied
                    lines.append(line)
                else:
                    lines.extend(wrap(line, width, break_long_words=False, replace_whitespace=False))
        return lines

    def _format_usage(self, usage, actions, groups, prefix):
        """Compact the usage line, merging the -i/-e/-ow entries for the 'run' subcommand."""
        essential_usage = super()._format_usage(usage, actions, groups, prefix)
        # Only for mwf run
        if 'run' in self._prog:
            # Combine the arguments for -i, -e, -ow into a single usage entry
            lines = essential_usage.split('\n')
            filtered_lines = []
            for line in lines:
                if line.strip().startswith("[-i "):
                    line = line.replace("[-i", "[-i/-e/-ow")
                    filtered_lines.append(line)
                elif line.strip().startswith("[-e") or line.strip().startswith("[-ow"):
                    # Dropped: already represented by the combined -i/-e/-ow entry
                    continue
                else:
                    filtered_lines.append(line)
            essential_usage = '\n'.join(filtered_lines)
        return essential_usage

    def _format_action_invocation(self, action):
        """Format the display of options with choices more cleanly."""
        if not action.option_strings:
            # This is a positional argument
            return super()._format_action_invocation(action)
        # For options with choices, format them nicely
        opts = ', '.join(action.option_strings)
        # Special case for include, exclude, and overwrite: their (long) choices
        # are listed in the group description instead, so show a short metavar
        if action.dest in ['include', 'exclude', 'overwrite']:
            metavar = 'TASKS'
            return f"{opts} {metavar}"
        if action.nargs == 0:
            # Boolean flag
            return opts
        else:
            # Format with metavar or choices
            metavar = self._format_args(action, action.dest.upper())
            if action.choices:
                choice_str = '{' + ','.join(str(c) for c in action.choices) + '}'
                return f"{opts} [{choice_str}]"
            else:
                return f"{opts} {metavar}"

100 

class CustomArgumentParser(ArgumentParser):
    """ArgumentParser that reports errors in terms of the active subcommand."""

    def error(self, message):
        """Print an error message (plus the relevant usage text) and exit with code 2.

        When the command line names a known subcommand, the error and usage shown
        belong to that subparser instead of the main parser.
        """
        import sys
        # Figure out whether the first CLI token names a known subcommand
        if getattr(self, '_subparsers', None) is not None:
            known_subcommands = {
                name
                for action in self._subparsers._actions
                if isinstance(action, _SubParsersAction)
                for name in action.choices
            }
            if len(sys.argv) > 1 and sys.argv[1] in known_subcommands:
                self.subcommand = sys.argv[1]
        active = getattr(self, 'subcommand', None)
        if active:
            # Report the error in the context of the subcommand and show its usage
            self._print_message(f"{self.prog} {active}: error: {message}\n", sys.stderr)
            for action in self._subparsers._actions:
                if isinstance(action, _SubParsersAction):
                    for name, subparser in action.choices.items():
                        if name == active:
                            subparser.print_usage()
                            break
        else:
            # Fall back to the default error report of the main parser
            self.print_usage(sys.stderr)
            self._print_message(f"{self.prog}: error: {message}\n", sys.stderr)
        sys.exit(2)

130 

131 

def parse_docstring_for_help(docstring):
    """Extract per-argument help texts from a Google-style docstring.

    Returns a mapping from argument name to its cleaned multi-line help text,
    or an empty dict when there is no docstring or no ``Args:`` section.
    """
    if not docstring:
        return {}

    text = dedent(docstring)
    # The Args section runs until the first blank line (or the end of the docstring)
    section = re.search(r'Args:\n(.*?)(?=\n\n|\Z)', text, re.S)
    if section is None:
        return {}

    # Each entry: argument name, parenthesized type, then a description that may
    # span several lines (up to the next "name (" header or the end of section)
    entry_pattern = r'^\s*([a-zA-Z0-9_]+)\s*\(.*?\):\s*(.*?)(?=\n\s*[a-zA-Z0-9_]+\s*\(|\Z)'
    entries = re.findall(entry_pattern, section.group(1), re.S | re.M)

    help_dict = {}
    for name, raw_text in entries:
        # Normalize each description line; list items keep a single-space indent
        cleaned_lines = [
            (' ' + line.strip()) if line.lstrip().startswith('-') else line.strip()
            for line in raw_text.strip().split('\n')
        ]
        help_dict[name] = '\n'.join(cleaned_lines)
    return help_dict

159 

160 

def pretty_list(availables: list[str]) -> str:
    """Render the available checkings / failures as a bullet list with a docs link."""
    parts = ['\n']
    for flag in availables:
        nice_name = NICE_NAMES.get(flag)
        if not nice_name:
            raise Exception(f'Flag "{flag}" has not a defined nice name')
        parts.append(f'\n - {flag}: {nice_name}')
    parts.append(f'\nTo know more about each test please visit:\n{test_docs_url}')
    return ''.join(parts)

171 

172 

class custom (Action):
    """Argparse action combining nargs='*' with a const fallback.

    Needed because argparse does not support const together with nargs='*':
    https://stackoverflow.com/questions/72803090/argparse-how-to-create-the-equivalent-of-const-with-nargs
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Store the given values, falling back to const when none were passed.

        Argument not passed at all -> the declared default applies.
        Argument passed with no values -> const.
        Argument passed with values -> those values.
        """
        chosen = values if values else self.const
        setattr(namespace, self.dest, chosen)

191 

192 

def main() -> None:
    """Console entry point: parse the CLI arguments and dispatch the chosen subcommand.

    Each branch below reads the arguments declared by the corresponding subparser
    (defined at module level) and forwards them to the matching tool or workflow.
    """
    # Parse input arguments from the console
    args = parser.parse_args()
    # Remember the subcommand on the parser so CustomArgumentParser.error can report it in context
    if hasattr(args, 'subcommand') and args.subcommand:
        parser.subcommand = args.subcommand
    # Apply common arguments as necessary
    if hasattr(args, 'no_symlinks') and args.no_symlinks:
        GLOBALS['no_symlinks'] = True
    if hasattr(args, 'no_colors') and args.no_colors:
        GLOBALS['no_colors'] = True
    # Find which subcommand was called
    subcommand = args.subcommand
    # If there is not subcommand then print help
    if not subcommand:
        parser.print_help()
    # If user wants to run the workflow
    elif subcommand == "run":
        # Get all parsed arguments as a dictionary
        dict_args = vars(args)
        # Remove arguments not related to this subcommand
        del dict_args['subcommand']
        # Remove common arguments from the dict as well
        common_args = [action.dest for action in common_parser._actions]
        for arg in common_args:
            del dict_args[arg]
        # Find out which arguments are for the Project class and which ones are for the workflow
        project_args = {}
        workflow_args = {}
        for k, v in dict_args.items():
            if k in expected_project_args:
                project_args[k] = v
            else:
                workflow_args[k] = v
        # Call the actual main function
        workflow(project_parameters=project_args, **workflow_args)
    # If user wants to setup the inputs
    elif subcommand == "inputs":
        # Make a copy of the template in the local directory if there is not an inputs file yet
        if exists(DEFAULT_INPUTS_FILENAME):
            print(f"File {DEFAULT_INPUTS_FILENAME} already exists")
        else:
            copyfile(INPUTS_TEMPLATE_FILEPATH, DEFAULT_INPUTS_FILENAME)
            print(f"File {DEFAULT_INPUTS_FILENAME} has been generated")
        # Set the editor to be used to modify the inputs file
        editor_command = args.editor
        if editor_command:
            # 'none' means the user explicitly wants no editor opened
            if editor_command == 'none': return
            return call([editor_command, DEFAULT_INPUTS_FILENAME])
        # If no editor argument is passed then ask the user for one
        print("Choose your preferred editor:")
        available_editors = list(AVAILABLE_TEXT_EDITORS.keys())
        for i, editor_name in enumerate(available_editors, 1):
            print(f"{i}. {editor_name}")
        print("*. exit")
        try:
            choice = int(input("Number: ").strip())
            # Out-of-range numbers are treated like any other invalid input
            if not (1 <= choice <= len(available_editors)): raise ValueError
            editor_name = available_editors[choice - 1]
            editor_command = AVAILABLE_TEXT_EDITORS[editor_name]
            # Open a text editor for the user
            print(f"{editor_name} was selected")
            call([editor_command, DEFAULT_INPUTS_FILENAME])
        except ValueError:
            print("No editor was selected")

    # In case the convert tool was called
    elif subcommand == 'convert':
        # If no input arguments are passed print help
        if args.input_structure is None and args.input_trajectories is None:
            convert_parser.print_help()
            return
        if args.input_trajectories is None:
            args.input_trajectories = []
        # Run the convert command
        convert(
            input_structure_filepath=args.input_structure,
            output_structure_filepath=args.output_structure,
            input_trajectory_filepaths=args.input_trajectories,
            output_trajectory_filepath=args.output_trajectory,
        )

    # In case the filter tool was called
    elif subcommand == 'filter':
        input_structure_file = File(args.input_structure) if args.input_structure else None
        output_structure_file = File(args.output_structure) if args.output_structure else None
        input_trajectory_file = File(args.input_trajectory) if args.input_trajectory else None
        output_trajectory_file = File(args.output_trajectory) if args.output_trajectory else None
        # Run the atom filtering
        filter_atoms(
            input_structure_file=input_structure_file,
            output_structure_file=output_structure_file,
            input_trajectory_file=input_trajectory_file,
            output_trajectory_file=output_trajectory_file,
            selection_string=args.selection_string,
            selection_syntax=args.selection_syntax
        )
        print('There you have it :)')

    # In case the subset tool was called
    elif subcommand == 'subset':
        # When no output is given the input trajectory is overwritten
        output_trajectory = args.output_trajectory if args.output_trajectory else args.input_trajectory
        get_trajectory_subset(
            input_structure_file=File(args.input_structure),
            input_trajectory_file=File(args.input_trajectory),
            output_trajectory_file=File(output_trajectory),
            start=args.start,
            end=args.end,
            step=args.step,
            skip=args.skip,
            frames=args.frames
        )
        print('All done :)')

    # In case the chainer tool was called
    elif subcommand == 'chainer':
        # Parse the structure
        structure = Structure.from_pdb_file(args.input_structure)
        # Select atoms according to inputs
        selection = structure.select(args.selection_string, args.selection_syntax) if args.selection_string else structure.select_all()
        if not selection: raise InputError(f'Empty selection {selection}')
        # Run the chainer logic
        structure.chainer(selection, args.letter, args.whole_fragments)
        # Generate the output file from the modified structure
        structure.generate_pdb_file(args.output_structure)
        print(f'Changes written to {args.output_structure}')
    # If user wants to run over a whole dataset of projects
    elif subcommand == 'dataset':
        if not hasattr(args, 'dataset_subcommand') or not args.dataset_subcommand:
            dataset_parser.print_help()
            return

        dataset = Dataset(dataset_yaml_path=args.dataset_yaml)

        if args.dataset_subcommand == 'run':
            dataset.launch_workflow(
                include_groups=args.include_groups,
                exclude_groups=args.exclude_groups,
                n_jobs=args.n_jobs,
                slurm=args.slurm,
                job_template=args.job_template,
                debug=args.debug
            )
        elif args.dataset_subcommand == 'groups':
            dataset.show_groups(cmd=True)
    # If user wants to run the NASSA analysis
    elif subcommand == "nassa":
        # If no input arguments are passed print help
        # NOTE(review): make_config defaults to None (not False), so the 'is False'
        # comparison below never matches -- confirm the intended condition
        if args.config is None and args.make_config is False:
            nassa_parser.print_help()
            print('Please provide a configuration file or make one with the -m flag')
            return
        # If the user wants to make a configuration file
        if args.make_config:
            # NOTE(review): with nargs='*' make_config is a list or None, never the
            # literal True, so only the == [] comparison can match here
            if args.make_config is True or args.make_config == []:
                # Make a copy of the template in the local directory if there is not an inputs file yet
                if not exists(DEFAULT_NASSA_CONFIG_FILENAME):
                    copyfile(NASSA_TEMPLATE_FILEPATH, DEFAULT_NASSA_CONFIG_FILENAME)
                # Open a text editor for the user
                call(["vim", DEFAULT_NASSA_CONFIG_FILENAME])
                print('Configuration file created as nassa.json\nNow you can run the analysis with the -c flag.')
                return
            # If the user provides a path to the files
            else:
                generate_nassa_config(
                    args.make_config,
                    args.seq_path,
                    args.output,
                    args.unit_len,
                    args.n_sequences
                )
                print('Configuration file created as nassa.json\nNow you can run the analysis with the -c flag.')
                return
        # If the user wants to run the analysis. With the config file an analysis name must be provided, or the all flag must be set
        if args.config and args.analysis_names is None and args.all is False:
            nassa_parser.print_help()
            print('Please provide an analysis name to run:', ', '.join(NASSA_ANALYSES_LIST))
            return
        # If the user wants to run the helical parameters analysis we must check if the necessary files are provided (structure, topology and trajectory)
        if args.helical_parameters:
            # Also, it is necessary to provide the project directories. Each of the project directories must contain an independent MD
            if args.proj_directories is None:
                nassa_parser.print_help()
                print('Please provide a project directory to run the helical parameters analysis with the -pdirs flag')
                return
            if args.input_structure_filepath is None:
                raise InputError('Please provide a structure file to run the helical parameters analysis with the -stru flag')
            elif args.input_trajectory_filepath is None:
                raise InputError('Please provide a trajectory file to run the helical parameters analysis with the -traj flag')
            elif args.input_topology_filepath is None:
                raise InputError('Please provide a topology file to run the helical parameters analysis with the -top flag')
            # If the all flag is set, the user must provide the path to the sequences because it is necessary to create the nassa.yml and run the NASSA analysis
            if args.all:
                if not args.seq_path:
                    raise InputError('Please, if all option is selected provide the path to the sequences (--seq_path)')
            # If all the flags are correctly set, we can run the analysis
            workflow_nassa(
                config_file_path=None, # The configuration file is not needed in this case because we are going to run the helical parameters analysis so it will be created then
                analysis_names=args.analysis_names,
                overwrite=args.overwrite,
                overwrite_nassa=args.overwrite_nassa,
                helical_par=args.helical_parameters,
                proj_dirs=args.proj_directories,
                input_structure_file=args.input_structure_filepath,
                input_trajectory_file=args.input_trajectory_filepath,
                input_top_file=args.input_topology_filepath,
                all=args.all,
                unit_len=args.unit_len,
                n_sequences=args.n_sequences,
                seq_path=args.seq_path,
                md_directories=args.md_directories,
                trust=args.trust,
                mercy=args.mercy
            )
        # If the user wants to run the NASSA analysis with the config file already created and the analysis name provided
        else:
            # NOTE(review): dict_args is built but never used below -- TODO: ask Dani
            # whether this deletion is still needed (original note: "preguntar Dani")
            dict_args = vars(args)
            del dict_args['subcommand']
            # Call the actual main function
            workflow_nassa(
                config_file_path=args.config,
                analysis_names=args.analysis_names,
                make_config=args.make_config,
                output=args.output,
                working_directory=args.working_directory,
                overwrite=args.overwrite,
                overwrite_nassa=args.overwrite_nassa,
                n_sequences=args.n_sequences,
                unit_len=args.unit_len,
                all=args.all,
                md_directories=args.md_directories,
                trust=args.trust,
                mercy=args.mercy
            )

428 

429 

# Parser holding the arguments shared by every subcommand; each subparser
# below inherits these through parents=[common_parser].
common_parser = ArgumentParser(add_help=False)

# Symlinks are not allowed in all file systems. This flag makes the workflow
# copy files instead, which takes more time and disk but is always portable.
common_parser.add_argument(
    "-ns", "--no_symlinks", default=False, action='store_true',
    help="Do not use symlinks internally")
common_parser.add_argument(
    "-nc", "--no_colors", default=False, action='store_true',
    help="Do not use colors for logging")

439 

# Define console arguments to call the workflow
# CustomArgumentParser makes error messages subcommand-aware
parser = CustomArgumentParser(description="MDDB Workflow")
subparsers = parser.add_subparsers(help='Name of the subcommand to be used', dest="subcommand")

# Reuse the Args sections of the Project/workflow docstrings as argparse help texts
project_init_help = parse_docstring_for_help(Project.__init__.__doc__)
workflow_help = parse_docstring_for_help(workflow.__doc__)

446 

# Set the run subcommand
run_parser = subparsers.add_parser("run",
    help="Run the workflow",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser]
)

# Set optional arguments
# Each entry pairs the option flags with the add_argument keyword arguments
run_parser_input_group = run_parser.add_argument_group('INPUT OPTIONS')
run_parser_input_args = [
    # There is no default since many formats may be possible
    (['-top', '--input_topology_filepath'], {'default': None, 'help': project_init_help['input_topology_filepath']+f'\nSupported formats: {", ".join(TOPOLOGY_SUPPORTED_FORMATS)}.'}),
    (['-stru', '--input_structure_filepath'], {'default': None, 'help': project_init_help['input_structure_filepath']+f'\nSupported formats: {", ".join(STRUCTURE_SUPPORTED_FORMATS)}.'}),
    (['-traj', '--input_trajectory_filepaths'], {'default': None, 'nargs': '*', 'help': project_init_help['input_trajectory_filepaths']+f'\nSupported formats: {", ".join(TRAJECTORY_SUPPORTED_FORMATS)}.'}),
    (['-dir', '--working_directory'], {'default': '.', 'help': "Directory where the whole workflow is run."}),
    (['-mdir', '--md_directories'], {'default': None, 'nargs': '*', 'help': project_init_help['md_directories']}),
    (['-md', '--md_config'], {'action': 'append', 'default': None, 'nargs': '*', 'help': project_init_help['md_config']}),
    (['-proj', '--accession'], {'default': None, 'help': project_init_help['accession']}),
    (['-url', '--database_url'], {'default': DEFAULT_API_URL, 'help': project_init_help['database_url']}),
    (['-inp', '--inputs_filepath'], {'default': None, 'help': "Path to inputs file"}),
    (['-fin', '--forced_inputs'], {'action': 'append', 'nargs': '*', 'default': None, 'help': project_init_help['forced_inputs']}),
    (['-pop', '--populations_filepath'], {'default': DEFAULT_POPULATIONS_FILENAME, 'help': project_init_help['populations_filepath']}),
    (['-tpro', '--transitions_filepath'], {'default': DEFAULT_TRANSITIONS_FILENAME, 'help': project_init_help['transitions_filepath']}),
    (['-ad', '--aiida_data_filepath'], {'default': None, 'help': project_init_help['aiida_data_filepath']}),
]
for flags, kwargs in run_parser_input_args:
    run_parser_input_group.add_argument(*flags, **kwargs)

474 

# Set a group for the workflow control options
# Each entry pairs the option flags with the add_argument keyword arguments
run_parser_workflow_group = run_parser.add_argument_group('WORKFLOW CONTROL OPTIONS')
run_parser_workflow_args = [
    (['-img', '--image'], {'action': 'store_true', 'help': project_init_help['image']}),
    (['-fit', '--fit'], {'action': 'store_true', 'help': project_init_help['fit']}),
    (['-trans', '--translation'], {'nargs': '*', 'default': [0, 0, 0], 'help': project_init_help['translation']}),
    (['-d', '--download'], {'action': 'store_true', 'help': workflow_help['download']}),
    (['-s', '--setup'], {'action': 'store_true', 'help': workflow_help['setup']}),
    # nargs='?' with const: a bare -smp samples 10 frames
    (['-smp', '--sample_trajectory'], {'type': int, 'nargs': '?', 'default': None, 'const': 10, 'metavar': 'N_FRAMES', 'help': project_init_help['sample_trajectory']}),
    (['-rcut', '--rmsd_cutoff'], {'type': float, 'default': DEFAULT_RMSD_CUTOFF, 'help': project_init_help['rmsd_cutoff']}),
    (['-icut', '--interaction_cutoff'], {'type': float, 'default': DEFAULT_INTERACTION_CUTOFF, 'help': project_init_help['interaction_cutoff']}),
    (['-iauto', '--interactions_auto'], {'type': str, 'nargs': '?', 'const': True, 'help': project_init_help['interactions_auto']}),
    (['-gb', '--guess_bonds'], {'action': 'store_true', 'help': project_init_help['guess_bonds']}),
    (['-ib', '--ignore_bonds'], {'action': 'store_true', 'help': project_init_help['ignore_bonds']}),
]
for flags, kwargs in run_parser_workflow_args:
    run_parser_workflow_group.add_argument(*flags, **kwargs)

492 

# Set a group for the selection options
# Each entry pairs the option flags with the add_argument keyword arguments
run_parser_selection_group = run_parser.add_argument_group('SELECTION OPTIONS')
run_parser_selection_args = [
    # nargs='?' with const: a bare -filt enables filtering with its default selection
    (['-filt', '--filter_selection'], {'nargs': '?', 'default': False, 'const': True, 'help': project_init_help['filter_selection']}),
    (['-pbc', '--pbc_selection'], {'default': None, 'help': project_init_help['pbc_selection']}),
    (['-cg', '--cg_selection'], {'default': None, 'help': project_init_help['cg_selection']}),
    (['-pcafit', '--pca_fit_selection'], {'default': PROTEIN_AND_NUCLEIC_BACKBONE, 'help': project_init_help['pca_fit_selection']}),
    (['-pcana', '--pca_analysis_selection'], {'default': PROTEIN_AND_NUCLEIC_BACKBONE, 'help': project_init_help['pca_analysis_selection']}),
]
for flags, kwargs in run_parser_selection_args:
    run_parser_selection_group.add_argument(*flags, **kwargs)

504 

# Set a group with all input checking options
# Both -t and -m use the 'custom' action: passed alone they apply to every checking
run_parser_checks_group = run_parser.add_argument_group('INPUT CHECKS OPTIONS', description=f"For more information about each check please visit:\n{test_docs_url}")
run_parser_checks_args = [
    (['-t', '--trust'], {'default': [], 'nargs': '*', 'action': custom, 'const': AVAILABLE_CHECKINGS, 'choices': AVAILABLE_CHECKINGS,
        'help': ("If passed, do not run the specified checking. Note that all checkings are skipped if passed alone. "
                 "Available checkings:" + pretty_list(AVAILABLE_CHECKINGS))}),
    (['-m', '--mercy'], {'default': [], 'nargs': '*', 'action': custom, 'const': AVAILABLE_FAILURES, 'choices': AVAILABLE_FAILURES,
        'help': ("If passed, do not kill the process when any of the specfied checkings fail and proceed with the workflow. "
                 "Note that all checkings are allowed to fail if the argument is passed alone. "
                 "Available checkings:" + pretty_list(AVAILABLE_FAILURES))}),
    (['-f', '--faith'], {'default': False, 'action': 'store_true',
        'help': ("Use this flag to force-skip all data processing thus asuming inputs are already processed.\n"
                 "WARNING: Do not use this flag if you don't know what you are doing.\n"
                 "This may lead to several silent errors.")}),
    (['-sl', '--ssleep'], {'default': False, 'action': 'store_true',
        'help': ("Use this flag to skip SSL certificate authentication.\n"
                 "WARNING: Do not use this flag if you don't trust the data source.")}),
]
for flags, kwargs in run_parser_checks_args:
    run_parser_checks_group.add_argument(*flags, **kwargs)

525 

# Set a list with the alias of all requestable dependencies
choices = sorted(list(requestables.keys()) + list(DEPENDENCY_FLAGS.keys()))
# Human-readable description of each preconfigured dependency group flag
task_groups = [
    "download: Check/download input files (already ran with analyses)",
    "setup: Process and test input files (already ran with analyses)",
    "meta: Run project and MD metadata analyses",
    "network: Run dependencies which require internet connection",
    "minimal: Run dependencies required by the web client to work",
    "interdeps: Run interactions and all its dependent analyses",
    "membs: Run all membrane-related analyses",
]
# Keep this list in sync with DEPENDENCY_FLAGS so the help text stays complete
assert len(DEPENDENCY_FLAGS.keys()) == len(task_groups), "The number of dependency flags and task groups must be the same"

run_parser_analysis_group = run_parser.add_argument_group('TASKS OPTIONS',
    description=f"Available tasks: {choices}\nFor more information about each task, please visit:\n{task_docs_url}")
run_parser_analysis_args = [
    (['-i', '--include'], {'nargs': '*', 'choices': choices,
        'help': ("Set the unique analyses or tools to be run. All other steps will be skipped.\n"
                 "There are also some additional flags to define a preconfigured group of dependencies:"
                 + '\n - ' + '\n - '.join(task_groups))}),
    (['-e', '--exclude'], {'nargs': '*', 'choices': choices, 'help': workflow_help['exclude']}),
    # 'custom' action: a bare -ow overwrites everything (const=True)
    (['-ow', '--overwrite'], {'type': str, 'nargs': '*', 'default': [], 'action': custom, 'const': True, 'choices': choices, 'help': workflow_help['overwrite']}),
]
for flags, kwargs in run_parser_analysis_args:
    run_parser_analysis_group.add_argument(*flags, **kwargs)

551 

552 

# Add a new command to aid in the inputs file setup
inputs_parser = subparsers.add_parser("inputs",
    help="Set the inputs file",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser]
)
# Choose the editor in advance ('none' skips opening any editor)
inputs_parser.add_argument(
    "-ed", "--editor",
    choices=[*AVAILABLE_TEXT_EDITORS.values(), 'none'],
    help="Set the text editor to modify the inputs file")

564 

565 

# The convert command
convert_parser = subparsers.add_parser("convert",
    help="Convert a structure and/or several trajectories to other formats\n" +
    "If several input trajectories are passed they will be merged previously",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser])
# Each entry pairs the option flags with the add_argument keyword arguments
convert_parser_args = [
    (['-is', '--input_structure'], {'help': "Path to input structure file"}),
    (['-os', '--output_structure'], {'help': "Path to output structure file"}),
    (['-it', '--input_trajectories'], {'nargs': '*', 'help': "Path to input trajectory file or same format files."}),
    (['-ot', '--output_trajectory'], {'help': "Path to output trajectory file"}),
]
for flags, kwargs in convert_parser_args:
    convert_parser.add_argument(*flags, **kwargs)

580 

581 

# The filter command
filter_parser = subparsers.add_parser("filter",
    help="Filter atoms in a structure and/or a trajectory\n",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser])
# Each entry pairs the option flags with the add_argument keyword arguments
filter_parser_args = [
    (["-is", "--input_structure"], {'required': True, 'help': "Path to input structure file"}),
    (["-os", "--output_structure"], {'help': "Path to output structure file"}),
    (["-it", "--input_trajectory"], {'help': "Path to input trajectory file"}),
    (["-ot", "--output_trajectory"], {'help': "Path to output trajectory file"}),
    (["-sel", "--selection_string"], {'help': "Atom selection"}),
    (["-syn", "--selection_syntax"], {'default': 'vmd', 'help': "Atom selection syntax (vmd by default)"}),
]
for flags, kwargs in filter_parser_args:
    filter_parser.add_argument(*flags, **kwargs)

597 

598 

# The subset command
subset_parser = subparsers.add_parser("subset",
    help="Get a subset of frames from the current trajectory",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser])
# Each entry pairs the option flags with the add_argument keyword arguments
subset_parser_args = [
    (["-is", "--input_structure"], {'required': True, 'help': "Path to input structure file"}),
    (["-it", "--input_trajectory"], {'required': True, 'help': "Path to input trajectory file"}),
    (["-ot", "--output_trajectory"], {'help': "Path to output trajectory file"}),
    (["-start", "--start"], {'type': int, 'default': 0, 'help': "Start frame (0-based)"}),
    (["-end", "--end"], {'type': int, 'default': None, 'help': "End frame (0-based)"}),
    (["-step", "--step"], {'type': int, 'default': 1, 'help': "Frame step"}),
    (["-skip", "--skip"], {'nargs': '*', 'type': int, 'default': [], 'help': "Frames to be skipped (0-based)"}),
    (["-fr", "--frames"], {'nargs': '*', 'type': int, 'default': [], 'help': "Frames to be returned (0-based). Input frame order is ignored as original frame order is conserved."}),
]
for flags, kwargs in subset_parser_args:
    subset_parser.add_argument(*flags, **kwargs)

616 

617 

# The chainer command
chainer_parser = subparsers.add_parser("chainer",
    help="Edit structure (pdb) chains",
    formatter_class=CustomHelpFormatter,
    parents=[common_parser])

def _str_to_bool(value):
    """Convert a CLI string into an actual boolean.

    Plain type=bool is a classic argparse pitfall: bool('False') is True, so
    any non-empty value used to enable the option. argparse converts the
    ValueError raised here into a proper 'invalid value' error message.
    """
    lowered = str(value).strip().lower()
    if lowered in ('true', '1', 'yes', 'y'):
        return True
    if lowered in ('false', '0', 'no', 'n'):
        return False
    raise ValueError(f"expected a boolean value, got '{value}'")

# Each entry pairs the option flags with the add_argument keyword arguments
chainer_parser_args = [
    (["-is", "--input_structure"], {'required': True, 'help': "Path to input structure file"}),
    (["-os", "--output_structure"], {'default': 'chained.pdb', 'help': "Path to output structure file"}),
    (["-sel", "--selection_string"], {'help': "Atom selection (the whole structure by default)"}),
    (["-syn", "--selection_syntax"], {'default': 'vmd', 'choices': Structure.SUPPORTED_SELECTION_SYNTAXES, 'help': "Atom selection syntax (VMD syntax by default)"}),
    (["-let", "--letter"], {'help': "New chain letter (one letter per fragment by default)"}),
    # Was 'type': bool, which silently treated '-whfr False' as True
    (["-whfr", "--whole_fragments"], {'type': _str_to_bool, 'default': False, 'help': "Consider fragments beyond the atom selection. Otherwise a fragment could end up having multiple chains."}),
]
for flags, kwargs in chainer_parser_args:
    chainer_parser.add_argument(*flags, **kwargs)

633 

634 

# The NASSA commands
nassa_parser = subparsers.add_parser("nassa", formatter_class=CustomHelpFormatter,
    help="Run and set the configuration of the NASSA analysis",
    parents=[common_parser])

# General NASSA options, registered data-driven as with the other commands
_nassa_general_args = [
    (["-c", "--config"], {
        'help': "Configuration file for the NASSA analysis"}),
    (["-n", "--analysis_names"], {
        'nargs': '*', 'default': None,
        'help': "Name of the analysis to be run. It can be: " + ', '.join(NASSA_ANALYSES_LIST)}),
    (["-w", "--make_config"], {
        'nargs': '*', 'default': None,
        'help': "Make a configuration file for the NASSA analysis: makecfg.\nThe base path could be given as an argument. If not, an example of configuration file is created."}),
    (["-seq", "--seq_path"], {
        'type': str, 'const': False, 'action': custom,
        'help': "Set the base path of the sequences. If not given, the sequences are searched in the current directory."}),
    (["-o", "--output"], {
        'help': "Output path for the NASSA analysis"}),
    (["-dir", "--working_directory"], {
        'default': '.',
        'help': "Directory where the whole workflow is run. Current directory by default."}),
]
for _flags, _kwargs in _nassa_general_args:
    nassa_parser.add_argument(*_flags, **_kwargs)

# Overwrite options: force the listed output files to be regenerated
nassa_parser.add_argument(
    "-ow", "--overwrite",
    type=str,
    nargs='*',
    default=[],
    action=custom,
    const=True,
    help="Set the output files to be overwritten thus re-running its corresponding analysis or tool")
nassa_parser.add_argument(
    "-own", "--overwrite_nassa",
    type=str,
    nargs='*',
    default=[],
    action=custom,
    const=True,
    help="Set the output files to be overwritten thus re-running its corresponding analysis or tool for the NASSA analysis")

# Analysis sizing and selection options
_nassa_run_args = [
    (["-nseq", "--n_sequences"], {
        'type': int,
        'help': "Number of sequences to be analyzed"}),
    (["-i", "--unit_len"], {
        'type': int, 'default': 6,
        'help': "Number of base pairs to be analyzed"}),
    (["-hp", "--helical_parameters"], {
        'action': 'store_true', 'default': False,
        'help': "Run the helical parameters analysis"}),
    (["-pdirs", "--proj_directories"], {
        'nargs': '*', 'default': None,
        'help': ("Path to the different project directories. Each directory is to contain an independent project.\n"
                 "Several output files will be generated in the same folder directory")}),
    (["-all", "--all"], {
        'action': 'store_true', 'default': False,
        'help': "Run all the helical parameters and NASSA analyses"}),
]
for _flags, _kwargs in _nassa_run_args:
    nassa_parser.add_argument(*_flags, **_kwargs)

# Input file path options (relative to the project or MD directories)
_nassa_input_args = [
    (["-stru", "--input_structure_filepath"], {
        'default': None,
        'help': ("Path to input structure file. It may be relative to the project or to each MD directory.\n"
                 "If this value is not passed then the standard structure file is used as input by default")}),
    (["-traj", "--input_trajectory_filepath"], {
        'nargs': '*', 'default': None,
        'help': ("Path to input trajectory file. It is relative to each MD directory.\n"
                 "If this value is not passed then the standard trajectory file path is used as input by default")}),
    # No default topology path since many formats may be possible
    (["-top", "--input_topology_filepath"], {
        'default': None,
        'help': "Path to input topology file. It is relative to the project directory."}),
    (["-mdir", "--md_directories"], {
        'nargs': '*', 'default': None,
        'help': ("Path to the different MD directories. Each directory is to contain an independent trajectory and structure.\n"
                 "Several output files will be generated in every MD directory")}),
]
for _flags, _kwargs in _nassa_input_args:
    nassa_parser.add_argument(*_flags, **_kwargs)

# Checking control: skip checkings entirely (-t) or tolerate their failures (-m)
nassa_parser.add_argument(
    "-t", "--trust",
    type=str,
    nargs='*',
    default=[],
    action=custom,
    const=AVAILABLE_CHECKINGS,
    choices=AVAILABLE_CHECKINGS,
    help="If passed, do not run the specified checking. Note that all checkings are skipped if passed alone. Available checkings:" + pretty_list(AVAILABLE_CHECKINGS)
)
nassa_parser.add_argument(
    "-m", "--mercy",
    type=str,
    nargs='*',
    default=[],
    action=custom,
    const=AVAILABLE_FAILURES,
    choices=AVAILABLE_FAILURES,
    help=("If passed, do not kill the process when any of the specified checkings fail and proceed with the workflow.\n"
          "Note that all checkings are allowed to fail if the argument is passed alone. Available checkings:" + pretty_list(AVAILABLE_FAILURES))
)

# Duplicate subunit handling
nassa_parser.add_argument(
    "-dup", "--duplicates",
    default=False,
    action='store_true',
    help="If passed, merge duplicate subunits in the sequences when there is more than one. Otherwise only the last one will be selected"
)

757 

# Dataset subcommand
dataset_parser = subparsers.add_parser("dataset", formatter_class=CustomHelpFormatter,
    help="Manage and process a dataset of MDDB projects.")
dataset_subparsers = dataset_parser.add_subparsers(dest='dataset_subcommand', help='Dataset subcommands')

# Dataset run subcommand
dataset_run_parser = dataset_subparsers.add_parser("run", formatter_class=CustomHelpFormatter,
    help="Run the workflow for a dataset of MDDB projects.",
    parents=[common_parser])
_dataset_run_args = [
    (["dataset_yaml"], {'help': "Path to the dataset YAML file."}),
    (["-ig", "--include-groups"], {'nargs': '*', 'type': int, 'default': [], 'help': "List of group IDs to be run."}),
    (["-eg", "--exclude-groups"], {'nargs': '*', 'type': int, 'default': [], 'help': "List of group IDs to be excluded."}),
    (["-n", "--n_jobs"], {'type': int, 'default': 0, 'help': "Number of jobs to run."}),
    (["--slurm"], {'action': 'store_true', 'help': "Submit the workflow to SLURM."}),
    (["-jt", "--job-template"], {'help': "Path to the SLURM job template file. Required if --slurm is used."}),
    (["--debug"], {'action': 'store_true', 'help': "Only print the commands without executing them."}),
]
for _flags, _kwargs in _dataset_run_args:
    dataset_run_parser.add_argument(*_flags, **_kwargs)

774 

775# Dataset status subcommand 

776dataset_status_parser = dataset_subparsers.add_parser("groups", formatter_class=CustomHelpFormatter, 

777 help="Show the status of projects in a dataset, grouped by their last log message.", 

778 parents=[common_parser]) 

779dataset_status_parser.add_argument("dataset_yaml", help="Path to the dataset YAML file.")