Coverage for mddb_workflow/mwf.py: 79%

1263 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1#!/usr/bin/env python 

2 

3# This is the starter script 

4 

5# Import python libraries 

6from os import chdir, rename, remove, walk, mkdir, getcwd 

7from os.path import exists, isdir, isabs, relpath, normpath, split, basename 

8from shutil import rmtree 

9import sys 

10import io 

11import re 

12import numpy 

13from glob import glob 

14from inspect import getfullargspec 

15import time 

16 

17# Constants 

18# Importing constants first is important 

19from mddb_workflow.utils.constants import * 

20 

21# Import local utils 

22#from mddb_workflow.utils.httpsf import mount 

23from mddb_workflow.utils.auxiliar import InputError, MISSING_TOPOLOGY 

24from mddb_workflow.utils.auxiliar import warn, load_json, save_json, load_yaml, save_yaml 

25from mddb_workflow.utils.auxiliar import is_directory_empty, is_glob, parse_glob, safe_getattr 

26from mddb_workflow.utils.auxiliar import read_ndict, write_ndict, get_git_version 

27from mddb_workflow.utils.arg_cksum import get_cksum_id 

28from mddb_workflow.utils.register import Register 

29from mddb_workflow.utils.cache import Cache 

30from mddb_workflow.utils.structures import Structure 

31from mddb_workflow.utils.topologies import Topology 

32from mddb_workflow.utils.file import File 

33from mddb_workflow.utils.remote import Remote 

34from mddb_workflow.utils.pyt_spells import get_frames_count, get_average_structure 

35from mddb_workflow.utils.selections import Selection 

36from mddb_workflow.utils.mda_spells import get_mda_universe 

37from mddb_workflow.utils.type_hints import * 

38 

39# Import local tools 

40from mddb_workflow.tools.get_first_frame import get_first_frame 

41from mddb_workflow.tools.get_bonds import find_safe_bonds, get_bonds_reference_frame 

42from mddb_workflow.tools.process_interactions import process_interactions 

43from mddb_workflow.tools.generate_metadata import prepare_project_metadata, generate_md_metadata 

44from mddb_workflow.tools.generate_ligands_desc import generate_ligand_mapping 

45from mddb_workflow.tools.chains import prepare_chain_references 

46from mddb_workflow.tools.generate_pdb_references import prepare_pdb_references 

47from mddb_workflow.tools.residue_mapping import generate_residue_mapping 

48from mddb_workflow.tools.generate_map import generate_protein_mapping 

49from mddb_workflow.tools.generate_lipid_references import generate_lipid_references 

50from mddb_workflow.tools.generate_membrane_mapping import generate_membrane_mapping 

51from mddb_workflow.tools.generate_topology import generate_topology 

52from mddb_workflow.tools.get_charges import get_charges 

53from mddb_workflow.tools.get_inchi_keys import get_inchikeys 

54from mddb_workflow.tools.remove_trash import remove_trash 

55from mddb_workflow.tools.get_screenshot import get_screenshot 

56from mddb_workflow.tools.process_input_files import process_input_files 

57from mddb_workflow.tools.provenance import produce_provenance 

58 

59# Import local analyses 

60from mddb_workflow.analyses.rmsds import rmsds 

61from mddb_workflow.analyses.tmscores import tmscores 

62from mddb_workflow.analyses.rmsf import rmsf 

63from mddb_workflow.analyses.rgyr import rgyr 

64from mddb_workflow.analyses.pca import pca 

65from mddb_workflow.analyses.density import density 

66from mddb_workflow.analyses.thickness import thickness 

67from mddb_workflow.analyses.area_per_lipid import area_per_lipid 

68from mddb_workflow.analyses.lipid_order import lipid_order 

69from mddb_workflow.analyses.lipid_interactions import lipid_interactions 

70from mddb_workflow.analyses.channels import channels 

71#from mddb_workflow.analyses.pca_contacts import pca_contacts 

72from mddb_workflow.analyses.rmsd_per_residue import rmsd_per_residue 

73from mddb_workflow.analyses.rmsd_pairwise import rmsd_pairwise 

74from mddb_workflow.analyses.clusters import clusters_analysis 

75from mddb_workflow.analyses.distance_per_residue import distance_per_residue 

76from mddb_workflow.analyses.hydrogen_bonds import hydrogen_bonds 

77from mddb_workflow.analyses.sasa import sasa 

78from mddb_workflow.analyses.energies import energies 

79from mddb_workflow.analyses.dihedral_energies import compute_dihedral_energies 

80from mddb_workflow.analyses.pockets import pockets 

81from mddb_workflow.analyses.rmsd_check import check_trajectory_integrity 

82from mddb_workflow.analyses.helical_parameters import helical_parameters 

83from mddb_workflow.analyses.markov import markov 

84 

# Make the system output stream unbuffered
# Only when not in a Jupyter notebook or using pytest
# Check if we're in an interactive Python shell like Jupyter
if not hasattr(sys, 'ps1') and 'pytest' not in sys.modules:
    # This is useful to make prints work on time in Slurm
    # Otherwise, output logs are written after the script has fully run
    # Note that this fix affects all modules and built-ins
    unbuffered = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)
    sys.stdout = unbuffered

94 

# Sentinel exceptions used across the workflow to tell "absent" apart from any real value
# They are compared by identity, never raised directly

# Sentinel for a missing user input
MISSING_INPUT_EXCEPTION = Exception('Missing input')
# Sentinel for a missing task function argument
# This makes debugging easier when a new function is added wrongly
MISSING_ARGUMENT_EXCEPTION = Exception('Missing argument')
# Sentinel for a value which is simply not there
MISSING_VALUE_EXCEPTION = Exception('Missing value')

# Reserved argument names: any task function declaring these receives
# the path where its output must be written
OUTPUT_FILEPATH_ARG = 'output_filepath'
OUTPUT_DIRECTORY_ARG = 'output_directory'

# Registries filled at the end of the module but referenced by functions defined earlier
requestables = {}
inverted_requestables = {}

112 

113 

class Task:
    """ Descriptor class to handle a generic task.
    It implements lazy properties, caching and overwriting.

    Since its properties are static, results are stored in the parent object
    (MD/Project), or otherwise all MDs would share the same task values. """

    def __init__ (self,
        flag : str,
        name : str,
        func : Callable,
        args : Optional[dict] = None,
        output_filename : Optional[str] = None,
        use_cache : bool = True,
        debug : bool = False,
    ):
        """
        Initialize the Task object.

        Args:
            flag (str):
                The task flag.
                This name is used by the include/exclude/overwrite arguments and to name the analysis output directory.

            name (str):
                The task user-friendly name to be used in the logs.

            func (Callable):
                The task function.
                Function argument names must correspond with Project/MD property names.

            args (dict, optional):
                The task function additional arguments.
                Project/MD properties are automatically sent to the function as arguments.
                However some analyses have additional arguments (e.g. frames limit, cutoffs, etc.)

            output_filename (str, optional):
                The task output filename.
                Path will be set automatically relative to its project/MD.
                For those tasks which generate a directory with multiple outputs this is not necessary.
                However this may come in handy by tasks with a single file output.
                Specially when this output file is used later in this workflow.

            use_cache (bool, optional):
                Set if the returned output is to be cached.
                Note that argument values are always cached, this is not optional.

            debug (bool, optional):
                If the task is run in debug mode, producing more output logs. Defaults to False.
        """
        # Save input arguments
        self.flag = flag
        self.name = name
        self.func = func
        # Avoid the mutable default argument trap: every task gets its own args dict
        self.args = args if args is not None else {}
        self.output_filename = output_filename
        self.use_cache = use_cache
        self.debug = debug
        # Set the keys used to store and retrieve data in the parent and cache
        self.parent_output_key = f'_{self.flag}_task_output'
        self.parent_output_filepath_key = f'{self.flag}_task_output_filepath'
        self.cache_output_key = f'{self.flag}_task_output'
        self.cache_arg_cksums = f'{self.flag}_task_arg_cksums'
        # Required by get_cksum_id to identify the task
        self.__name__ = self.flag

    # Set internal functions to handle parent saved output
    # This output is not saved in the task itself, but in the parent, because the task is static
    def _get_parent_output (self, parent):
        return safe_getattr(parent, self.parent_output_key, MISSING_VALUE_EXCEPTION)
    def _set_parent_output (self, parent, new_output):
        return setattr(parent, self.parent_output_key, new_output)
    def _get_parent_output_filepath (self, parent):
        return safe_getattr(parent, self.parent_output_filepath_key, MISSING_VALUE_EXCEPTION)
    def _set_parent_output_filepath (self, parent, new_filepath):
        return setattr(parent, self.parent_output_filepath_key, new_filepath)

    # Get the task output, running the task if necessary
    # NOTE: sentinels are compared by identity ('is'), not equality
    # Equality would call the output's __eq__, which may crash or misbehave
    # for values overloading it (e.g. numpy arrays return elementwise arrays)
    def get_output (self, parent):
        # If we already have a value stored from this run then return it
        output = self._get_parent_output(parent)
        if output is not MISSING_VALUE_EXCEPTION: return output
        # Otherwise run the task and return the output
        return self(parent)
    output = property(get_output, None, None, "Task output (read only)")

    # Asking for the output file or filepath implies running the Task, then returning the file/filepath
    def get_output_filepath (self, parent) -> str:
        # If we already have a filepath stored from this run then return it
        filepath = self._get_parent_output_filepath(parent)
        if filepath is not MISSING_VALUE_EXCEPTION: return filepath
        # Otherwise run the task and return the filepath
        self(parent)
        filepath = self._get_parent_output_filepath(parent)
        if filepath is not MISSING_VALUE_EXCEPTION: return filepath
        raise ValueError(f'Task {self.flag} has no output filepath after running')
    output_filepath = property(get_output_filepath, None, None, "Task output filepath (read only)")

    def get_output_file (self, parent) -> 'File':
        filepath = self.get_output_filepath(parent)
        return File(filepath)
    output_file = property(get_output_file, None, None, "Task output file (read only)")

    # When the task is printed, show the flag
    def __repr__ (self): return f'<Task ({self.flag})>'

    # When a task is called
    def __call__(self, parent: Union['Project', 'MD']):
        # First of all check if this task has been already done in this very run
        # If so then return the stored value
        output = self._get_parent_output(parent)
        if output is not MISSING_VALUE_EXCEPTION: return output
        # Process the task function arguments
        processed_args = {}
        # Get the task function expected arguments
        specification = getfullargspec(self.func)
        expected_arguments = specification.args
        n_default_arguments = len(specification.defaults) if specification.defaults else 0
        # Find out which arguments are optional since they have default values
        default_arguments = set(expected_arguments[::-1][:n_default_arguments])
        # If one of the expected arguments is the output_filepath then set it here
        output_filepath = None
        writes_output_file = OUTPUT_FILEPATH_ARG in expected_arguments
        if writes_output_file:
            # The task should have a defined output file
            if not self.output_filename:
                raise RuntimeError(f'Task {self.flag} must have an "output_filename"')
            # Set the output file path
            output_filepath = parent.pathify(self.output_filename)
            self._set_parent_output_filepath(parent, output_filepath)
            # Add it to the processed args
            processed_args[OUTPUT_FILEPATH_ARG] = output_filepath
            # Remove the expected argument from the list
            expected_arguments.remove(OUTPUT_FILEPATH_ARG)
        # If one of the expected arguments is the output_directory then set it here
        # We will set a new directory with the flag name of the task, in the corresponding path
        # Note that while the task is being done the output directory has a different name
        # Thus the directory is hidden and marked as incomplete
        # The final output directory is the one without the incomplete prefix
        writes_output_dir = OUTPUT_DIRECTORY_ARG in expected_arguments
        incomplete_output_directory = None
        final_output_directory = None
        if writes_output_dir:
            # Set the output directory path
            incomplete_output_directory = parent.pathify(INCOMPLETE_PREFIX + self.flag)
            final_output_directory = incomplete_output_directory.replace(INCOMPLETE_PREFIX, '')
            # Add it to the processed args
            processed_args[OUTPUT_DIRECTORY_ARG] = incomplete_output_directory
            # Remove the expected argument from the list
            expected_arguments.remove(OUTPUT_DIRECTORY_ARG)
        # Iterate the remaining expected arguments
        for arg in expected_arguments:
            # First find the argument among the parent properties
            arg_value = self.find_arg_value(arg, parent, default_arguments)
            if arg_value is MISSING_ARGUMENT_EXCEPTION: continue
            # Add the processed argument
            processed_args[arg] = arg_value
        # Check again if the task has output already
        # It may happen that some dependencies assign output on their own
        # e.g. charges, bonds
        # If so then return the stored value
        output = self._get_parent_output(parent)
        if output is not MISSING_VALUE_EXCEPTION: return output
        # Find if we have cached output
        if self.use_cache:
            output = parent.cache.retrieve(self.cache_output_key, MISSING_VALUE_EXCEPTION)
            self._set_parent_output(parent, output)
        # Check if this dependency is to be overwritten
        forced_overwrite = self.flag in parent.overwritables
        # Get the list of inputs which have changed compared to a previous run
        # WARNING: Always get changed inputs, since this function updates the cache
        # If had_cache is false then it means this is the first time the task is ever done
        changed_inputs, had_cache, cache_cksums = self.get_changed_inputs(parent, processed_args)
        any_input_changed = len(changed_inputs) > 0
        # Update the cache inputs
        parent.cache.update(self.cache_arg_cksums, cache_cksums)
        # We must overwrite outputs either if inputs changed or if it was forced by the user
        must_overwrite = forced_overwrite or any_input_changed
        # Check if output already exists
        # If the incomplete directory already exists then it means the task was started in a previous run
        existing_incomplete_output = writes_output_dir and exists(incomplete_output_directory)
        # If the final directory already exists then it means the task was done in a previous run
        existing_final_output = writes_output_dir and exists(final_output_directory)
        # If the output file already exists then it also means the task was done in a previous run
        existing_output_file = writes_output_file and exists(output_filepath)
        # If we already have a cached output result
        existing_output_data = output is not MISSING_VALUE_EXCEPTION
        # If we must overwrite then purge previous outputs
        if must_overwrite:
            if existing_incomplete_output: rmtree(incomplete_output_directory)
            if existing_final_output: rmtree(final_output_directory)
            if existing_output_file: remove(output_filepath)
            if existing_output_data: parent.cache.delete(self.cache_output_key)
        # If already existing output is not to be overwritten then check if it is already what we need
        else:
            # If output files/directories are expected then they must exist
            # If output data is expected then it must be cached
            satisfied_output = (not writes_output_dir or exists(final_output_directory)) \
                and (not writes_output_file or exists(output_filepath)) \
                and (output is not MISSING_VALUE_EXCEPTION)
            # If we already have the expected output then we can skip the task at all
            if satisfied_output:
                print(f'{GREY_HEADER}-> Task {self.flag} ({self.name}) already completed{COLOR_END}')
                return output
        # If we are at this point then we are missing some output so we must proceed to run the task
        # Use the final output directory instead of the incomplete one if exists
        # Note that we must check if it exists again since it may have been deleted since the last check
        if writes_output_dir and exists(final_output_directory):
            processed_args[OUTPUT_DIRECTORY_ARG] = final_output_directory
        # Create the incomplete output directory, if necessary
        missing_incomplete_output = writes_output_dir \
            and not exists(incomplete_output_directory) \
            and not exists(final_output_directory)
        if missing_incomplete_output: mkdir(incomplete_output_directory)
        # Finally call the function
        print(f'{GREEN_HEADER}-> Running task {self.flag} ({self.name}){COLOR_END}')
        start_time = time.time()
        # If the task is to be run again because an input changed then let the user know
        if any_input_changed and had_cache and not forced_overwrite:
            changes = ''.join([ '\n - ' + inp for inp in changed_inputs ])
            print(f'{GREEN_HEADER} The task is run again since the following inputs changed:{changes}{COLOR_END}')
        # Save a few internal values in the task although the task is static
        # We save it right before calling the function in case the function uses this task as input
        self.changed_inputs = changed_inputs
        self.cache_cksums = cache_cksums
        # Run the actual task
        output = self.func(**processed_args)
        end_time = time.time()
        print(f' Task {self.flag} completed in {end_time - start_time:.2f} seconds{COLOR_END}')
        self._set_parent_output(parent, output)
        # Set the output to be saved in cache
        # Note that all must be JSON serializable values
        cache_output = output
        # Update cache output unless it is marked to not save it
        if self.use_cache: parent.cache.update(self.cache_output_key, cache_output)
        # Update the overwritables so this is not remade further in the same run
        parent.overwritables.discard(self.flag)
        # As a brief cleanup, if the output directory is empty at the end, then remove it
        # Otherwise, change the incomplete directory name to its final name
        if writes_output_dir and exists(incomplete_output_directory):
            if is_directory_empty(incomplete_output_directory): rmtree(incomplete_output_directory)
            else: rename(incomplete_output_directory, final_output_directory)
        # Now return the function result
        return output

    def find_arg_value (self, arg : str, parent : Union['Project', 'MD'], default_arguments : set):
        """ Find argument values, thus running any dependency if necessary. """
        # Word 'task' is reserved for getting the task itself
        if arg == 'task': return self
        # Word 'self' is reserved for getting the caller Project/MD
        if arg == 'self': return parent
        # Check if the argument is an MD property
        arg_value = safe_getattr(parent, arg, MISSING_ARGUMENT_EXCEPTION)
        if arg_value is not MISSING_ARGUMENT_EXCEPTION: return arg_value
        # If the parent is an MD then it may happen the property is from the Project
        if isinstance(parent, MD):
            arg_value = safe_getattr(parent.project, arg, MISSING_ARGUMENT_EXCEPTION)
            if arg_value is not MISSING_ARGUMENT_EXCEPTION: return arg_value
        # If the property is missing then search among the additional arguments
        arg_value = self.args.get(arg, MISSING_ARGUMENT_EXCEPTION)
        if arg_value is not MISSING_ARGUMENT_EXCEPTION: return arg_value
        # It may also happen that the argument has a default value
        # If this is the case then we can skip it
        if arg in default_arguments: return MISSING_ARGUMENT_EXCEPTION
        # NEVER FORGET: Function arguments must have the same name that the Project/MD property
        # If the argument is still missing then you programmed the function wrongly or...
        # You may have forgotten the additional argument in the task args
        raise RuntimeError(f'Function "{self.func.__name__}" from task "{self.flag}" expects argument "{arg}" but it is missing')

    def get_changed_inputs (self,
        parent : Union['Project', 'MD'],
        processed_args : dict) -> tuple[ list[str], bool, dict ]:
        """ Find out if input arguments changed regarding the last run.

        Returns:
            The names of changed arguments, whether a previous cache existed
            at all, and the updated per-argument cksums dict. """
        # Get cache argument references
        cache_cksums = parent.cache.retrieve(self.cache_arg_cksums, MISSING_VALUE_EXCEPTION)
        had_cache = cache_cksums is not MISSING_VALUE_EXCEPTION
        if not had_cache: cache_cksums = {}
        # Check argument by argument
        # Keep a list with arguments which have changed
        unmatched_arguments = []
        for arg_name, arg_value in processed_args.items():
            # Skip the output directory argument
            # Changes in this argument are not actual changes
            if arg_name == OUTPUT_DIRECTORY_ARG: continue
            # Get the cksum from the new argument value
            new_cksum = get_cksum_id(arg_value)
            # Retrieve the cksum from the old argument value
            old_cksum = cache_cksums.get(arg_name, None)
            if self.debug: print(f'Task "{self.name}" -> argument "{arg_name}"\n' +
                f' new value: {arg_value}\n' +
                f' new value cksum: {new_cksum}\n' +
                f' old value cksum: {old_cksum}\n' +
                f' match: {new_cksum == old_cksum}')
            # Compare new and old cksums
            if new_cksum != old_cksum:
                # If we found a mismatch then add it to the list
                unmatched_arguments.append(arg_name)
                # Update the references
                cache_cksums[arg_name] = new_cksum
        return unmatched_arguments, had_cache, cache_cksums

411 

412 

413class MD: 

414 """ A Molecular Dynamics (MD) is the union of a structure and a trajectory. 

415 Having this data several analyses are possible. 

416 Note that an MD is always defined inside of a Project and thus it has additional topology and metadata. """ 

417 

418 def __init__ (self, 

419 project : 'Project', 

420 number : int, 

421 directory : str, 

422 input_structure_filepath : str, 

423 input_trajectory_filepaths : list[str], 

424 ): 

425 """ 

426 Initialize the MD object. 

427  

428 Args: 

429 project (Project): The parent project this MD belongs to. 

430 number (int): The number of the MD according to its accession. 

431 directory (str): The local directory where the MD takes place. 

432 input_structure_filepath (str): The input structure file path. 

433 input_trajectory_filepaths (list[str]): The input trajectory file paths. 

434 """ 

435 

436 # Save the inputs 

437 self.project = project 

438 if not project: 

439 raise Exception('Project is mandatory to instantiate a new MD') 

440 # Save the MD number and index 

441 self.number = number 

442 self.index = number - 1 

443 # Set the MD accession and request URL 

444 self.accession = None 

445 self.remote = None 

446 if self.project.database_url and self.project.accession: 

447 self.accession = f'{self.project.accession}.{self.number}' 

448 self.remote = Remote(self.project.database_url, self.accession) 

449 # Save the directory 

450 self.directory = normpath(directory) 

451 # Now set the director relative to the project 

452 self.directory = self.project.pathify(self.directory) 

453 if normpath(self.directory) == normpath(self.project.directory): 

454 raise InputError(f'MD {self.number} has the same directory as the project: {self.directory}') 

455 # Save the directory name alone apart 

456 self.directory_location, self.directory_name = split(self.directory) 

457 # If the directory does not exists then create it 

458 if not exists(self.directory): 

459 mkdir(self.directory) 

460 # Save the input structure filepath 

461 # They may be relative to the project directory (unique) or relative to the MD directory (one per MD) 

462 # If the path is absolute then it is considered unique 

463 # If the file does not exist and it is to be downloaded then it is downloaded for each MD 

464 # Priorize the MD directory over the project directory 

465 self.arg_input_structure_filepath = input_structure_filepath 

466 self._input_structure_filepath = None 

467 # Set the internal variable for the input structure file, to be assigned later 

468 self._input_structure_file = None 

469 # Save the input trajectory filepaths 

470 self.arg_input_trajectory_filepaths = input_trajectory_filepaths 

471 self._input_trajectory_filepaths = None 

472 # Set the internal variable for the input trajectory files, to be assigned later 

473 self._input_trajectory_files = None 

474 

475 # Processed structure and trajectory files 

476 self._structure_file = None 

477 self._trajectory_file = None 

478 

479 # Other values which may be found/calculated on demand 

480 self._md_inputs = None 

481 self._structure = None 

482 

483 # Tests 

484 self._trajectory_integrity = None 

485 

486 # Set a new MD specific register 

487 # In case the directory is the project directory itself, use the project register 

488 register_filepath = self.pathify(REGISTER_FILENAME) 

489 register_file = File(register_filepath) 

490 if register_file.path == self.project.register.file.path: 

491 self.register = self.project.register 

492 else: 

493 self.register = Register(register_file) 

494 # Save also warnings apart since they are to be used as an input for metadata tasks 

495 self.warnings = self.register.warnings 

496 

497 # Set a new MD specific cache 

498 # In case the directory is the project directory itself, use the project cache 

499 cache_filepath = self.pathify(CACHE_FILENAME) 

500 cache_file = File(cache_filepath) 

501 if cache_file.path == self.project.cache.file.path: 

502 self.cache = self.project.cache 

503 else: 

504 self.cache = Cache(cache_file) 

505 

506 # Set tasks whose output is to be overwritten 

507 self.overwritables = set() 

508 

509 # Get MD inputs just to fill the inputs' "mds" value 

510 # Some functions may fail otherwise when its value is missing 

511 self.get_md_inputs() 

512 

513 def __repr__ (self): 

514 return 'MD' 

515 

516 def pathify (self, filename_or_relative_path : str) -> str: 

517 """ Given a filename or relative path, add the MD directory path at the beginning. """ 

518 return normpath(self.directory + '/' + filename_or_relative_path) 

519 

520 # Input structure file ------------ 

521 

522 def get_input_structure_filepath (self) -> str: 

523 """ Set a function to get input structure file path. """ 

524 # Return the internal value if it is already assigned 

525 if self._input_structure_filepath != None: 

526 return self._input_structure_filepath 

527 # Set a function to find out if a path is relative to MD directories or to the project directory 

528 # To do so just check if the file exists in any of those 

529 # In case it exists in both or none then assume it is relative to MD directory 

530 # Parse glob notation in the process 

531 def relativize_and_parse_paths (input_path : str, may_not_exist : bool = False) -> Optional[str]: 

532 # Check if it is an absolute path 

533 if isabs(input_path): 

534 abs_glob_parse = parse_glob(input_path) 

535 # If we had multiple results then we complain 

536 if len(abs_glob_parse) > 1: 

537 raise InputError(f'Multiple structures found with "{input_path}": {", ".join(abs_glob_parse)}') 

538 # If we had no results then we complain 

539 if len(abs_glob_parse) == 0: 

540 if self.remote: 

541 warn('Spread syntax is not supported to download remote files') 

542 raise InputError(f'No structure found with "{input_path}"') 

543 abs_parsed_filepath = abs_glob_parse[0] 

544 return abs_parsed_filepath 

545 # Check the MD directory 

546 md_relative_filepath = self.pathify(input_path) 

547 md_glob_parse = parse_glob(md_relative_filepath) 

548 if len(md_glob_parse) > 1: 

549 raise InputError(f'Multiple structures found with "{input_path}": {", ".join(md_glob_parse)}') 

550 md_parsed_filepath = md_glob_parse[0] if len(md_glob_parse) == 1 else None 

551 if md_parsed_filepath and File(md_parsed_filepath).exists: 

552 return md_parsed_filepath 

553 # Check the project directory 

554 project_relative_filepath = self.project.pathify(input_path) 

555 project_glob_parse = parse_glob(project_relative_filepath) 

556 if len(project_glob_parse) > 1: 

557 raise InputError(f'Multiple structures found with "{input_path}": {", ".join(project_glob_parse)}') 

558 project_parsed_filepath = project_glob_parse[0] if len(project_glob_parse) == 1 else None 

559 if project_parsed_filepath and File(project_parsed_filepath).exists: 

560 return project_parsed_filepath 

561 # At this point we can conclude the input structure file does not exist 

562 # If we have no paths at all then it means a glob pattern was passed and it didn't match 

563 # Note that if a glob pattern existed then it would mean the file actually existed 

564 if len(md_glob_parse) == 0 and len(project_glob_parse) == 0: 

565 # Warn the user in case it was trying to use glob syntax to donwload remote files 

566 if self.remote: 

567 warn('Spread syntax is not supported to download remote files') 

568 raise InputError('No trajectory file was reached neither in the project directory or MD directories in path(s) ' + ', '.join(input_path)) 

569 # If the path does not exist anywhere then we asume it will be downloaded and set it relative to the MD 

570 # However make sure we have a remote 

571 # As an exception, if the 'may not exist' flag is passed then we return the result even if there is no remote 

572 if not may_not_exist and not self.remote: 

573 raise InputError(f'Cannot find a structure file by "{input_path}" anywhere') 

574 md_parsed_filepath = self.project.pathify(input_path) if f'{self.directory_name}/' in md_parsed_filepath else self.pathify(input_path) 

575 return md_parsed_filepath 

576 # If we have a value passed through command line 

577 if self.arg_input_structure_filepath: 

578 # Find out if it is relative to MD directories or to the project directory 

579 self._input_structure_filepath = relativize_and_parse_paths(self.arg_input_structure_filepath) 

580 # Save the parsed value in the inputs file 

581 self.project.update_inputs( 

582 f'mds.{self.index}.input_structure_filepath', 

583 self._input_structure_filepath) 

584 return self._input_structure_filepath 

585 # If we have a value passed through the inputs file has the value 

586 if self.project.is_inputs_file_available(): 

587 # Get the input value, whose key must exist 

588 inputs_value = self.get_input('input_structure_filepath') 

589 # If there is a valid input then use it 

590 if inputs_value: 

591 self._input_structure_filepath = relativize_and_parse_paths(inputs_value) 

592 return self._input_structure_filepath 

593 # If there is not input structure anywhere then use the input topology 

594 # We will extract the structure from it using a sample frame from the trajectory 

595 # Note that topology input filepath must exist and an input error will raise otherwise 

596 # However if we are using the standard topology file we can not extract the PDB from it (yet) 

597 if self.project.input_topology_file != MISSING_TOPOLOGY and \ 

598 self.project.input_topology_file.filename != STANDARD_TOPOLOGY_FILENAME: 

599 return self.project.input_topology_file.path 

600 # If we can not use the topology either then surrender 

601 raise InputError('There is not input structure at all') 

602 

    def get_input_structure_file (self) -> 'File':
        """ Get the input structure (pdb) file from the inputs.
        If the file is not found locally then try to download it from the remote.
        Raises InputError when the file is missing and there is no remote to download it from. """
        # If the input structure file is already defined then return it
        if self._input_structure_file:
            return self._input_structure_file
        # Otherwise we must set it
        # First set the input structure filepath
        input_structure_filepath = self.get_input_structure_filepath()
        # Now set the input structure file
        self._input_structure_file = File(input_structure_filepath)
        # If the file already exists then return it
        if self._input_structure_file.exists:
            return self._input_structure_file
        # Try to download it
        # If we do not have the required parameters to download it then we surrender here
        if not self.remote:
            raise InputError(f'Missing input structure file "{self._input_structure_file.path}"')
        # Download the structure
        # If the structure filename is the standard structure filename then use the structure endpoint instead
        if self._input_structure_file.filename == STRUCTURE_FILENAME:
            self.remote.download_standard_structure(self._input_structure_file)
        # Otherwise download the input structure file by its filename
        else:
            self.remote.download_file(self._input_structure_file)
        return self._input_structure_file
    input_structure_file = property(get_input_structure_file, None, None, "Input structure filename (read only)")

630 

631 # Input trajectory filename ------------ 

632 

    def get_input_trajectory_filepaths (self) -> list[str]:
        """ Get the input trajectory file paths.
        Paths may come from command line arguments or from the inputs file, and they may be
        absolute, relative to the MD directory or relative to the project directory.
        Glob patterns are expanded. Raises InputError when no trajectory can be resolved. """
        # Return the internal value if it is already assigned
        if self._input_trajectory_filepaths != None:
            return self._input_trajectory_filepaths
        # Set a function to check and fix input trajectory filepaths
        # Also relativize paths to the current MD directory and parse glob notation
        def relativize_and_parse_paths (input_paths : list[str]) -> list[str]:
            checked_paths = input_paths
            # Input trajectory filepaths may be both a list or a single string
            # However we must keep a list
            if type(checked_paths) == list:
                pass
            elif type(checked_paths) == str:
                checked_paths = [ checked_paths ]
            else:
                raise InputError('Input trajectory filepaths must be a list of strings or a string')
            # Make sure all or none of the trajectory paths are absolute
            abs_count = sum([ isabs(path) for path in checked_paths ])
            if not (abs_count == 0 or abs_count == len(checked_paths)):
                raise InputError('All trajectory paths must be relative or absolute. Mixing is not supported')
            # Set a function to glob-parse and merge all paths
            def parse_all_glob (paths : list[str]) -> list[str]:
                parsed_paths = []
                for path in paths:
                    parsed_paths += parse_glob(path)
                return parsed_paths
            # In case trajectory paths are absolute
            if abs_count > 0:
                absolute_parsed_paths = parse_all_glob(checked_paths)
                # Check we successfully defined some trajectory file
                if len(absolute_parsed_paths) == 0:
                    # Warn the user in case it was trying to use glob syntax to download remote files
                    if self.remote:
                        warn('Spread syntax is not supported to download remote files')
                    raise InputError('No trajectory file was reached neither in the project directory or MD directories in path(s) ' + ', '.join(input_paths))
                return absolute_parsed_paths
            # If trajectory paths are not absolute then check if they are relative to the MD directory
            # Get paths relative to the current MD directory
            md_relative_paths = [ self.pathify(path) for path in checked_paths ]
            # In case there are glob characters we must parse the paths
            md_parsed_paths = parse_all_glob(md_relative_paths)
            # Check we successfully defined some trajectory file
            if len(md_parsed_paths) > 0:
                # If so, check at least one of the files does actually exist
                if any([ File(path).exists for path in md_parsed_paths ]):
                    return md_parsed_paths
            # If no trajectory files were found then assume they are relative to the project
            # Get paths relative to the project directory
            project_relative_paths = [ self.project.pathify(path) for path in checked_paths ]
            # In case there are glob characters we must parse the paths
            project_parsed_paths = parse_all_glob(project_relative_paths)
            # Check we successfully defined some trajectory file
            if len(project_parsed_paths) > 0:
                # If so, check at least one of the files does actually exist
                if any([ File(path).exists for path in project_parsed_paths ]):
                    return project_parsed_paths
            # At this point we can conclude the input trajectory file does not exist
            # If we have no paths at all then it means a glob pattern was passed and it didn't match
            # Note that if a glob pattern existed then it would mean the file actually existed
            if len(md_parsed_paths) == 0 and len(project_parsed_paths) == 0:
                # Warn the user in case it was trying to use glob syntax to download remote files
                if self.remote:
                    warn('Spread syntax is not supported to download remote files')
                raise InputError('No trajectory file was reached neither in the project directory or MD directories in path(s) ' + ', '.join(input_paths))
            # If we have a path however it may be downloaded from the database if we have a remote
            if not self.remote:
                raise InputError(f'Cannot find anywhere a trajectory file with path(s) "{", ".join(input_paths)}"')
            # Note that if input path was not glob based it will be both as project relative and MD relative
            if len(md_parsed_paths) == 0: raise ValueError('This should never happen')
            # If file is to be downloaded then we must make sure the path is relative to the project
            project_relative_paths = [
                self.project.pathify(path) if f'{self.directory_name}/' in path else self.pathify(path) for path in checked_paths
            ]
            return project_relative_paths
        # If we have a value passed through command line
        if self.arg_input_trajectory_filepaths:
            self._input_trajectory_filepaths = relativize_and_parse_paths(self.arg_input_trajectory_filepaths)
            # Save the parsed value in the inputs file
            self.project.update_inputs(
                f'mds.{self.index}.input_trajectory_filepaths',
                self._input_trajectory_filepaths)
            return self._input_trajectory_filepaths
        # Check if the inputs file has the value
        if self.project.is_inputs_file_available():
            # Get the input value
            inputs_value = self.get_input('input_trajectory_filepaths')
            if inputs_value:
                self._input_trajectory_filepaths = relativize_and_parse_paths(inputs_value)
                return self._input_trajectory_filepaths
        # If there is no trajectory available then we surrender
        raise InputError('There is not input trajectory at all')

725 

726 def get_input_trajectory_files (self) -> str: 

727 """ Get the input trajectory filename(s) from the inputs. 

728 If file(s) are not found try to download it. """ 

729 # If we already defined input trajectory files then return them 

730 if self._input_trajectory_files != None: 

731 return self._input_trajectory_files 

732 # Otherwise we must set the input trajectory files 

733 input_trajectory_filepaths = self.get_input_trajectory_filepaths() 

734 self._input_trajectory_files = [ File(path) for path in input_trajectory_filepaths ] 

735 # Find missing trajectory files 

736 missing_input_trajectory_files = [] 

737 for trajectory_file in self._input_trajectory_files: 

738 if not trajectory_file.exists: 

739 missing_input_trajectory_files.append(trajectory_file) 

740 # If all files already exists then we are done 

741 if len(missing_input_trajectory_files) == 0: 

742 return self._input_trajectory_files 

743 # Try to download the missing files 

744 # If we do not have the required parameters to download it then we surrender here 

745 if not self.remote: 

746 missing_filepaths = [ trajectory_file.path for trajectory_file in missing_input_trajectory_files ] 

747 raise InputError('Missing input trajectory files: ' + ', '.join(missing_filepaths)) 

748 # Download each trajectory file (ususally it will be just one) 

749 for trajectory_file in self._input_trajectory_files: 

750 # If this is the main trajectory (the usual one) then use the dedicated endpoint 

751 if trajectory_file.filename == TRAJECTORY_FILENAME: 

752 frame_selection = f'1:{self.project.sample_trajectory}:1' if self.project.sample_trajectory else None 

753 self.remote.download_trajectory(trajectory_file, frame_selection=frame_selection, format='xtc') 

754 # Otherwise, download it by its filename 

755 else: 

756 self.remote.download_file(trajectory_file) 

757 return self._input_trajectory_files 

758 input_trajectory_files = property(get_input_trajectory_files, None, None, "Input trajectory filenames (read only)") 

759 

    def get_md_inputs (self) -> dict:
        """ Get MD specific inputs.
        The matching MD entry is looked up in the inputs file by directory.
        When this MD has no inputs entry (i.e. it was passed through command line)
        a new entry is created and saved back to the inputs file. """
        # If we already have a value stored then return it
        if self._md_inputs:
            return self._md_inputs
        # Otherwise we must find its value
        # If we have MD inputs in the inputs file then use them
        if self.project.input_mds:
            # Iterate over the different MD inputs to find out each directory
            # We must find the MD inputs which belong to this specific MD according to this directory
            for md in self.project.input_mds:
                # Get the directory according to the inputs
                directory = md.get(MD_DIRECTORY, None)
                if directory:
                    check_directory(directory)
                # If no directory is specified in the inputs then guess it from the MD name
                else:
                    name = md.get('name', None)
                    if not name: raise InputError('There is a MD with no name and no directory. Please define at least one of them.')
                    directory = name_2_directory(name)
                # If the directory matches then this is our MD inputs
                if self.project.pathify(directory) == self.directory:
                    self._md_inputs = md
                    return self._md_inputs
        # If this MD directory has no associated inputs then it means it was passed through command line
        # We set a new MD inputs for it
        new_md_name = directory_2_name(self.directory)
        self._md_inputs = { 'name': new_md_name, 'mdir': self.directory }
        # Update the inputs file with the new MD inputs
        mds = self.project.inputs.get('mds', None)
        if mds == None: mds = []
        new_mds_inputs = [ *mds, self._md_inputs ]
        self.project.update_inputs('mds', new_mds_inputs)
        return self._md_inputs

    md_inputs = property(get_md_inputs, None, None, "MD specific inputs (read only)")

796 

797 def get_input (self, name: str): 

798 """ Get a specific 'input' value from MD inputs. """ 

799 value = self.md_inputs.get(name, MISSING_INPUT_EXCEPTION) 

800 # If we had a value then return it 

801 if value != MISSING_INPUT_EXCEPTION: 

802 return value 

803 return self.project.get_input(name) 

804 

805 # --------------------------------- 

806 

807 def get_file (self, target_file : File) -> bool: 

808 """ Check if a file exists. If not, try to download it from the database. 

809 If the file is not found in the database it is fine, we do not even warn the user. 

810 Note that this function is used to get populations and transitions files, which are not common. """ 

811 # If it exists we are done 

812 if target_file.exists: 

813 return True 

814 # Try to download the missing file 

815 # If we do not have the required parameters to download it then we surrender here 

816 if not self.remote: 

817 return False 

818 # Check if the file is among the available remote files 

819 # If it is no then stop here 

820 if target_file.filename not in self.remote.available_files: 

821 return False 

822 # Download the file 

823 self.remote.download_file(target_file) 

824 return True 

825 

826 def print_tests_summary (self): 

827 """ Make a summary of tests and their status. """ 

828 print('Tests summary:') 

829 for test_name in AVAILABLE_CHECKINGS: 

830 test_result = self.register.tests.get(test_name, None) 

831 # Print things pretty 

832 test_nice_name = NICE_NAMES[test_name] 

833 test_nice_result = None 

834 if test_result == None: 

835 test_nice_result = YELLOW_HEADER + 'Not run' + COLOR_END 

836 elif test_result == False: 

837 test_nice_result = RED_HEADER + 'Failed' + COLOR_END 

838 elif test_result == True: 

839 test_nice_result = GREEN_HEADER + 'Passed' + COLOR_END 

840 elif test_result == 'na': 

841 test_nice_result = BLUE_HEADER + 'Not applicable' + COLOR_END 

842 else: 

843 raise ValueError() 

844 

845 print(f' - {test_nice_name} -> {test_nice_result}') 

846 

    # Issue some warnings if failed or never-run tests are skipped
    # This is run after processing input files
    def _issue_required_test_warnings (self):
        """ Issue a register warning for every skipped (trusted) test which never ran. """
        for test_name in AVAILABLE_CHECKINGS:
            # Only tests the user asked to skip (trusted) are considered: ignore the rest
            if test_name not in self.project.trust: continue
            # If the test passed in a previous run then proceed
            test_result = self.register.tests.get(test_name)
            if test_result == True: continue
            # If the test failed in a previous run we can also proceed
            # The failing warning must be among the inherited warnings, so there is no need to add more warnings here
            if test_result == False: continue
            # If the test has been always skipped then issue a warning
            if test_result == None:
                # Remove previous warnings for this test
                self.register.remove_warnings(test_name)
                # Get test pretty name
                test_nice_name = NICE_NAMES[test_name]
                # Issue the corresponding warning
                self.register.add_warning(test_name, test_nice_name + ' was skipped and never run before')
                continue
            # Any other register value (e.g. 'na') is unexpected at this point
            raise ValueError('Test value is not supported')

869 

870 # Processed files ---------------------------------------------------- 

871 

    # Run the actual processing to generate output processed files out of input raw files
    # And by "files" I mean structure, trajectory and topology
    # Note this is a Task, so the processing is run only once per MD and then cached
    input_files_processing = Task('inpro', 'Input files processing', process_input_files)

875 

    def get_structure_file (self) -> 'File':
        """ Get the processed structure file.
        Triggers the input files processing when the file was not generated yet.
        With the 'faith' flag the input file is trusted as already processed and
        only sanity-checked, no processing is run. """
        # If we have a stored value then return it
        # This means we already found or generated this file
        if self._structure_file:
            return self._structure_file
        # Set the file
        structure_filepath = self.pathify(STRUCTURE_FILENAME)
        self._structure_file = File(structure_filepath)
        # If the faith flag was passed then simply make sure the input file makes sense
        if self.project.faith:
            if self.input_structure_file != self._structure_file:
                raise InputError('Input structure file is not equal to output structure file but the "faith" flag was used.\n'
                    ' Please refrain from using the faith argument (-f) if you ignore its effect.')
            if not self.input_structure_file.exists:
                raise InputError('Input structure file does not exist but the "faith" flag was used.\n'
                    ' Please refrain from using the faith argument (-f) if you ignore its effect.')
            return self._structure_file
        # Run the processing logic
        self.input_files_processing(self)
        # Now that the file is sure to exist we return it
        return self._structure_file
    structure_file = property(get_structure_file, None, None, "Structure file (read only)")

899 

    def get_trajectory_file (self) -> 'File':
        """ Get the processed trajectory file.
        Triggers the input files processing when the file was not generated yet.
        With the 'faith' flag the single input file is trusted as already processed and
        only sanity-checked, no processing is run. """
        # If we have a stored value then return it
        # This means we already found or generated this file
        if self._trajectory_file:
            return self._trajectory_file
        # If the file already exists then we are done
        trajectory_filepath = self.pathify(TRAJECTORY_FILENAME)
        self._trajectory_file = File(trajectory_filepath)
        # If the faith flag was passed then simply make sure the input file makes sense
        if self.project.faith:
            if len(self.input_trajectory_files) > 1:
                raise InputError('Several input trajectory files but the "faith" flag was used.\n'
                    ' Please refrain from using the faith argument (-f) if you ignore its effect.')
            sample = self.input_trajectory_files[0]
            if sample != self._trajectory_file:
                raise InputError('Input trajectory file is not equal to output trajectory file but the "faith" flag was used.\n'
                    ' Please refrain from using the faith argument (-f) if you ignore its effect.')
            if not self._trajectory_file.exists:
                raise InputError('Input trajectory file does not exist but the "faith" flag was used.\n'
                    ' Please refrain from using the faith argument (-f) if you ignore its effect.')
            return self._trajectory_file
        # Run the processing logic
        self.input_files_processing(self)
        # Now that the file is sure to exist we return it
        return self._trajectory_file
    trajectory_file = property(get_trajectory_file, None, None, "Trajectory file (read only)")

927 

928 def get_topology_file (self) -> str: 

929 """ Get the processed topology from the project. """ 

930 return self.project.get_topology_file() 

931 topology_file = property(get_topology_file, None, None, 

932 "Topology filename from the project (read only)") 

933 

934 # --------------------------------------------------------------------------------- 

935 # Others values which may be found/calculated and files to be generated on demand 

936 # --------------------------------------------------------------------------------- 

937 

938 # Trajectory snapshots 

    # Trajectory snapshots, i.e. the number of frames in the trajectory
    get_snapshots = Task('frames', 'Count trajectory frames', get_frames_count)
    snapshots = property(get_snapshots, None, None, "Trajectory snapshots (read only)")

941 

942 def get_reference_bonds (self) -> list[list[int]]: 

943 """ Get the reference bonds. """ 

944 return self.project.reference_bonds 

945 reference_bonds = property(get_reference_bonds, None, None, "Atom bonds to be trusted (read only)") 

946 

    def get_structure (self) -> 'Structure':
        """ Get the parsed structure.
        The structure file must exist already (i.e. input files must have been processed).
        Bonds may be overwritten with the reference bonds when they cannot be trusted. """
        # If we already have a stored value then return it
        if self._structure:
            return self._structure
        # Otherwise we must set the structure
        # Make sure the structure file exists at this point
        if not self.structure_file.exists:
            raise ValueError('Trying to set standard structure but file '
                f'{self.structure_file.path} does not exist yet. Are you trying '
                'to access the standard structure before processing input files?')
        # Note that this is not only the structure class, but it also contains additional logic
        self._structure = Structure.from_pdb_file(self.structure_file.path)
        # If the stable bonds test failed and we had mercy then it is sure our structure will have wrong bonds
        # In order to make it coherent with the topology we will mine topology bonds from here and force them in the structure
        # If we fail to get bonds from topology then just go along with the default structure bonds
        if not self.register.tests.get(STABLE_BONDS_FLAG, None):
            self._structure.bonds = self.reference_bonds
        # Same procedure if we have coarse grain atoms, whose bonds cannot be guessed from the PDB
        elif self.cg_selection:
            self._structure.bonds = self.reference_bonds
        return self._structure
    structure = property(get_structure, None, None, "Parsed structure (read only)")

970 

    # First frame PDB file
    get_first_frame = Task('firstframe', 'Get first frame structure',
        get_first_frame, output_filename = FIRST_FRAME_FILENAME)
    get_first_frame_file = get_first_frame.get_output_file
    first_frame_file = property(get_first_frame_file, None, None, "First frame (read only)")

    # Average structure filename
    get_average_structure = Task('average', 'Get average structure',
        get_average_structure, output_filename = AVERAGE_STRUCTURE_FILENAME)
    get_average_structure_file = get_average_structure.get_output_file
    average_structure_file = property(get_average_structure_file, None, None, "Average structure filename (read only)")

    # Produce the MD metadata file to be uploaded to the database
    prepare_metadata = Task('mdmeta', 'Prepare MD metadata',
        generate_md_metadata, output_filename=OUTPUT_METADATA_FILENAME)

    # The processed interactions
    get_processed_interactions = Task('inter', 'Interaccions processing',
        process_interactions, { 'frames_limit': 1000 })
    interactions = property(get_processed_interactions, None, None, "Processed interactions (read only)")

    # MDAnalysis Universe object
    # Note the result is not cached: the universe is rebuilt on every Task run
    get_MDAnalysis_Universe = Task('mda_univ', 'MDAnalysis Universe object',
        get_mda_universe, use_cache = False)
    universe = property(get_MDAnalysis_Universe, None, None, "MDAnalysis Universe object (read only)")

996 

997 def input_getter (name : str): 

998 """ Function to get input values which may be MD specific. 

999 If the MD input is missing then we use the project input value. """ 

1000 # Set the getter 

1001 def getter (self): 

1002 # Get the MD input 

1003 value = self.md_inputs.get(name, None) 

1004 if value != None: 

1005 return value 

1006 # If there is no MD input then return the project value 

1007 return getattr(self.project, f'input_{name}') 

1008 return getter 

1009 

1010 # Assign the MD input getters 

1011 input_interactions = property(input_getter('interactions'), None, None, "Interactions to be analyzed (read only)") 

1012 input_pbc_selection = property(input_getter('pbc_selection'), None, None, "Selection of atoms which are still in periodic boundary conditions (read only)") 

1013 input_cg_selection = property(input_getter('cg_selection'), None, None, "Selection of atoms which are not actual atoms but coarse grain beads (read only)") 

1014 

    def _set_pbc_selection (self, reference_structure : 'Structure', verbose : bool = False) -> 'Selection':
        """ Internal function to set the PBC selection.
        It may parse the inputs file selection string if it is available or guess it otherwise.

        reference_structure -- structure used to parse or guess the selection
        verbose -- when True, log every decision taken along the way """
        # Otherwise we must set the PBC selection
        if verbose: print('Setting Periodic Boundary Conditions (PBC) atoms selection')
        selection_string = None
        # If there is an inputs file then get the input pbc selection
        if self.project.is_inputs_file_available():
            if verbose: print(' Using selection string in the inputs file')
            selection_string = self.input_pbc_selection
        # If there is no inputs file we guess PBC atoms automatically
        else:
            if verbose: print(' No inputs file -> Selection string will be set automatically')
            selection_string = 'auto'
        # Parse the selection string using the reference structure
        parsed_selection = None
        # If the input PBC selection is 'auto' then guess it automatically
        if selection_string == 'auto':
            # To guess PBC atoms (with the current implementation) we must make sure there is no CG
            if reference_structure.has_cg():
                raise InputError('We can not guess PBC atoms in CG systems. Please set PBC atoms manually.\n'
                    ' Use the "-pbc" argument or set the inputs file "pbc_selection" field.')
            if verbose: print(' Guessing PBC atoms as solvent, counter ions and lipids')
            parsed_selection = reference_structure.select_pbc_guess()
        # If we have a valid input value then use it
        elif selection_string:
            if verbose: print(f' Selecting PBC atoms "{selection_string}"')
            parsed_selection = reference_structure.select(selection_string)
            if not parsed_selection:
                raise InputError(f'PBC selection "{selection_string}" selected no atoms')
        # If we have an input value but it is empty then we set an empty selection
        else:
            if verbose: print(' No PBC atoms selected')
            parsed_selection = Selection()
        # Log a few of the selected residue names
        if verbose and parsed_selection:
            print(f' Parsed PBC selection has {len(parsed_selection)} atoms')
            selected_residues = reference_structure.get_selection_residues(parsed_selection)
            selected_residue_names = list(set([ residue.name for residue in selected_residues ]))
            limit = 3 # Show a maximum of 3 residue names
            example_residue_names = ', '.join(selected_residue_names[0:limit])
            if len(selected_residue_names) > limit: example_residue_names += ', etc.'
            print(' e.g. ' + example_residue_names)
        return parsed_selection

1059 

1060 def get_pbc_selection (self) -> 'Selection': 

1061 """ Get the periodic boundary conditions atom selection. """ 

1062 # If we already have a stored value then return it 

1063 if self.project._pbc_selection != None: 

1064 return self.project._pbc_selection 

1065 # Otherwise we must set the PBC selection 

1066 self.project._pbc_selection = self._set_pbc_selection(self.structure) 

1067 return self.project._pbc_selection 

1068 pbc_selection = property(get_pbc_selection, None, None, "Periodic boundary conditions atom selection (read only)") 

1069 

1070 # WARNING: Do not inherit project pbc residues 

1071 # WARNING: It may trigger all the processing logic of the reference MD when there is no need 

    def get_pbc_residues (self) -> list[int]:
        """ Get indices of residues in periodic boundary conditions.
        The value is cached in the project. """
        # If we already have a stored value then return it
        # NOTE(review): a cached empty list is falsy, so the empty case is recomputed on every
        # access (harmless, same result) — an 'is not None' check would avoid it; confirm the
        # initial value of project._pbc_residues before changing this
        if self.project._pbc_residues:
            return self.project._pbc_residues
        # If there is no PBC selection then there are no PBC residues
        if not self.pbc_selection:
            self.project._pbc_residues = []
            return self.project._pbc_residues
        # Otherwise we parse the selection and return the list of residue indices
        self.project._pbc_residues = self.structure.get_selection_residue_indices(self.pbc_selection)
        print(f'PBC residues "{self.input_pbc_selection}" -> {len(self.project._pbc_residues)} residues')
        return self.project._pbc_residues
    pbc_residues = property(get_pbc_residues, None, None, "Indices of residues in periodic boundary conditions (read only)")

1086 

1087 # DANI: Esto algún día habría que tratar de automatizarlo 

1088 def _set_cg_selection (self, reference_structure : 'Structure', verbose : bool = False) -> 'Selection': 

1089 """ Set the coarse grain selection. """ 

1090 if verbose: print('Setting Coarse Grained (CG) atoms selection') 

1091 # If there is no inputs file then asum there is no CG selection 

1092 if not self.project.is_inputs_file_available(): 

1093 if verbose: print(' No inputs file -> Asuming there is no CG at all') 

1094 return Selection() 

1095 # Otherwise we use the selection string from the inputs 

1096 if verbose: print(' Using selection string in the inputs file') 

1097 selection_string = self.input_cg_selection 

1098 # If the selection is empty, again, assume there is no CG selection 

1099 if not selection_string: 

1100 if verbose: print(' Empty selection -> There is no CG at all') 

1101 return Selection() 

1102 # Otherwise, process it 

1103 # If we have a valid input value then use it 

1104 elif selection_string: 

1105 if verbose: print(f' Selecting CG atoms "{selection_string}"') 

1106 parsed_selection = reference_structure.select(selection_string) 

1107 # If we have an input value but it is empty then we set an empty selection 

1108 else: 

1109 if verbose: print(' No CG atoms selected') 

1110 parsed_selection = Selection() 

1111 # Lof the parsed selection size 

1112 if verbose: print(f' Parsed CG selection has {len(parsed_selection)} atoms') 

1113 # Log a few of the selected residue names 

1114 if verbose and parsed_selection: 

1115 selected_residues = reference_structure.get_selection_residues(parsed_selection) 

1116 selected_residue_names = list(set([ residue.name for residue in selected_residues ])) 

1117 limit = 3 # Show a maximum of 3 residue names 

1118 example_residue_names = ', '.join(selected_residue_names[0:limit]) 

1119 if len(selected_residue_names) > limit: example_residue_names += ', etc.' 

1120 print(' e.g. ' + example_residue_names) 

1121 return parsed_selection 

1122 

1123 def get_cg_selection (self) -> 'Selection': 

1124 """ Get the coarse grain atom selection. """ 

1125 # If we already have a stored value then return it 

1126 if self.project._cg_selection: 

1127 return self.project._cg_selection 

1128 # Otherwise we must set the PBC selection 

1129 self.project._cg_selection = self._set_cg_selection(self.structure) 

1130 return self.project._cg_selection 

1131 cg_selection = property(get_cg_selection, None, None, "Periodic boundary conditions atom selection (read only)") 

1132 

1133 # WARNING: Do not inherit project cg residues 

1134 # WARNING: It may trigger all the processing logic of the reference MD when there is no need 

    def get_cg_residues (self) -> list[int]:
        """ Get indices of residues in coarse grain.
        The value is cached in the project. """
        # If we already have a stored value then return it
        # NOTE(review): a cached empty list is falsy, so the empty case is recomputed on every
        # access (harmless, same result) — an 'is not None' check would avoid it; confirm the
        # initial value of project._cg_residues before changing this
        if self.project._cg_residues:
            return self.project._cg_residues
        # If there is no CG selection then there are no CG residues
        if not self.cg_selection:
            self.project._cg_residues = []
            return self.project._cg_residues
        # Otherwise we parse the selection and return the list of residue indices
        self.project._cg_residues = self.structure.get_selection_residue_indices(self.cg_selection)
        print(f'CG residues "{self.input_cg_selection}" -> {len(self.project._cg_residues)} residues')
        return self.project._cg_residues
    cg_residues = property(get_cg_residues, None, None, "Indices of residues in coarse grain (read only)")

1149 

1150 def get_populations (self) -> list[float]: 

1151 """ Get equilibrium populations from a MSM from the project. """ 

1152 return self.project.populations 

1153 populations = property(get_populations, None, None, "Equilibrium populations from a MSM (read only)") 

1154 

1155 def get_transitions (self) -> list[list[float]]: 

1156 """ Get transition probabilities from a MSM from the project. """ 

1157 return self.project.transitions 

1158 transitions = property(get_transitions, None, None, "Transition probabilities from a MSM (read only)") 

1159 

1160 def get_protein_map (self) -> dict: 

1161 """ Get the residues mapping from the project. """ 

1162 return self.project.protein_map 

1163 protein_map = property(get_protein_map, None, None, "Residues mapping (read only)") 

1164 

    def get_charges (self) -> dict:
        """ Get the atom charges from the project.
        Note the docstring used to wrongly say "residues mapping" (copy-paste). """
        return self.project.charges
    charges = property(get_charges, None, None, "Residues charges (read only)")

1169 

    # Reference frame: a representative frame used to represent the MD (see get_bonds_reference_frame)
    get_reference_frame = Task('reframe', 'Reference frame', get_bonds_reference_frame)
    reference_frame = property(get_reference_frame, None, None, "Reference frame to be used to represent the MD (read only)")

1173 

1174 # --------------------------------------------------------------------------------- 

1175 # Tests 

1176 # --------------------------------------------------------------------------------- 

1177 

    def is_trajectory_integral (self) -> Optional[bool]:
        """ Sudden jumps test: check the trajectory for abnormal coordinate jumps.
        The result is cached after the first run. """
        # If we already have a stored value then return it
        if self._trajectory_integrity != None:
            return self._trajectory_integrity
        # Otherwise we must find the value
        self._trajectory_integrity = check_trajectory_integrity(
            input_structure_filename = self.structure_file.path,
            input_trajectory_filename = self.trajectory_file.path,
            structure = self.structure,
            pbc_selection = self.pbc_selection,
            mercy = self.project.mercy,
            trust = self.project.trust,
            register = self.register,
            # time_length = self.time_length,
            check_selection = ALL_ATOMS,
            standard_deviations_cutoff = self.project.rmsd_cutoff,
            snapshots = self.snapshots,
        )
        return self._trajectory_integrity

1198 

1199 # --------------------------------------------------------------------------------- 

1200 # Analyses 

1201 # --------------------------------------------------------------------------------- 

1202 

# RMSDs analysis
run_rmsds_analysis = Task('rmsds', 'RMSDs analysis',
    rmsds, { 'frames_limit': 5000 })

# TM scores analysis
run_tmscores_analysis = Task('tmscore', 'TM scores analysis',
    tmscores, { 'frames_limit': 200 })

# RMSF, atom fluctuation analysis
run_rmsf_analysis = Task('rmsf', 'Fluctuation analysis', rmsf)

# Radius of gyration analysis
run_rgyr_analysis = Task('rgyr', 'Radius of gyration analysis',
    rgyr, { 'frames_limit': 5000 })

# PCA, principal component analysis
run_pca_analysis = Task('pca', 'Principal component analysis',
    pca, { 'frames_limit': 2000, 'projection_frames': 20 })

# PCA contacts
# DANI: It tries to use a lot of memory, this must be reviewed
# DANI: It may raise an error for being unable to allocate so much memory
# DANI: It may eat all the RAM and finally die with a 'Killed' error
# DANI: For now I skip it
# DANI: It has been unmaintained for a long time, several things must change to recover it
# run_pca_contacts('pcacons', 'PCA contacts', pca_contacts)

# RMSD per residue analysis
run_rmsd_perres_analysis = Task('perres', 'RMSD per residue analysis',
    rmsd_per_residue, { 'frames_limit': 100 })

# RMSD pairwise
# Perform an analysis for the overall structure and then one more analysis for each interaction
run_rmsd_pairwise_analysis = Task('pairwise', 'RMSD pairwise',
    rmsd_pairwise, { 'frames_limit': 200, 'overall_selection': "name CA or name C5'" })

# Run the cluster analysis
run_clusters_analysis = Task('clusters', 'Clusters analysis',
    clusters_analysis, { 'frames_limit': 1000, 'desired_n_clusters': 20 })

# Calculate the distance mean and standard deviation of each pair of residues
run_dist_perres_analysis = Task('dist', 'Distance per residue',
    distance_per_residue, { 'frames_limit': 200 })

# Hydrogen bonds
run_hbonds_analysis = Task('hbonds', 'Hydrogen bonds analysis',
    hydrogen_bonds, { 'time_splits': 100 })

# SASA, solvent accessible surface analysis
run_sas_analysis = Task('sas', 'Solvent accessible surface analysis',
    sasa, { 'frames_limit': 100 })

# Perform the electrostatic and vdw energies analysis for each pair of interaction agents
run_energies_analysis = Task('energies', 'Energies analysis',
    energies, { 'frames_limit': 100 })

# Calculate torsions and then dihedral energies for every dihedral along the trajectory
run_dihedral_energies = Task('dihedrals', 'Dihedral energies analysis',
    compute_dihedral_energies, { 'frames_limit': 100 })

# Perform the pockets analysis
run_pockets_analysis = Task('pockets', 'Pockets analysis',
    pockets, { 'frames_limit': 100, 'maximum_pockets_number': 10 })

# Helical parameters
run_helical_analysis = Task('helical', 'Helical parameters', helical_parameters)

# Markov
run_markov_analysis = Task('markov', 'Markov', markov, { 'rmsd_selection': PROTEIN_AND_NUCLEIC })

# Membrane density analysis
run_density_analysis = Task('density', 'Membrane density analysis',
    density, { 'frames_limit': 1000 })

# Membrane thickness analysis
run_thickness_analysis = Task('thickness', 'Membrane thickness analysis',
    thickness, { 'frames_limit': 100 })

# Area per lipid analysis
run_apl_analysis = Task('apl', 'Membrane area per lipid analysis', area_per_lipid)

# Calculate lipid order parameters for membranes
run_lipid_order_analysis = Task('lorder', 'Membrane lipid order analysis',
    lipid_order, { 'frames_limit': 100 })

# Lipid-protein interactions analysis
run_lipid_interactions_analysis = Task('linter', 'Membrane lipid-protein interactions analysis',
    lipid_interactions, { 'frames_limit': 100 })

# Membrane channels analysis
# BUGFIX: this task was previously also assigned to 'run_lipid_interactions_analysis',
# silently overwriting the lipid-protein interactions task declared right above
run_channels_analysis = Task('channels', 'Membrane channels analysis',
    channels, { 'frames_limit': 10 })

1294 

1295 

1296class Project: 

1297 """ Class for the main project of an MDDB accession. 

1298 A project is a set of related MDs. 

1299 These MDs share all or most topology and metadata. """ 

1300 

def __init__ (self,
    directory : str = '.',
    accession : Optional[str] = None,
    database_url : str = DEFAULT_API_URL,
    inputs_filepath : Optional[str] = None,
    input_topology_filepath : Optional[str] = None,
    input_structure_filepath : Optional[str] = None,
    input_trajectory_filepaths : Optional[list[str]] = None,
    md_directories : Optional[list[str]] = None,
    md_config : Optional[list[list[str]]] = None,
    reference_md_index : Optional[int] = None,
    populations_filepath : str = DEFAULT_POPULATIONS_FILENAME,
    transitions_filepath : str = DEFAULT_TRANSITIONS_FILENAME,
    aiida_data_filepath : Optional[str] = None,
    filter_selection : bool | str = False,
    pbc_selection : Optional[str] = None,
    cg_selection : Optional[str] = None,
    image : bool = False,
    fit : bool = False,
    translation : Optional[list[float]] = None,
    mercy : Optional[list[str] | bool] = None,
    trust : Optional[list[str] | bool] = None,
    faith : bool = False,
    pca_analysis_selection : str = PROTEIN_AND_NUCLEIC_BACKBONE,
    pca_fit_selection : str = PROTEIN_AND_NUCLEIC_BACKBONE,
    rmsd_cutoff : float = DEFAULT_RMSD_CUTOFF,
    interaction_cutoff : float = DEFAULT_INTERACTION_CUTOFF,
    interactions_auto : Optional[str] = None,
    guess_bonds : bool = False,
    sample_trajectory : Optional[int] = None,
):
    """
    Initialize a Project.

    Args:
        directory (str):
            Local directory where the project takes place.
        accession (Optional[str]):
            Project accession to download missing input files from the database (if already uploaded).
        database_url (str):
            API URL to download missing data when an accession is provided.
        inputs_filepath (Optional[str]):
            Path to a file with inputs for metadata, simulation parameters and analysis config.
        input_topology_filepath (Optional[str]):
            Path to input topology file relative to the project directory.
            Multiple formats accepted; default is our parsed JSON topology.
        input_structure_filepath (Optional[str]):
            Path to input structure file. It may be relative to the project or to each MD directory.
            If this value is not passed then the standard structure file is used as input by default.
        input_trajectory_filepaths (Optional[list[str]]):
            Paths to input trajectory files relative to each MD directory.
            If this value is not passed then the standard trajectory file path is used as input by default.
        md_directories (Optional[list[str]]):
            Path to the different MD directories.
            Each directory is to contain an independent trajectory and structure.
            Several output files will be generated in every MD directory.
        md_config (Optional[list]):
            Configuration of a specific MD. You may declare as many as you want.
            Every MD requires a directory name and at least one trajectory path.
            The structure is -md <directory> <trajectory_1> <trajectory_2> ...
            Note that all trajectories from the same MD will be merged.
            For legacy reasons, you may also provide a specific structure for an MD.
            e.g. -md <directory> <structure> <trajectory_1> <trajectory_2> ...
        reference_md_index (Optional[int]):
            Index of the reference MD (used by project-level functions; defaults to first MD).
        populations_filepath (str):
            Path to equilibrium populations file (Markov State Model only).
        transitions_filepath (str):
            Path to transition probabilities file (Markov State Model only).
        aiida_data_filepath (Optional[str]):
            Path to the AiiDA data file.
            This file may be generated by the aiida-gromacs plugin and contains provenance data.
        filter_selection (bool|str):
            Atoms selection to be filtered in VMD format.
            If the argument is passed alone (i.e. with no selection) then water and counter ions are filtered.
        pbc_selection (Optional[str]):
            Selection of atoms which stay in Periodic Boundary Conditions even after imaging the trajectory.
            e.g. remaining solvent, ions, membrane lipids, etc.
            Selection passed through console overrides the one in inputs file.
        cg_selection (Optional[str]):
            Selection of atoms which are not actual atoms but Coarse Grained beads.
            Selection passed through console overrides the one in inputs file.
        image (bool):
            Set if the trajectory is to be imaged so atoms stay in the PBC box. See -pbc for more information.
        fit (bool):
            Set if the trajectory is to be fitted (both rotation and translation) to minimize the RMSD to PROTEIN_AND_NUCLEIC_BACKBONE selection.
        translation (Optional[list[float]]):
            Set the x y z translation for the imaging process. Defaults to [0, 0, 0].
            e.g. -trans 0.5 -1 0
        mercy (Optional[list[str]|bool]):
            Failures to be tolerated (or boolean to set all/none). Defaults to none.
        trust (Optional[list[str]|bool]):
            Tests to skip/trust (or boolean to set all/none). Defaults to none.
        faith (bool):
            If True, require input files to match expected output files and skip processing.
        pca_analysis_selection (str):
            Atom selection for PCA analysis in VMD syntax.
        pca_fit_selection (str):
            Atom selection for the PCA fitting in VMD syntax.
        rmsd_cutoff (float):
            Set the cutoff for the RMSD sudden jumps analysis to fail.
            This cutoff stands for the number of standard deviations away from the mean an RMSD value is to be.
        interaction_cutoff (float):
            Set the cutoff for the interactions analysis to fail.
            This cutoff stands for percent of the trajectory where the interaction happens (from 0 to 1).
        interactions_auto (Optional[str]):
            Guess input interactions automatically. A VMD selection may be passed to limit guessed interactions to a specific subset of atoms.
        guess_bonds (bool):
            Force the workflow to guess atom bonds based on distance and atom radii in different frames along the trajectory instead of mining topology bonds.
        sample_trajectory (Optional[int]):
            If passed, download the first 10 (by default) frames from the trajectory.
            You can specify a different number by providing an integer value.
    """
    # Resolve mutable default arguments
    # Using literal lists as defaults would share one instance across every call
    if translation is None:
        translation = [0, 0, 0]
    if mercy is None:
        mercy = []
    if trust is None:
        trust = []
    # Save input parameters
    self.directory = normpath(directory)
    # If it is an absolute path then make it relative to the current working directory
    if isabs(self.directory):
        self.directory = relpath(self.directory)
    # Save the directory name alone apart
    if self.directory == '.':
        self.directory_name = basename(getcwd())
    else:
        self.directory_name = basename(self.directory)

    self.database_url = database_url
    self.accession = accession
    # Set the project remote in case we have the required data
    self.remote = None
    if self.database_url and self.accession:
        self.remote = Remote(self.database_url, self.accession)

    # Set the inputs file
    # Set the expected default name in case there is no inputs file since it may be downloaded
    self._inputs_file = File(self.pathify(DEFAULT_INPUTS_FILENAME))
    # If there is an input filepath then use it
    if inputs_filepath:
        self._inputs_file = File(inputs_filepath)
    # Otherwise guess the inputs file using the accepted filenames
    else:
        for filename in ACCEPTED_INPUT_FILENAMES:
            inputs_file = File(filename)
            if inputs_file.exists:
                self._inputs_file = inputs_file
                break
    # Set the input topology file
    # Note that even if the input topology path is passed we do not check it exists
    # Never forget we can download some input files from the database on the fly
    self.arg_input_topology_filepath = input_topology_filepath
    self._input_topology_filepath = None
    self._input_topology_file = None
    # Input structure and trajectory filepaths
    # Do not parse them to files yet, let this to the MD class
    self.input_structure_filepath = input_structure_filepath
    self.input_trajectory_filepaths = input_trajectory_filepaths

    # Make sure the new MD configuration (-md) was not passed as well as old MD inputs (-mdir, -stru, -traj)
    if md_config and (md_directories or input_trajectory_filepaths):
        raise InputError('MD configurations (-md) is not compatible with old MD inputs (-mdir, -traj)')
    # Save the MD configurations
    self.md_config = md_config
    # Make sure MD configuration has the correct format
    if self.md_config:
        # Make sure all MD configurations have at least 2 values each: directory and one trajectory
        for mdc in self.md_config:
            if len(mdc) < 2:
                raise InputError('Wrong MD configuration: the pattern is -md <directory> <trajectory> <trajectory 2> ...')
        # Make sure there are no duplicated MD directories
        md_directories = [ mdc[0] for mdc in self.md_config ]
        if len(md_directories) > len(set(md_directories)):
            raise InputError('There are duplicated MD directories')

    # Input populations and transitions for MSM
    self.populations_filepath = populations_filepath
    self._populations_file = File(self.populations_filepath)
    self.transitions_filepath = transitions_filepath
    self._transitions_file = File(self.transitions_filepath)
    # Input AiiDA data
    self.aiida_data_filepath = aiida_data_filepath
    self._aiida_data_file = File(self.aiida_data_filepath) if aiida_data_filepath else None

    # Set the processed topology filepath, which depends on the input topology filename
    # Note that this file is different from the standard topology, although it may be standard as well
    self._topology_filepath = None
    self._topology_file = None

    # Set the standard topology file
    self._standard_topology_file = None

    # Set the MD directories
    self._md_directories = md_directories
    # Check input MDs are correct so far
    if self._md_directories:
        self.check_md_directories()

    # Set the reference MD
    self._reference_md = None
    self._reference_md_index = reference_md_index

    # Set the rest of inputs
    # Note that the filter selection variable is not handled here at all
    # It is just passed to the filtering function, which knows how to handle the default
    self.filter_selection = filter_selection
    # PBC selection may come from the console or from the inputs
    self._input_pbc_selection = pbc_selection
    self._input_cg_selection = cg_selection
    self.image = image
    self.fit = fit
    self.translation = translation
    # If a boolean was passed for mercy then expand it to its corresponding list
    if isinstance(mercy, bool):
        self.mercy = AVAILABLE_FAILURES if mercy else []
    else:
        self.mercy = mercy
    # If a boolean was passed for trust then expand it to its corresponding list
    if isinstance(trust, bool):
        self.trust = AVAILABLE_CHECKINGS if trust else []
    else:
        self.trust = trust
    self.faith = faith
    self.pca_analysis_selection = pca_analysis_selection
    self.pca_fit_selection = pca_fit_selection
    self.rmsd_cutoff = rmsd_cutoff
    self.interaction_cutoff = interaction_cutoff
    self.sample_trajectory = sample_trajectory
    self.interactions_auto = interactions_auto
    self.guess_bonds = guess_bonds
    # Set the inputs, where values from the inputs file will be stored
    self._inputs = None

    # Other values which may be found/calculated on demand
    self._pbc_selection = None
    self._pbc_residues = None
    self._cg_selection = None
    self._cg_residues = None
    self._reference_bonds = None
    self._topology_reader = None
    self._dihedrals = None
    self._populations = None
    self._transitions = None
    self._pdb_ids = None
    self._mds = None

    # Force a couple of extraordinary files which are generated if atoms are resorted
    self.resorted_bonds_file = File(self.pathify(RESORTED_BONDS_FILENAME))
    self.resorted_charges_file = File(self.pathify(RESORTED_CHARGES_FILENAME))

    # Set a new entry for the register
    # This is useful to track previous workflow runs and problems
    register_file = File(self.pathify(REGISTER_FILENAME))
    self.register = Register(register_file)
    # Save also warnings apart since they are to be used as an input for metadata tasks
    self.warnings = self.register.warnings

    # Set the cache
    cache_file = File(self.pathify(CACHE_FILENAME))
    self.cache = Cache(cache_file)

    # Set tasks whose output is to be overwritten
    self.overwritables = set()

1568 

def __repr__ (self):
    """ Short constant tag identifying this object in logs and tracebacks. """
    return 'Project'

1571 

def pathify (self, filename_or_relative_path : str) -> str:
    """ Prefix a filename or relative path with the project directory and normalize the result. """
    prefixed = f'{self.directory}/{filename_or_relative_path}'
    return normpath(prefixed)

1575 

def check_md_directories (self):
    """ Validate the currently declared MD directories.

    Raises an InputError when there is no MD at all or when two MDs
    share the same directory.
    """
    directories = self._md_directories
    # At least one MD is required
    if len(directories) < 1:
        raise InputError('There must be at least one MD')
    # Every MD must have its own directory
    if len(set(directories)) != len(directories):
        raise InputError('There are duplicated MD directories')

1585 

# Set a function to get MD directories
def get_md_directories (self) -> list:
    """ Get the MD directories, finding them on demand the first time.

    Priority: explicitly declared directories, then directories declared in the
    inputs file, then directories guessed from the filesystem (those containing
    a register file). Raises an InputError when none of these sources work.
    """
    # If MD directories are already declared then return them
    if self._md_directories:
        return self._md_directories
    # Otherwise use the default MDs
    self._md_directories = []
    # Use the MDs from the inputs file when available
    if self.is_inputs_file_available() and self.input_mds:
        for input_md in self.input_mds:
            # Get the directory according to the inputs
            directory = input_md.get(MD_DIRECTORY, None)
            if directory:
                check_directory(directory)
            # If no directory is specified in the inputs then guess it from the MD name
            else:
                name = input_md['name']
                if not name:
                    name = 'unnamed'
                directory = name_2_directory(name)
            self._md_directories.append(directory)
    # Otherwise, guess MD directories by checking which directories include a register file
    else:
        available_directories = sorted(next(walk(self.directory))[1])
        for directory in available_directories:
            if exists(directory + '/' + REGISTER_FILENAME):
                self._md_directories.append(directory)
        # If we found no MD directory then it means MDs were never declared before
        if len(self._md_directories) == 0:
            raise InputError('Impossible to know which are the MD directories. '
                'You can either declare them using the "-mdir" option or by providing an inputs file')
    # Final sanity check: at least one MD and no duplicates
    self.check_md_directories()
    return self._md_directories
md_directories = property(get_md_directories, None, None, "MD directories (read only)")

1620 

# Set the reference MD index
def get_reference_md_index (self) -> int:
    """ Get the reference MD index, resolving it on demand the first time.

    The index comes from the constructor argument, then from the inputs file
    ('mdref'), and finally falls back to 0 (first MD) with a warning.
    """
    # If we already have a value then return it
    # BUGFIX: compare against None instead of relying on truthiness,
    # since 0 is a perfectly valid (and common) reference MD index
    if self._reference_md_index is not None:
        return self._reference_md_index
    # Otherwise we must find the reference MD index
    # If the inputs file is available then it may declare the reference MD index
    if self.is_inputs_file_available():
        self._reference_md_index = self.get_input('mdref')
    # Otherwise we simply set the first MD as the reference and warn the user about this
    if self._reference_md_index is None:
        warn('No reference MD was specified. The first MD will be used as reference.')
        self._reference_md_index = 0
    return self._reference_md_index
reference_md_index = property(get_reference_md_index, None, None, "Reference MD index (read only)")

1636 

# Set the reference MD
def get_reference_md (self) -> MD:
    """ Get the reference MD, resolving it on demand the first time.
    The reference MD is picked from the available MDs by the reference MD index. """
    # Resolve and cache the value when it is not set yet
    if not self._reference_md:
        self._reference_md = self.mds[self.reference_md_index]
    return self._reference_md
reference_md: MD = property(get_reference_md, None, None, "Reference MD (read only)")

1646 

# Setup the MDs
def get_mds (self) -> list:
    """ Get the available MDs (read only).

    MDs are instantiated lazily and cached. With the new -md configuration
    system each config entry is [directory, (structure?), trajectory...];
    otherwise one MD per declared MD directory is built (old -mdir/-stru/-traj
    system).
    """
    # If MDs are already declared then return them
    if self._mds:
        return self._mds
    # Now instantiate a new MD for each declared MD and save the reference MD
    self._mds = []
    # New system with MD configurations (-md)
    if self.md_config:
        # MD numbers are 1-based
        for n, config in enumerate(self.md_config, 1):
            directory = config[0]
            # LEGACY
            # In a previous version, the md config argument also held the structure
            # This was the second argument, so we check if we have more than 2 arguments
            # If this is the case, then check if the second argument has different format
            # Note that PDB format is also a trajectory supported format
            has_structure = False
            if len(config) > 2:
                first_sample = File(config[1])
                second_sample = File(config[2])
                # A format mismatch means the first file is a structure, not a trajectory
                if first_sample.format != second_sample.format:
                    has_structure = True
            # Finally set the input structure and trajectories
            input_structure_filepath = config[1] if has_structure else self.input_structure_filepath
            input_trajectory_filepaths = config[2:] if has_structure else config[1:]
            # Define the MD
            md = MD(
                project = self, number = n, directory = directory,
                input_structure_filepath = input_structure_filepath,
                input_trajectory_filepaths = input_trajectory_filepaths,
            )
            self._mds.append(md)
    # Old system (-mdir, -stru -traj)
    else:
        for n, md_directory in enumerate(self.md_directories, 1):
            md = MD(
                project = self, number = n, directory = md_directory,
                input_structure_filepath = self.input_structure_filepath,
                input_trajectory_filepaths = self.input_trajectory_filepaths,
            )
            self._mds.append(md)
    return self._mds
mds: list[MD] = property(get_mds, None, None, "Available MDs (read only)")

1691 

1692 # Check input files exist when their filenames are read 

1693 # If they do not exist then try to download them 

1694 # If the download is not possible then raise an error 

1695 

1696 # Inputs filename ------------ 

1697 

def is_inputs_file_available (self) -> bool:
    """ Tell whether the inputs file can be obtained.
    Note that asking for it when it is not available will lead to raising an input error. """
    inputs_file = self._inputs_file
    # With no declared file there is nothing to reach
    if not inputs_file:
        return False
    # Available either locally on disk or remotely through the database
    if inputs_file.exists:
        return True
    if self.remote:
        return True
    return False

1711 

def get_inputs_file (self) -> File:
    """ Get the inputs file, downloading it from the database when missing.
    Raises an InputError when the file is neither declared nor downloadable. """
    inputs_file = self._inputs_file
    # A filename must have been declared
    if not inputs_file:
        raise InputError('Not defined inputs filename')
    # A file already on disk needs no further work
    if inputs_file.exists:
        return inputs_file
    # Without remote access we cannot recover the missing file
    if not self.remote:
        raise InputError(f'Missing inputs file "{inputs_file.filename}"')
    # Download the inputs file from the database
    self.remote.download_inputs_file(inputs_file)
    return inputs_file
inputs_file = property(get_inputs_file, None, None, "Inputs filename (read only)")

1728 

1729 # Topology filename ------------ 

1730 

def get_input_topology_filepath (self) -> Optional[str]:
    """ Get the input topology filepath from the inputs or try to guess it.
    If the input topology filepath is a 'no' flag then we consider there is no topology at all.
    So far we extract atom charges and atom bonds from the topology file.
    In this scenario we can keep working but there are some consequences:
    1 - Analyses using atom charges such as 'energies' will be skipped
    2 - The standard topology file will not include atom charges
    3 - Bonds will be guessed
    """
    # If we already have an internal value calculated then return it
    # Explicit None check: MISSING_TOPOLOGY is a valid cached value
    if self._input_topology_filepath != None:
        return self._input_topology_filepath
    # Set a function to parse possible glob notation
    # Exactly one match is required; zero or many matches raise an InputError
    def parse (filepath : str) -> str:
        # If there is no glob pattern then just return the string as is
        if not is_glob(filepath):
            return filepath
        # If there is glob pattern then parse it
        parsed_filepaths = glob(filepath)
        if len(parsed_filepaths) == 0:
            # Warn the user in case it was trying to use glob syntax to download remote files
            if self.remote:
                warn('Spread syntax is not supported to download remote files')
            raise InputError(f'No topologies found with "{filepath}"')
        if len(parsed_filepaths) > 1:
            raise InputError(f'Multiple topologies found with "{filepath}": {", ".join(parsed_filepaths)}')
        return parsed_filepaths[0]
    # If this value was passed through command line then it has priority
    if self.arg_input_topology_filepath:
        # A 'no' flag means we must proceed without a topology at all
        if self.arg_input_topology_filepath.lower() in { 'no', 'not', 'na' }:
            self._input_topology_filepath = MISSING_TOPOLOGY
            return self._input_topology_filepath
        self._input_topology_filepath = parse(self.arg_input_topology_filepath)
        # Update the input topology filepath in the inputs file, in case it is not matching
        self.update_inputs('input_topology_filepath', relpath(self._input_topology_filepath, self.directory))
        return self._input_topology_filepath
    # Check if the inputs file has the value
    if self.is_inputs_file_available():
        # Get the input value, whose key must exist
        inputs_value = self.get_input('input_topology_filepath')
        # If there is a valid input then use it
        if inputs_value != None:
            # WARNING: the yaml parser automatically converts 'no' to False
            if inputs_value == False or inputs_value.lower() in { 'no', 'not', 'na' }:
                self._input_topology_filepath = MISSING_TOPOLOGY
                return self._input_topology_filepath
            parsed_input_value = parse(inputs_value)
            # Paths in the inputs file are relative to the project directory
            self._input_topology_filepath = self.pathify(parsed_input_value)
            return self._input_topology_filepath
    # If nothing worked then surrender
    raise InputError('Missing input topology file path. Please provide a topology file using the "-top" argument.\n' +
        ' Note that you may run the workflow without a topology file. To do so, use the "-top no" argument.\n' +
        ' However this has implications since we usually mine atom charges and bonds from the topology file.\n' +
        ' Some analyses such us the interaction energies will be skiped')

1785 

def get_input_topology_file (self) -> Optional[File]:
    """ Get the input topology file, downloading it when missing.
    Returns MISSING_TOPOLOGY when the workflow must proceed without a topology. """
    # If we already have a value then return it
    # Explicit None check: MISSING_TOPOLOGY is a valid cached value
    if self._input_topology_file != None:
        return self._input_topology_file
    # Set the input topology filepath
    input_topology_filepath = self.get_input_topology_filepath()
    # If the input filepath is the missing flag then it means we must proceed without a topology
    if input_topology_filepath == MISSING_TOPOLOGY:
        self._input_topology_file = MISSING_TOPOLOGY
        return self._input_topology_file
    # Set the file
    self._input_topology_file = File(input_topology_filepath)
    # If the file already exists then we are done
    if self._input_topology_file.exists:
        return self._input_topology_file
    # Try to download it
    # If we do not have the required parameters to download it then we surrender here
    if not self.remote:
        raise InputError(f'Missing input topology file "{self._input_topology_file.filename}"')
    # Otherwise, try to download it using the files endpoint
    # The standard topology has its own dedicated download endpoint
    if self._input_topology_file.filename == STANDARD_TOPOLOGY_FILENAME:
        self.remote.download_standard_topology(self._input_topology_file)
    else:
        self.remote.download_file(self._input_topology_file)
    # In case the topology is a '.top' file we consider it is a Gromacs topology
    # It may come with additional itp files we must download as well
    if self._input_topology_file.format == 'top':
        # Find available .itp files and download each of them
        itp_filenames = [filename for filename in self.remote.available_files if filename[-4:] == '.itp']
        for itp_filename in itp_filenames:
            itp_filepath = self.pathify(itp_filename)
            itp_file = File(itp_filepath)
            self.remote.download_file(itp_file)
    return self._input_topology_file
input_topology_file = property(get_input_topology_file, None, None, "Input topology file (read only)")

1825 

def get_input_structure_file (self) -> File:
    """ Get the input structure file, delegating to the reference MD.
    When calling this function make sure all MDs have the file or try to download it. """
    return self.reference_md._input_structure_file
input_structure_file = property(get_input_structure_file, None, None, "Input structure filename for each MD (read only)")

1831 

def get_input_trajectory_files (self) -> list[File]:
    """ Get the input trajectory file(s), delegating to the reference MD.
    If file(s) are not found the MD will try to download them. """
    return self.reference_md._input_trajectory_files
input_trajectory_files = property(get_input_trajectory_files, None, None, "Input trajectory filenames for each MD (read only)")

1837 

def get_populations_file (self) -> Optional[File]:
    """ Get the MSM equilibrium populations file, or None when it is not available. """
    target = self._populations_file
    # get_file checks existence and falls back to a database download
    return target if self.get_file(target) else None
populations_file = property(get_populations_file, None, None, "MSM equilibrium populations file (read only)")

1844 

1845 def get_transitions_file (self) -> Optional[File]: 

1846 """ Get the MSM transition probabilities file. """ 

1847 if not self.get_file(self._transitions_file): 

1848 return None 

1849 return self._transitions_file 

1850 transitions_file = property(get_transitions_file, None, None, "MSM transition probabilities file (read only)") 

1851 

1852 def get_aiida_data_file (self) -> Optional[File]: 

1853 """ Get the AiiDA data file.""" 

1854 if not self._aiida_data_file: return None 

1855 if not self.get_file(self._aiida_data_file): return None 

1856 return self._aiida_data_file 

1857 aiida_data_file = property(get_aiida_data_file, None, None, "AiiDA data file (read only)") 

1858 

1859 # --------------------------------- 

1860 

1861 def get_file (self, target_file : File) -> bool: 

1862 """ Check if a file exists. 

1863 If not, try to download it from the database. 

1864 If the file is not found in the database it is fine, we do not even warn the user. 

1865 Note that nowadays this function is used to get populations and transitions files, which are not common. 

1866 """ 

1867 return self.reference_md.get_file(target_file) 

1868 

1869 # Input file values ----------------------------------------- 

1870 

1871 # First of all set input themselves 

1872 

1873 # Get inputs 

1874 def get_inputs (self) -> dict: 

1875 # If inputs are already loaded then return them 

1876 if self._inputs: 

1877 return self._inputs 

1878 # When loading the inuts file, replace some values automatically 

1879 replaces = [ 

1880 ( '$DIR', self.directory_name ) 

1881 ] 

1882 # Otherwise, load inputs from the inputs file 

1883 inputs_data = None 

1884 if self.inputs_file.format == 'json': 

1885 inputs_data = load_json(self.inputs_file.path, replaces) 

1886 elif self.inputs_file.format == 'yaml': 

1887 inputs_data = load_yaml(self.inputs_file.path, replaces) 

1888 else: 

1889 raise InputError('Input file format is not supported. Please use json or yaml files.') 

1890 if not inputs_data: 

1891 raise InputError('Input file is empty') 

1892 self._inputs = inputs_data 

1893 # Legacy fixes 

1894 old_pdb_ids = self._inputs.get('pdbIds', None) 

1895 if old_pdb_ids: 

1896 self._inputs['pdb_ids'] = old_pdb_ids 

1897 # Finally return the updated inputs 

1898 return self._inputs 

1899 inputs = property(get_inputs, None, None, "Inputs from the inputs file (read only)") 

1900 

1901 def update_inputs (self, nested_key : str, new_value): 

1902 """ Permanently update the inputs file. 

1903 This may be done when command line inputs do not match file inputs. """ 

1904 # If the input already matches then do nothing 

1905 current_value = read_ndict(self.inputs, nested_key, MISSING_INPUT_EXCEPTION) 

1906 if current_value == new_value: return 

1907 # Set the new value 

1908 write_ndict(self.inputs, nested_key, new_value) 

1909 # If there is no inputs file then do not try to save anything 

1910 if not self.is_inputs_file_available(): return 

1911 print(f'* Field "{nested_key}" in the inputs file will be permanently modified') 

1912 # Write the new inputs to disk 

1913 if self.inputs_file.format == 'json': 

1914 save_json(self.inputs, self.inputs_file.path) 

1915 elif self.inputs_file.format == 'yaml': 

1916 # Note that comments in the original YAML file will be not kept 

1917 save_yaml(self.inputs, self.inputs_file.path) 

1918 else: 

1919 raise InputError('Input file format is not supported. Please use json or yaml files.') 

1920 

1921 # Then set getters for every value in the inputs file 

1922 

1923 def get_input (self, name: str): 

1924 """ Get a specific 'input' value. """ 

1925 value = self.inputs.get(name, MISSING_INPUT_EXCEPTION) 

1926 # If we had a value then return it 

1927 if value != MISSING_INPUT_EXCEPTION: 

1928 return value 

1929 # If the field is not specified in the inputs file then set a defualt value 

1930 default_value = DEFAULT_INPUT_VALUES.get(name, None) 

1931 # Warn the user about this 

1932 warn(f'Missing input "{name}" -> Using default value: {default_value}') 

1933 return default_value 

1934 

1935 # Set a function to get a specific 'input' value by its key/name 

1936 # Note that we return the getter function but we do not call it just yet 

1937 def input_getter (name : str): 

1938 def getter (self): 

1939 return self.get_input(name) 

1940 return getter 

1941 

    # Assign the getters
    # Each entry below exposes one field of the inputs file as a read-only class property
    input_interactions = property(input_getter('interactions'), None, None, "Interactions to be analyzed (read only)")
    input_protein_references = property(input_getter('forced_references'), None, None, "Uniprot IDs to be used first when aligning protein sequences (read only)")
    input_pdb_ids = property(input_getter('pdb_ids'), None, None, "Protein Data Bank IDs used for the setup of the system (read only)")
    input_type = property(input_getter('type'), None, None, "Set if its a trajectory or an ensemble (read only)")
    input_mds = property(input_getter('mds'), None, None, "Input MDs configuration (read only)")
    input_ligands = property(input_getter('ligands'), None, None, "Input ligand references (read only)")
    input_force_fields = property(input_getter('ff'), None, None, "Input force fields (read only)")
    input_collections = property(input_getter('collections'), None, None, "Input collections (read only)")
    input_chain_names = property(input_getter('chainnames'), None, None, "Input chain names (read only)")
    input_framestep = property(input_getter('framestep'), None, None, "Input framestep (read only)")
    input_name = property(input_getter('name'), None, None, "Input name (read only)")
    input_description = property(input_getter('description'), None, None, "Input description (read only)")
    input_authors = property(input_getter('authors'), None, None, "Input authors (read only)")
    input_groups = property(input_getter('groups'), None, None, "Input groups (read only)")
    input_contact = property(input_getter('contact'), None, None, "Input contact (read only)")
    input_program = property(input_getter('program'), None, None, "Input program (read only)")
    input_version = property(input_getter('version'), None, None, "Input version (read only)")
    input_method = property(input_getter('method'), None, None, "Input method (read only)")
    input_license = property(input_getter('license'), None, None, "Input license (read only)")
    input_linkcense = property(input_getter('linkcense'), None, None, "Input license link (read only)")
    input_citation = property(input_getter('citation'), None, None, "Input citation (read only)")
    input_thanks = property(input_getter('thanks'), None, None, "Input acknowledgements (read only)")
    input_links = property(input_getter('links'), None, None, "Input links (read only)")
    input_timestep = property(input_getter('timestep'), None, None, "Input timestep (read only)")
    input_temperature = property(input_getter('temp'), None, None, "Input temperature (read only)")
    input_ensemble = property(input_getter('ensemble'), None, None, "Input ensemble (read only)")
    input_water = property(input_getter('wat'), None, None, "Input water force field (read only)")
    input_boxtype = property(input_getter('boxtype'), None, None, "Input boxtype (read only)")
    # NOTE: input_pbc_selection and input_cg_selection are redefined further
    # below with console-priority getters, so these two assignments are overridden
    input_pbc_selection = property(input_getter('pbc_selection'), None, None, "Input Periodic Boundary Conditions (PBC) selection (read only)")
    input_cg_selection = property(input_getter('cg_selection'), None, None, "Input Coarse Grained (CG) selection (read only)")
    input_customs = property(input_getter('customs'), None, None, "Input custom representations (read only)")
    input_orientation = property(input_getter('orientation'), None, None, "Input orientation (read only)")
    input_multimeric = property(input_getter('multimeric'), None, None, "Input multimeric labels (read only)")
    # Additional topic-specific inputs
    input_cv19_unit = property(input_getter('cv19_unit'), None, None, "Input Covid-19 Unit (read only)")
    input_cv19_startconf = property(input_getter('cv19_startconf'), None, None, "Input Covid-19 starting conformation (read only)")
    input_cv19_abs = property(input_getter('cv19_abs'), None, None, "Input Covid-19 antibodies (read only)")
    input_cv19_nanobs = property(input_getter('cv19_nanobs'), None, None, "Input Covid-19 nanobodies (read only)")

1981 

1982 # PBC selection may come from the console or from the inputs file 

1983 # Console has priority over the inputs file 

1984 def get_input_pbc_selection (self) -> Optional[str]: 

1985 # If we have an internal value then return it 

1986 if self._input_pbc_selection: 

1987 return self._input_pbc_selection 

1988 # As an exception, we avoid asking for the inputs file if it is not available 

1989 # This input is required for some early processing steps where we do not need the inputs file for anything else 

1990 if not self.is_inputs_file_available(): 

1991 return None 

1992 # Otherwise, find it in the inputs 

1993 # Get the input value, whose key must exist 

1994 self._input_pbc_selection = self.get_input('pbc_selection') 

1995 return self._input_pbc_selection 

1996 input_pbc_selection = property(get_input_pbc_selection, None, None, "Selection of atoms which are still in periodic boundary conditions (read only)") 

1997 

1998 # CG selection may come from the console or from the inputs file 

1999 # Console has priority over the inputs file 

2000 def get_input_cg_selection (self) -> Optional[str]: 

2001 # If we have an internal value then return it 

2002 if self._input_cg_selection: 

2003 return self._input_cg_selection 

2004 # As an exception, we avoid asking for the inputs file if it is not available 

2005 # This input is required for some early processing steps where we do not need the inputs file for anything else 

2006 if not self.is_inputs_file_available(): 

2007 return None 

2008 # Otherwise, find it in the inputs 

2009 # Get the input value, whose key must exist 

2010 self._input_cg_selection = self.get_input('cg_selection') 

2011 return self._input_cg_selection 

2012 input_cg_selection = property(get_input_cg_selection, None, None, "Selection of atoms which are not acutal atoms but Coarse Grained beads (read only)") 

2013 

    # Set additional values inferred from input values

2015 

2016 def check_is_time_dependent (self) -> bool: 

2017 """ Set if MDs are time dependent. """ 

2018 if self.input_type == 'trajectory': 

2019 return True 

2020 elif self.input_type == 'ensemble': 

2021 return False 

2022 raise InputError(f'Not supported input "type" value: {self.input_type}. It must be "trajectory" or "ensemble"') 

2023 is_time_dependent = property(check_is_time_dependent, None, None, "Check if trajectory frames are time dependent (read only)") 

2024 

2025 # Processed files ---------------------------------------------------- 

2026 

2027 def inherit_topology_filename (self) -> Optional[str]: 

2028 """ Set the expected output topology filename given the input topology filename. 

2029 Note that topology formats are conserved. """ 

2030 if self.input_topology_file == MISSING_TOPOLOGY: 

2031 return None 

2032 filename = self.input_topology_file.filename 

2033 if not filename: 

2034 return None 

2035 if filename == RAW_CHARGES_FILENAME: 

2036 return filename 

2037 standard_format = self.input_topology_file.format 

2038 return 'topology.' + standard_format 

2039 

2040 def get_topology_filepath (self) -> str: 

2041 """ Get the processed topology file path. """ 

2042 # If we have a stored value then return it 

2043 if self._topology_filepath: 

2044 return self._topology_filepath 

2045 # Otherwise we must find it 

2046 inherited_filename = self.inherit_topology_filename() 

2047 self._topology_filepath = self.pathify(inherited_filename) if inherited_filename else None 

2048 return self._topology_filepath 

2049 topology_filepath = property(get_topology_filepath, None, None, "Topology file path (read only)") 

2050 

2051 def get_topology_file (self) -> str: 

2052 """ Get the processed topology file. """ 

2053 # If we have a stored value then return it 

2054 # This means we already found or generated this file 

2055 if self._topology_file != None: 

2056 return self._topology_file 

2057 # If the file already exists then we are done 

2058 self._topology_file = File(self.topology_filepath) if self.topology_filepath != None else MISSING_TOPOLOGY 

2059 # If the faith flag was passed then simply make sure the input file makes sense 

2060 if self.faith: 

2061 if self.input_topology_file != self._topology_file: 

2062 raise InputError('Input topology file is not equal to output topology file but the "faith" flag was used.\n' 

2063 ' Please refrain from using the faith argument (-f) if you ignore its effect.') 

2064 if not self.input_topology_file.exists: 

2065 raise InputError('Input topology file does not exist but the "faith" flag was used.\n' 

2066 ' Please refrain from using the faith argument (-f) if you ignore its effect.') 

2067 return self._topology_file 

2068 # Run the processing logic 

2069 self.reference_md.input_files_processing(self.reference_md) 

2070 # Now that the file is sure to exist we return it 

2071 return self._topology_file 

2072 topology_file = property(get_topology_file, None, None, "Topology file (read only)") 

2073 

2074 def get_structure_file (self) -> str: 

2075 """ Get the processed structure from the reference MD. """ 

2076 return self.reference_md.structure_file 

2077 structure_file = property(get_structure_file, None, None, "Structure filename from the reference MD (read only)") 

2078 

2079 def get_trajectory_file (self) -> str: 

2080 """ Get the processed trajectory from the reference MD. """ 

2081 return self.reference_md.trajectory_file 

2082 trajectory_file = property(get_trajectory_file, None, None, "Trajectory filename from the reference MD (read only)") 

2083 

2084 # --------------------------------------------------------------------------------- 

2085 # Others values which may be found/calculated and files to be generated on demand 

2086 # --------------------------------------------------------------------------------- 

2087 

2088 def get_structure (self) -> 'Structure': 

2089 """ Get the parsed structure from the reference MD. """ 

2090 return self.reference_md.structure 

2091 structure = property(get_structure, None, None, "Parsed structure from the reference MD (read only)") 

2092 

2093 def get_pbc_residues (self) -> list[int]: 

2094 """ Get the indices of residues in periodic boundary conditions. """ 

2095 return self.reference_md.pbc_residues 

2096 pbc_residues = property(get_pbc_residues, None, None, "Indices of residues in periodic boundary conditions (read only)") 

2097 

2098 def get_cg_residues (self) -> list[int]: 

2099 """ Get the indices of residues in coarse grain. """ 

2100 return self.reference_md.cg_residues 

2101 cg_residues = property(get_cg_residues, None, None, "Indices of residues in coarse grain (read only)") 

2102 

2103 def get_snapshots (self) -> int: 

2104 """ Get the reference MD snapshots. """ 

2105 return self.reference_md.snapshots 

2106 snapshots = property(get_snapshots, None, None, "Reference MD snapshots (read only)") 

2107 

2108 def get_universe (self) -> int: 

2109 """ Get the MDAnalysis Universe from the reference MD. """ 

2110 return self.reference_md.universe 

2111 universe = property(get_universe, None, None, "MDAnalysis Universe object (read only)") 

2112 

2113 def get_processed_interactions (self) -> dict: 

2114 """ Get the processed interactions from the reference replica, which are the same for all replicas. """ 

2115 return self.reference_md.interactions 

2116 interactions = property(get_processed_interactions, None, None, "Processed interactions (read only)") 

2117 

2118 def get_check_stable_bonds (self) -> bool: 

2119 """ Check if we must check stable bonds. """ 

2120 # Set if stable bonds have to be checked 

2121 must_check = STABLE_BONDS_FLAG not in self.trust 

2122 # If this analysis has been already passed then we can trust structure bonds 

2123 if self.register.tests.get(STABLE_BONDS_FLAG, None) == True: 

2124 must_check = False 

2125 return must_check 

2126 must_check_stable_bonds = property(get_check_stable_bonds, None, None, "Check if we must check stable bonds (read only)") 

2127 

    # Reference bonds
    # Task instances act as lazily-evaluated, cached computations exposed as properties
    get_reference_bonds = Task('refbonds', 'Reference bonds', find_safe_bonds)
    reference_bonds = property(get_reference_bonds, None, None, "Atom bonds to be trusted (read only)")

    # Atom charges
    # Note the class attribute deliberately reuses the name of the imported
    # 'get_charges' function it wraps, shadowing it inside the class namespace
    get_charges = Task('charges', 'Getting atom charges', get_charges)
    charges = property(get_charges, None, None, "Atom charges (read only)")

    # InChI keys
    get_inchi_keys = Task('inchikeys', 'Getting InChI keys', get_inchikeys)
    inchikeys = property(get_inchi_keys, None, None, "InChI keys (read only)")

2139 

2140 def get_topology_reader (self) -> 'Topology': 

2141 """ Get the topology data reader. """ 

2142 # If we already have a stored value then return it 

2143 if self._topology_reader: return self._topology_reader 

2144 # Instantiate the topology reader 

2145 self._topology_reader = Topology(self.topology_file) 

2146 return self._topology_reader 

2147 topology_reader = property(get_topology_reader, None, None, "Topology reader (read only)") 

2148 

2149 def get_dihedrals (self) -> list[dict]: 

2150 """ Get the topology dihedrals. """ 

2151 # If we already have a stored value then return it 

2152 if self._dihedrals: return self._dihedrals 

2153 # Calculate the dihedrals otherwise 

2154 self._dihedrals = self.topology_reader.get_dihedrals_data() 

2155 return self._dihedrals 

2156 dihedrals = property(get_dihedrals, None, None, "Topology dihedrals (read only)") 

2157 

2158 def get_populations (self) -> Optional[list[float]]: 

2159 """ Get the equilibrium populations from a MSM. """ 

2160 # If we already have a stored value then return it 

2161 if self._populations: 

2162 return self._populations 

2163 # Otherwise we must find the value 

2164 if not self.populations_file: 

2165 return None 

2166 self._populations = read_file(self.populations_file) 

2167 return self._populations 

2168 populations = property(get_populations, None, None, "Equilibrium populations from a MSM (read only)") 

2169 

2170 def get_transitions (self) -> Optional[list[list[float]]]: 

2171 """ Get the transition probabilities from a MSM. """ 

2172 # If we already have a stored value then return it 

2173 if self._transitions: 

2174 return self._transitions 

2175 # Otherwise we must find the value 

2176 if not self.transitions_file: 

2177 return None 

2178 self._transitions = read_file(self.transitions_file) 

2179 return self._transitions 

2180 transitions = property(get_transitions, None, None, "Transition probabilities from a MSM (read only)") 

2181 

2182 def get_pdb_ids (self) -> list[str]: 

2183 """ Get the tested and standardized PDB ids. """ 

2184 # If we already have a stored value then return it 

2185 if self._pdb_ids != None: 

2186 return self._pdb_ids 

2187 # Otherwise test and standarize input PDB ids 

2188 self._pdb_ids = [] 

2189 # If there is no input pdb ids (may be None) then stop here 

2190 if not self.input_pdb_ids: 

2191 return [] 

2192 # If input PDB ids is a string instead of a list then fix it 

2193 input_pdb_ids = self.input_pdb_ids 

2194 if type(input_pdb_ids) == str: 

2195 input_pdb_ids = [ input_pdb_ids ] 

2196 # Iterate input PDB ids 

2197 for input_pdb_id in input_pdb_ids: 

2198 # First make sure this is a PDB id 

2199 if not re.match(PDB_ID_FORMAT, input_pdb_id): 

2200 raise InputError(f'Input PDB id "{input_pdb_id}" does not look like a PDB id') 

2201 # Make letters upper 

2202 pdb_id = input_pdb_id.upper() 

2203 self._pdb_ids.append(pdb_id) 

2204 return self._pdb_ids 

2205 pdb_ids = property(get_pdb_ids, None, None, "Tested and standarized PDB ids (read only)") 

2206 

    # Prepare the PDB references file to be uploaded to the database
    get_pdb_references = Task('pdbs', 'Prepare PDB references',
        prepare_pdb_references, output_filename = PDB_REFERENCES_FILENAME)
    pdb_references_file = get_pdb_references.get_output_file

    # Map the structure aminoacids sequences against the Uniprot reference sequences
    get_protein_map = Task('protmap', 'Protein residues mapping',
        generate_protein_mapping, output_filename=PROTEIN_REFERENCES_FILENAME)
    protein_map = property(get_protein_map, None, None, "Protein residues mapping (read only)")

    # Define the output file of the protein mapping including protein references
    get_protein_references_file = get_protein_map.get_output_file
    protein_references_file = property(get_protein_references_file, None, None, "File including protein refereces data mined from UniProt (read only)")

    # Get chain references
    get_chain_references = Task('chains', 'Chain references',
        prepare_chain_references, output_filename = OUTPUT_CHAINS_FILENAME)

    # Get the ligand residues mapping
    get_ligand_map = Task('ligmap', 'Ligand residues mapping',
        generate_ligand_mapping, output_filename = LIGAND_REFERENCES_FILENAME)
    ligand_map = property(get_ligand_map, None, None, "Ligand references (read only)")

    # Define the output file of the ligand mapping including ligand references
    get_ligand_references_file = get_ligand_map.get_output_file
    ligand_references_file = property(get_ligand_references_file, None, None, "File including ligand refereces data mined from PubChem (read only)")

    # Get the lipid references
    get_lipid_map = Task('lipmap', 'Lipid mapping',
        generate_lipid_references, output_filename = INCHIKEY_REFERENCES_FILENAME)
    lipid_map = property(get_lipid_map, None, None, "Lipid mapping (read only)")

    # Define the output file of the lipid mapping including lipid references
    get_lipid_references_file = get_lipid_map.get_output_file
    lipid_references_file = property(get_lipid_references_file, None, None, "File including lipid references data mined from PubChem (read only)")

    # Get mapping of residues in the membrane
    get_membrane_map = Task('memmap', 'Membrane mapping',
        generate_membrane_mapping, output_filename = MEMBRANE_MAPPING_FILENAME)
    membrane_map = property(get_membrane_map, None, None, "Membrane mapping (read only)")

    # Build the residue map from both proteins and ligands maps
    # This is formatted as both the standard topology and metadata producers expect them
    get_residue_map = Task('resmap', 'Residue mapping', generate_residue_mapping)
    residue_map = property(get_residue_map, None, None, "Residue map (read only)")

    # Prepare the project metadata file to be uploaded to the database
    prepare_metadata = Task('pmeta', 'Prepare project metadata',
        prepare_project_metadata, output_filename=OUTPUT_METADATA_FILENAME)

    # Prepare the standard topology file to be uploaded to the database
    prepare_standard_topology = Task('stopology', 'Standard topology file',
        generate_topology, output_filename = STANDARD_TOPOLOGY_FILENAME)
    get_standard_topology_file = prepare_standard_topology.get_output_file
    standard_topology_file = property(get_standard_topology_file, None, None, "Standard topology filename (read only)")

    # Get a screenshot of the system
    get_screenshot_filename = Task('screenshot', 'Screenshot file',
        get_screenshot, output_filename = OUTPUT_SCREENSHOT_FILENAME)
    screenshot_filename = property(get_screenshot_filename, None, None, "Screenshot filename (read only)")

    # Provenance data
    # Note the Task deliberately reuses the name of the imported
    # 'produce_provenance' function it wraps, shadowing it in the class namespace
    produce_provenance = Task('aiidata', 'Produce provenance', produce_provenance)

2270 

2271# AUXILIAR FUNCTIONS --------------------------------------------------------------------------- 

2272 

# DANI: Once the markov format is settled this function will not be needed
def read_file (target_file : File) -> dict:
    """Read a file which may come in different formats (npy or json)."""
    # The format is taken from the filename extension
    file_format = target_file.filename.split('.')[-1]
    # Numpy binary files
    if file_format == 'npy':
        return numpy.load(target_file.path)
    # JSON files
    if file_format == 'json':
        return load_json(target_file.path)

2284 

def name_2_directory (name : str) -> str:
    """Convert an MD name into an equivalent MD directory."""
    # White spaces become underscores
    directory = name.replace(' ', '_')
    # Forbidden characters are dropped entirely
    for forbidden in FORBIDDEN_DIRECTORY_CHARACTERS:
        directory = directory.replace(forbidden, '')
    return directory

2293 

def check_directory (directory : str) -> None:
    """Check for problematic characters in a directory path.

    Raises an InputError when a forbidden character is found. Note the
    previous '-> str' annotation was wrong: nothing is returned and nothing
    is removed here — a forbidden character is an error.
    """
    # Using a set makes each membership test O(1)
    directory_characters = set(directory)
    for character in FORBIDDEN_DIRECTORY_CHARACTERS:
        if character in directory_characters:
            raise InputError(f'Directory path "{directory}" includes the forbidden character "{character}"')

2301 

def directory_2_name (directory : str) -> str:
    """Convert an MD directory into an equivalent MD name."""
    # Keep only the last path component, thus discarding a possible leading './'
    last_component = directory.split('/')[-1]
    # Turn underscores back into white spaces
    return last_component.replace('_', ' ')

2308 

# Project input files
# Maps of requestable flags to the getter providing each file
project_input_files = {
    'itopology': Project.get_input_topology_file,
    'inputs': Project.get_inputs_file,
    'populations': Project.get_populations_file,
    'transitions': Project.get_transitions_file
}
# MD input files
md_input_files = {
    'istructure': MD.get_input_structure_file,
    'itrajectory': MD.get_input_trajectory_files
}
# Both project and MD input files
input_files = { **project_input_files, **md_input_files }

# Project processed files
project_processed_files = {
    'topology': Project.get_topology_file
}
# MD processed files
md_processed_files = {
    'structure': MD.get_structure_file,
    'trajectory': MD.get_trajectory_file
}
# Both project and MD processed files
processed_files = { **project_processed_files, **md_processed_files }

2335 

# Project requestable tasks
project_requestables = {
    **project_input_files,
    **project_processed_files,
}
# Add available tasks to project requestables
# Note the loop variable is named 'member' to avoid shadowing the builtin 'callable'
for member in vars(Project).values():
    if isinstance(member, Task): project_requestables[member.flag] = member
# MD requestable tasks
md_requestables = {
    **md_input_files,
    **md_processed_files,
}
# Add available tasks to MD requestables
for member in vars(MD).values():
    if isinstance(member, Task): md_requestables[member.flag] = member
# Requestables for the console
# Note that this constant is global
requestables.update({ **project_requestables, **md_requestables })
# Inverted requestables for every function to know which is its 'label'
inverted_requestables.update({ v: k for k, v in requestables.items() })

2357 

# Set groups of dependencies to be requested together using only one flag
# Each key is a meta-flag expanded into the listed task flags by the workflow
DEPENDENCY_FLAGS = {
    'download': list(input_files.keys()),
    'setup': list(processed_files.keys()),
    'meta': ['pmeta', 'mdmeta'],
    'network': [ 'resmap', 'ligands', 'chains', 'pdbs', 'memmap' ],
    'minimal': [ 'pmeta', 'mdmeta', 'stopology' ],
    'interdeps': [ 'interactions', 'pairwise', 'hbonds', 'energies', 'perres', 'clusters', 'dist' ],
    'membs': ['memmap', 'density', 'thickness', 'apl', 'lorder', 'linter', 'channels']
}

# Set the default analyses to be run when no task is specified
DEFAULT_ANALYSES = ['clusters', 'dist', 'energies', 'hbonds', 'pca', 'pockets',
    'rgyr', 'rmsds', 'perres', 'pairwise', 'rmsf', 'sas', 'tmscore', 'density',
    'thickness', 'apl', 'lorder', 'linter']

2373 

2374# The actual main function 

2375def workflow ( 

2376 # Project parameters 

2377 project_parameters : dict = {}, 

2378 # The actual workflow parameters 

2379 # The working directory 

2380 working_directory : str = '.', 

2381 # Download only 

2382 download : bool = False, 

2383 # Download and correct only 

2384 setup : bool = False, 

2385 # Run only specific analyses/processes 

2386 include : Optional[list[str]] = None, 

2387 # Run everything but specific analyses/processes 

2388 exclude : Optional[list[str]] = None, 

2389 # Overwrite already existing output files 

2390 overwrite : Optional[ list[str] | bool ] = None, 

2391): 

2392 

2393 # Check there are not input errors 

2394 

2395 # Include and exclude are not compatible 

2396 # This is to protect the user to do something which makes not sense 

2397 if include and exclude: 

2398 raise InputError('Include (-i) and exclude (-e) are not compatible. Use one of these options.') 

2399 

2400 # Make sure the working directory exists 

2401 if not exists(working_directory): 

2402 raise InputError(f'Working directory "{working_directory}" does not exist') 

2403 

2404 # Make sure the working directory is actually a directory 

2405 if not isdir(working_directory): 

2406 raise InputError(f'Working directory "{working_directory}" is actually not a directory') 

2407 

2408 # Move the current directory to the working directory 

2409 chdir(working_directory) 

2410 current_directory_name = getcwd().split('/')[-1] 

2411 git_version = get_git_version() 

2412 print(f'\n{CYAN_HEADER}Running MDDB workflow ({git_version}) for project at {current_directory_name}{COLOR_END}') 

2413 

2414 # Initiate the project project 

2415 project = Project(**project_parameters) 

2416 print(f' {len(project.mds)} MDs are to be run') 

2417 

2418 # Set the tasks to be run 

2419 tasks = None 

2420 # If the download argument is passed then just make sure input files are available 

2421 if download: 

2422 warn('The "-d" or "--download" argument is deprecated. Please use "-i download" instead.') 

2423 tasks = list(input_files.keys()) 

2424 # If the setup argument is passed then just process input files 

2425 elif setup: 

2426 warn('The "-s" or "--setup" argument is deprecated. Please use "-i setup" instead.') 

2427 tasks = list(processed_files.keys()) 

2428 # If the include argument then add only the specified tasks to the list 

2429 elif include and len(include) > 0: 

2430 tasks = [ *include ] 

2431 # Search for special flags among included 

2432 for flag, dependencies in DEPENDENCY_FLAGS.items(): 

2433 if flag not in tasks: continue 

2434 # If the flag is found then remove it and write the corresponding dependencie instead 

2435 # Make sure not to duplicate a dependency if it was already included 

2436 tasks.remove(flag) 

2437 for dep in dependencies: 

2438 if dep in tasks: continue 

2439 tasks.append(dep) 

2440 # Set the default tasks otherwise 

2441 else: 

2442 tasks = [ 

2443 # Project tasks 

2444 'stopology', 

2445 'screenshot', 

2446 'pmeta', 

2447 'pdbs', 

2448 'chains', 

2449 # MD tasks 

2450 'mdmeta', 

2451 'inter', 

2452 *DEFAULT_ANALYSES, 

2453 ] 

2454 # If the exclude parameter was passed then remove excluded tasks from the default tasks 

2455 if exclude and len(exclude) > 0: 

2456 excluded_dependencies = [ *exclude ] 

2457 # Search for special flags among excluded 

2458 for flag, dependencies in DEPENDENCY_FLAGS.items(): 

2459 if flag not in exclude: continue 

2460 # If the flag is found then exclude the dependencies instead 

2461 # Make sure not to duplicate a dependency if it was already included 

2462 excluded_dependencies.remove(flag) 

2463 for dep in dependencies: 

2464 if dep in exclude: continue 

2465 excluded_dependencies.append(dep) 

2466 tasks = [ name for name in tasks if name not in excluded_dependencies ] 

2467 

2468 # If the user requested to overwrite something, make sure it is in the tasks list 

2469 

2470 # Update the overwritable variable with the requested overwrites 

2471 overwritables = set() 

2472 if overwrite: 

2473 # If the overwrite argument is simply true then add all requestables to the overwritable 

2474 if type(overwrite) == bool: 

2475 for task in tasks: 

2476 overwritables.add(task) 

2477 # If the overwrite argument is a list of tasks then iterate them 

2478 elif type(overwrite) == list: 

2479 for task in overwrite: 

2480 # Make sure the task to be overwriten is among the tasks to be run 

2481 if task not in tasks: 

2482 raise InputError(f'Task "{task}" is to be overwriten but it is not in the tasks list. Either include it or do not exclude it') 

2483 # Add it to the global variable 

2484 overwritables.add(task) 

2485 else: raise ValueError('Not supported overwrite type') 

2486 

2487 # Get project tasks 

2488 project_tasks = [ task for task in tasks if task in project_requestables ] 

2489 # Get the MD tasks 

2490 md_tasks = [ task for task in tasks if task in md_requestables ] 

2491 

2492 # Set project overwritables 

2493 project.overwritables = set([ task for task in project_tasks if task in overwritables ]) 

2494 # Set MD overwrittables 

2495 # Note that this must be done before running project tasks 

2496 # Some project tasks rely in MD tasks 

2497 for md in project.mds: 

2498 md.overwritables = set([ task for task in md_tasks if task in overwritables ]) 

2499 

2500 # Run the project tasks now 

2501 for task in project_tasks: 

2502 # Get the function to be called and call it 

2503 getter = requestables[task] 

2504 getter(project) 

2505 

2506 # If there are no MD tasks then we are done already 

2507 if len(md_tasks) == 0: 

2508 print("Finished!") 

2509 return 

2510 

2511 # Now iterate over the different MDs 

2512 for md in project.mds: 

2513 print(f'\n{CYAN_HEADER} Processing MD at {md.directory}{COLOR_END}') 

2514 # Run the MD tasks 

2515 for task in md_tasks: 

2516 # Get the function to be called and call it 

2517 getter = requestables[task] 

2518 getter(md) 

2519 

2520 # Remove gromacs backups and other trash files from this MD 

2521 remove_trash(md.directory) 

2522 

2523 # Remove gromacs backups and other trash files from the project 

2524 remove_trash(project.directory) 

2525 

2526 print("Done!")