Coverage for model_workflow/analyses/rmsd_check.py: 17%

328 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1import mdtraj as mdt 

2import pytraj as pt 

3import numpy as np 

4import time 

5import math 

6import sys 

7 

8# Visual output tools 

9from tqdm import tqdm 

10import plotext as plt 

11 

12from model_workflow.utils.auxiliar import delete_previous_log, reprint, TestFailure, warn 

13from model_workflow.utils.constants import TRAJECTORY_INTEGRITY_FLAG 

14from model_workflow.utils.pyt_spells import get_pytraj_trajectory 

15from model_workflow.utils.type_hints import * 

16 

17# Check if the output is going to a terminal or not 

18is_terminal = sys.stdout.isatty() 

19 

20# LORE 

21# This test was originaly intended to use a RMSD jump cutoff based on number of atoms and timestep 

22# However, after a deep study, it was observed that simulations with similar features may show very different RMSD jumps 

23# For this reason now we comptue RMSD jumps along the whole trajectory and check that the biggest jump is not an outlier 

24# The outlier is defined according to how many times the standard deviation far from the mean is a value 

25 

26# Look for sudden raises of RMSd values from one frame to another 

27# To do so, we check the RMSD of every frame using its previous frame as reference 

28def check_trajectory_integrity ( 

29 input_structure_filename : str, 

30 input_trajectory_filename : str, 

31 structure : 'Structure', 

32 pbc_selection : 'Selection', 

33 mercy : List[str], 

34 trust: List[str], 

35 register : 'Register', 

36 #time_length : float, 

37 check_selection : str, 

38 # DANI: He visto saltos 'correctos' pasar de 6 

39 # DANI: He visto saltos 'incorrectos' no bajar de 10 

40 standard_deviations_cutoff : float, 

41 snapshots : int) -> bool: 

42 

43 # Skip the test if we trust 

44 if TRAJECTORY_INTEGRITY_FLAG in trust: 

45 return True 

46 

47 # Skip the test if it is already passed according to the register 

48 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None): 

49 return True 

50 

51 # Remove old warnings 

52 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG) 

53 

54 # Parse the selection in VMD selection syntax 

55 parsed_selection = structure.select(check_selection, syntax='vmd') 

56 

57 # If there is nothing to check then warn the user and stop here 

58 if not parsed_selection: 

59 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis') 

60 

61 # Discard PBC residues from the selection to be checked 

62 parsed_selection -= pbc_selection 

63 

64 # If there is nothing to check then warn the user and stop here 

65 if not parsed_selection: 

66 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction') 

67 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na') 

68 return True 

69 

70 print('Checking trajectory integrity') 

71 

72 # Load the trajectory frame by frame 

73 trajectory = mdt.iterload(input_trajectory_filename, top=input_structure_filename, chunk=1) 

74 

75 # Save the previous frame any time 

76 previous_frame = next(trajectory) 

77 

78 # Save all RMSD jumps 

79 rmsd_jumps = [] 

80 

81 # Initialize progress bar first 

82 pbar = tqdm(enumerate(trajectory, 1), total=snapshots, desc=' Frame', unit='frame', initial=1) 

83 

84 for f, frame in pbar: 

85 # Calculate RMSD value between previous and current frame 

86 rmsd_value = mdt.rmsd(frame, previous_frame, atom_indices=parsed_selection.atom_indices)[0] 

87 rmsd_jumps.append(rmsd_value) 

88 

89 # Update the previous frame as the current one 

90 previous_frame = frame 

91 time.sleep(0.1) # Needed for progress bar to be print at the correct place 

92 

93 # If the trajectory has only 1 or 2 frames then there is no test to do 

94 if len(rmsd_jumps) <= 1: 

95 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True) 

96 return True 

97 

98 # Get the maximum RMSD value and check it is a reasonable deviation from the average values 

99 # Otherwise, if it is an outlier, the test fails 

100 mean_rmsd_jump = np.mean(rmsd_jumps) 

101 stdv_rmsd_jump = np.std(rmsd_jumps) 

102 

103 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps 

104 # For this reason we allow the first frames to bypass the check 

105 # As soon as one frame is below the cutoff the bypass is finished for the following frames 

106 # Count the number of bypassed frames and warn the user in case there are any 

107 bypassed_frames = 0 

108 

109 # Capture outliers 

110 # If we capture more than 5 we stop searching 

111 outliers_count = 0 

112 max_z_score = 0 

113 max_z_score_frame = 0 

114 for i, rmsd_jump in enumerate(rmsd_jumps, 1): 

115 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump ) 

116 # Keep track of the maixmum z score 

117 if z_score > max_z_score: 

118 max_z_score = z_score 

119 max_z_score_frame = i 

120 # If z score bypassed the limit then report it 

121 if z_score > standard_deviations_cutoff: 

122 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet 

123 if i - 1 == bypassed_frames: 

124 bypassed_frames += 1 

125 continue 

126 if outliers_count >= 4: 

127 print(' etc...') 

128 break 

129 print(f' FAIL: Sudden RMSD jump between frames {i} and {i+1}. RMSD jump: {rmsd_jump:4f}') 

130 outliers_count += 1 

131 

132 # Always print the maximum z score and its frames 

133 print(f' Maximum z score {max_z_score:4f} reported between frames {max_z_score_frame} and {max_z_score_frame + 1}.\n' 

134 f' Mean: {mean_rmsd_jump:4f}. Stdv: {stdv_rmsd_jump:4f}') 

135 

136 # Set if there was any error 

137 error = None 

138 # If there were any outlier then the check has failed 

139 if outliers_count > 0: 

140 error = 'RMSD check has failed: there may be sudden jumps along the trajectory' 

141 # The message changes if the outliers are at the begining only 

142 elif bypassed_frames > 0: 

143 error = f'First {bypassed_frames} frames may be not equilibrated' 

144 

145 # In case there was a problem, 

146 if error: 

147 # Display a graph to show the distribution of sudden jumps along the trajectory 

148 if is_terminal: 

149 data = rmsd_jumps 

150 if outliers_count == 0 and bypassed_frames > 0: 

151 data = rmsd_jumps[0:bypassed_frames+100] 

152 plt.scatter(data) 

153 plt.title("RMSD jumps along the trajectory") 

154 plt.xlabel('Frame') 

155 plt.ylabel('RMSD jump') 

156 n_ticks = 5 

157 n_jumps = len(data) 

158 tickstep = math.ceil(n_jumps / n_ticks) 

159 xticks = [ t+1 for t in range(0, n_jumps, tickstep) ] 

160 xlabels = [ str(t) for t in xticks ] 

161 plt.xticks(xticks, xlabels) 

162 plt.show() 

163 # Add a warning an return True since the test failed in case we have mercy 

164 if TRAJECTORY_INTEGRITY_FLAG in mercy: 

165 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, error) 

166 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False) 

167 return False 

168 # Otherwise kill the process 

169 raise TestFailure(error) 

170 

171 print(' Test has passed successfully') 

172 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True) 

173 return True 

174 

175# Look for sudden raises of RMSd values from one frame to another 

176# To do so, we check the RMSD of every frame using its previous frame as reference 

177# DANI: Dependemos de que cambien el comportamiento de MDtraj para que esto funcione 

178# DANI: https://github.com/mdtraj/mdtraj/issues/1966 

179def check_trajectory_integrity_per_fragment ( 

180 input_structure_filename : str, 

181 input_trajectory_filename : str, 

182 structure : 'Structure', 

183 pbc_selection : 'Selection', 

184 mercy : List[str], 

185 trust: List[str], 

186 register : 'Register', 

187 #time_length : float, 

188 check_selection : str, 

189 # DANI: He visto saltos 'correctos' pasar de 6 

190 # DANI: He visto saltos 'incorrectos' no bajar de 10 

191 standard_deviations_cutoff : float) -> bool: 

192 

193 # Skip the test if we trust 

194 if TRAJECTORY_INTEGRITY_FLAG in trust: 

195 return True 

196 

197 # Skip the test if it is already passed according to the register 

198 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None): 

199 return True 

200 

201 # Remove old warnings 

202 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG) 

203 

204 # Parse the selection in VMD selection syntax 

205 parsed_selection = structure.select(check_selection, syntax='vmd') 

206 

207 # If there is nothing to check then warn the user and stop here 

208 if not parsed_selection: 

209 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis') 

210 

211 # Discard PBC residues from the selection to be checked 

212 parsed_selection -= pbc_selection 

213 

214 # If there is nothing to check then warn the user and stop here 

215 if not parsed_selection: 

216 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction') 

217 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na') 

218 return True 

219 

220 # Get fragments out of the parsed selection 

221 # Fragments will be analyzed independently 

222 # A small fragment may cause a small RMSD perturbation when it is part of a large structure 

223 # Note that jumps of partial fragments along boundaries are rare imaging problems 

224 # Usually are whole fragments the ones which jump 

225 fragments = list(structure.find_fragments(parsed_selection)) 

226 

227 print(f'Checking trajectory integrity ({len(fragments)} fragments)') 

228 

229 # Load the trajectory frame by frame 

230 trajectory = mdt.iterload(input_trajectory_filename, top=input_structure_filename, chunk=1) 

231 

232 # Save the previous frame any time 

233 previous_frame = next(trajectory) 

234 

235 # Save all RMSD jumps 

236 fragment_rmsd_jumps = { fragment: [] for fragment in fragments } 

237 

238 # Add an extra breakline before the first log 

239 print() 

240 

241 # Iterate trajectory frames 

242 for f, frame in enumerate(trajectory, 1): 

243 # Update the current frame log 

244 reprint(f' Frame {f}') 

245 

246 # Iterate over the different fragments 

247 for fragment in fragment_rmsd_jumps: 

248 # Calculate RMSD value between previous and current frame 

249 # DANI: El centrado de MDtraj elimina el salto a través de las boundaries 

250 # DANI: El precentered=True debería evitarlo, pero es ignorado si hay atom_indices 

251 rmsd_value = mdt.rmsd(frame, previous_frame, atom_indices=fragment.atom_indices)[0] 

252 fragment_rmsd_jumps[fragment].append(rmsd_value) 

253 

254 # Update the previous frame as the current one 

255 previous_frame = frame 

256 

257 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps 

258 # For this reason we allow the first frames to bypass the check 

259 # As soon as one frame is below the cutoff the bypass is finished for the following frames 

260 # Count the maximum number of bypassed frames and warn the user in case there are any 

261 max_bypassed_frames = 0 

262 

263 # Capture outliers 

264 # If we capture more than 5 we stop searching 

265 outliers_count = 0 

266 max_z_score = 0 

267 max_z_score_frame = 0 

268 

269 # Iterate over the different fragments 

270 for fragment, rmsd_jumps in fragment_rmsd_jumps.items(): 

271 

272 # If the trajectory has only 1 or 2 frames then there is no test to do 

273 if len(rmsd_jumps) <= 1: 

274 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True) 

275 return True 

276 

277 # Get the maximum RMSD value and check it is a reasonable deviation from the average values 

278 # Otherwise, if it is an outlier, the test fails 

279 mean_rmsd_jump = np.mean(rmsd_jumps) 

280 stdv_rmsd_jump = np.std(rmsd_jumps) 

281 

282 # Count the number of bypassed frames for this fragment 

283 bypassed_frames = 0 

284 

285 fragment_max_z_score = 0 

286 fragment_max_z_score_frame = 0 

287 

288 for i, rmsd_jump in enumerate(rmsd_jumps, 1): 

289 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump ) 

290 # Keep track of the maixmum z score for this fragment 

291 if z_score > fragment_max_z_score: 

292 fragment_max_z_score = z_score 

293 fragment_max_z_score_frame = i 

294 # Keep track of the maixmum z score 

295 if z_score > max_z_score: 

296 max_z_score = z_score 

297 max_z_score_frame = i 

298 # If z score bypassed the limit then report it 

299 if z_score > standard_deviations_cutoff: 

300 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet 

301 if i - 1 == bypassed_frames: 

302 bypassed_frames += 1 

303 continue 

304 if outliers_count >= 4: 

305 print(' etc...') 

306 break 

307 print(f' FAIL: Sudden RMSD jump between frames {i} and {i+1}') 

308 outliers_count += 1 

309 

310 print(f'Fragment {structure.name_selection(fragment)} -> {fragment_max_z_score} in frame {fragment_max_z_score_frame}') 

311 

312 # Update the maximum number of bypassed frames 

313 if bypassed_frames > max_bypassed_frames: 

314 max_bypassed_frames = bypassed_frames 

315 

316 # If there were any outlier then the check has failed 

317 if outliers_count > 0: 

318 # Add a warning an return True since the test failed in case we have mercy 

319 message = 'RMSD check has failed: there may be sudden jumps along the trajectory' 

320 if TRAJECTORY_INTEGRITY_FLAG in mercy: 

321 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, message) 

322 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False) 

323 return False 

324 # Otherwise kill the process right away 

325 raise TestFailure(message) 

326 

327 # Always print the maximum z score and its frames 

328 print(f' Maximum z score {max_z_score} reported between frames {max_z_score_frame} and {max_z_score_frame + 1}') 

329 

330 # Warn the user if we had bypassed frames 

331 if max_bypassed_frames > 0: 

332 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, f'First {max_bypassed_frames} frames may be not equilibrated') 

333 

334 print(' Test has passed successfully') 

335 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True) 

336 return True 

337 

338# Look for sudden raises of RMSd values from one frame to another 

339# To do so, we check the RMSD of every frame using its previous frame as reference 

340# DANI: Dependemos de que cambien el comportamiento de MDtraj para que esto funcione 

341# DANI: https://github.com/mdtraj/mdtraj/issues/1966 

342def check_trajectory_integrity_per_fragment_2 ( 

343 input_structure_filename : str, 

344 input_trajectory_filename : str, 

345 structure : 'Structure', 

346 pbc_selection : 'Selection', 

347 mercy : List[str], 

348 trust: List[str], 

349 register : 'Register', 

350 #time_length : float, 

351 check_selection : str, 

352 # DANI: He visto saltos 'correctos' pasar de 6 

353 # DANI: He visto saltos 'incorrectos' no bajar de 10 

354 standard_deviations_cutoff : float) -> bool: 

355 

356 # Skip the test if we trust 

357 if TRAJECTORY_INTEGRITY_FLAG in trust: 

358 return True 

359 

360 # Skip the test if it is already passed according to the register 

361 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None): 

362 return True 

363 

364 # Remove old warnings 

365 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG) 

366 

367 # Parse the selection in VMD selection syntax 

368 parsed_selection = structure.select(check_selection, syntax='vmd') 

369 

370 # If there is nothing to check then warn the user and stop here 

371 if not parsed_selection: 

372 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis') 

373 

374 # Discard PBC residues from the selection to be checked 

375 parsed_selection -= pbc_selection 

376 

377 # If there is nothing to check then warn the user and stop here 

378 if not parsed_selection: 

379 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction') 

380 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na') 

381 return True 

382 

383 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps 

384 # For this reason we allow the first frames to bypass the check 

385 # As soon as one frame is below the cutoff the bypass is finished for the following frames 

386 # Count the maximum number of bypassed frames and warn the user in case there are any 

387 max_bypassed_frames = 0 

388 

389 # Get fragments out of the parsed selection 

390 # Fragments will be analyzed independently 

391 # A small fragment may cause a small RMSD perturbation when it is part of a large structure 

392 # Note that jumps of partial fragments along boundaries are rare imaging problems 

393 # Usually are whole fragments the ones which jump 

394 fragments = list(structure.find_fragments(parsed_selection)) 

395 

396 print(f'Checking trajectory integrity') 

397 

398 # Iterate over the different fragments 

399 for f, fragment in enumerate(fragments, 1): 

400 fragment_name = structure.name_selection(fragment) 

401 print(f'Fragment {f}/{len(fragments)} -> {fragment_name}') 

402 

403 # Load the trajectory frame by frame 

404 trajectory = mdt.iterload( 

405 input_trajectory_filename, 

406 top=input_structure_filename, 

407 atom_indices=fragment.atom_indices, 

408 chunk=1) 

409 

410 # Save all RMSD jumps 

411 rmsd_jumps = [] 

412 

413 # Save the previous frame any time 

414 previous_frame = next(trajectory) 

415 

416 # Add an extra breakline before the first log 

417 print() 

418 

419 # Iterate trajectory frames 

420 for f, frame in enumerate(trajectory, 1): 

421 # Update the current frame log 

422 reprint(f' Frame {f}') 

423 

424 # Calculate RMSD value between previous and current frame 

425 # DANI: El centrado de MDtraj elimina el salto a través de las boundaries 

426 # DANI: El precentered=True debería evitarlo, pero es ignorado si hay atom_indices 

427 rmsd_value = mdt.rmsd(frame, previous_frame, precentered=True)[0] 

428 rmsd_jumps.append(rmsd_value) 

429 

430 # Update the previous frame as the current one 

431 previous_frame = frame 

432 

433 # If the trajectory has only 1 or 2 frames then there is no test to do 

434 if len(rmsd_jumps) <= 1: 

435 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True) 

436 return True 

437 

438 # Get the maximum RMSD value and check it is a reasonable deviation from the average values 

439 # Otherwise, if it is an outlier, the test fails 

440 mean_rmsd_jump = np.mean(rmsd_jumps) 

441 stdv_rmsd_jump = np.std(rmsd_jumps) 

442 

443 # Count the number of bypassed frames for this fragment 

444 bypassed_frames = 0 

445 

446 # Capture outliers 

447 # If we capture more than 5 we stop searching 

448 outliers_count = 0 

449 max_z_score = 0 

450 max_z_score_frame = 0 

451 

452 for i, rmsd_jump in enumerate(rmsd_jumps, 1): 

453 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump ) 

454 # Keep track of the maixmum z score 

455 if z_score > max_z_score: 

456 max_z_score = z_score 

457 max_z_score_frame = i 

458 # If z score bypassed the limit then report it 

459 if z_score > standard_deviations_cutoff: 

460 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet 

461 if i - 1 == bypassed_frames: 

462 bypassed_frames += 1 

463 continue 

464 if outliers_count >= 4: 

465 print(' etc...') 

466 break 

467 print(f' FAIL: Sudden RMSD jump between frames {i} and {i+1}') 

468 outliers_count += 1 

469 

470 # Always print the maximum z score and its frames 

471 print(f' Maximum z score {max_z_score} reported between frames {max_z_score_frame} and {max_z_score_frame + 1}') 

472 

473 # Update the maximum number of bypassed frames 

474 if bypassed_frames > max_bypassed_frames: 

475 max_bypassed_frames = bypassed_frames 

476 

477 # If there were any outlier then the check has failed 

478 if outliers_count > 0: 

479 # Add a warning an return True since the test failed in case we have mercy 

480 message = 'RMSD check has failed: there may be sudden jumps along the trajectory' 

481 if TRAJECTORY_INTEGRITY_FLAG in mercy: 

482 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, message) 

483 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False) 

484 return False 

485 # Otherwise kill the process right away 

486 raise TestFailure(message) 

487 

488 # Warn the user if we had bypassed frames 

489 if max_bypassed_frames > 0: 

490 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, f'First {max_bypassed_frames} frames may be not equilibrated') 

491 

492 print(' Test has passed successfully') 

493 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True) 

494 return True 

495 

496# Compute every residue RMSD to check if there are sudden jumps along the trajectory 

497# HARDCODE: This function is not fully implemented but enabled manually for specific cases 

498def check_trajectory_integrity_per_residue ( 

499 input_structure_filename : str, 

500 input_trajectory_filename : str, 

501 structure : 'Structure', 

502 pbc_selection : 'Selection', 

503 mercy : List[str], 

504 trust: List[str], 

505 register : 'Register', 

506 #time_length : float, 

507 check_selection : str, 

508 # DANI: He visto saltos 'correctos' pasar de 11 

509 # DANI: He visto saltos 'incorrectos' no bajar de 14 

510 standard_deviations_cutoff : float): 

511 

512 # HARDCODE: The default value does not work for a single residue 

513 standard_deviations_cutoff = 12 

514 

515 # Skip the test if we trust 

516 if TRAJECTORY_INTEGRITY_FLAG in trust: 

517 return True 

518 

519 # Skip the test if it is already passed according to the register 

520 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None): 

521 return True 

522 

523 # Remove old warnings 

524 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG) 

525 

526 # Parse the selection in VMD selection syntax 

527 parsed_selection = structure.select(check_selection, syntax='vmd') 

528 

529 # If there is nothing to check then warn the user and stop here 

530 if not parsed_selection: 

531 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis') 

532 

533 # Discard PBC residues from the selection to be checked 

534 parsed_selection -= pbc_selection 

535 

536 # If there is nothing to check then warn the user and stop here 

537 if not parsed_selection: 

538 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction') 

539 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na') 

540 return True 

541 

542 # We must filter out residues which only have 1 atom (e.g. ions) 

543 # This is because sometimes pytraj does not return results for them and then the number of results and residues does not match 

544 # More info: https://github.com/Amber-MD/pytraj/issues/1580 

545 ion_atom_indices = [] 

546 for residue in structure.residues: 

547 if len(residue.atom_indices) == 1: 

548 ion_atom_indices += residue.atom_indices 

549 ions_selection = structure.select_atom_indices(ion_atom_indices) 

550 parsed_selection -= ions_selection 

551 

552 # Parse the selection to a pytraj mask 

553 pytraj_selection = parsed_selection.to_pytraj() 

554 

555 # Calculate the residue indices of the overall structure remaining in the filtered trajectory 

556 residue_indices = structure.get_selection_residue_indices(parsed_selection) 

557 n_residues = len(residue_indices) 

558 

559 print('Checking trajectory integrity per residue') 

560 

561 # Parse the trajectory into pytraj and apply the mask 

562 # NEVER FORGET: The pytraj iterload does not accept a mask, but we apply the mask later in the analysis 

563 pt_trajectory = get_pytraj_trajectory(input_structure_filename, input_trajectory_filename, atom_selection = parsed_selection) 

564 

565 # Make sure the expected output number of residues to match with the pytraj results 

566 # These numbers may not match when ions are included so we better check 

567 # NEVER FORGET: The pytraj TrajectoryIterator is not an iterator 

568 first_frame = pt_trajectory[0:1] 

569 # DANI: When the 'resname' argument is missing it prints "Error: Range::SetRange(None): Range is -1 for None" 

570 # DANI: However there is no problem and the analysis runs flawlessly 

571 # DANI: For this reason we call this function with no resname and then we remove the log 

572 data_sample = pt.rmsd_perres(first_frame, ref=first_frame, perres_mask=pytraj_selection) 

573 # We remove the previous error log 

574 delete_previous_log() 

575 # We remove the first result, which is meant to be the whole rmsd and whose key is 'RMSD_00001' 

576 del data_sample[0] 

577 if n_residues != len(data_sample): 

578 raise ValueError(f'Number of target residues ({n_residues}) does not match number of residues in data ({len(data_sample)})') 

579 

580 # Saving all RMSD jumps may take a lot of memory 

581 # Instead we will store the sum of values and the maximum 

582 # This way we can caluclate the average value at the end and check if the maximum is too far from it 

583 rmsd_per_residue_per_frame = [] 

584 

585 # Add an extra breakline before the first log 

586 print() 

587 

588 # Iterate trajectory frames 

589 previous_frame_trajectory = first_frame 

590 frame_number = 1 

591 for frame in pt_trajectory: 

592 # Update the current frame log 

593 reprint(f' Frame {frame_number}') 

594 # Set a pytraj trajectory out of a pytraj frame 

595 frame_trajectory = pt.Trajectory(top=pt_trajectory.top) 

596 frame_trajectory.append(frame) 

597 # Run the analysis in pytraj 

598 # The result data is a custom pytraj class: pytraj.datasets.datasetlist.DatasetList 

599 # This class has keys but its attributes can not be accessed through the key 

600 # They must be accessed thorugh the index 

601 # DANI: When the 'resname' argument is missing it prints "Error: Range::SetRange(None): Range is -1 for None" 

602 # DANI: However there is no problem and the analysis runs flawlessly 

603 # DANI: Adding resrage as a list/range was tried and did not work, only string works 

604 # DANI: Adding a string resrange however strongly impacts the speed when this function is called repeatedly 

605 # DANI: For this reason we call this function with no resname and then we remove the log 

606 rmsd_per_residue = pt.rmsd_perres(frame_trajectory, ref=previous_frame_trajectory, mask=pytraj_selection) 

607 # We remove the previous error log 

608 delete_previous_log() 

609 # We remove the first result, which is meant to be the whole rmsd and whose key is 'RMSD_00001' 

610 del rmsd_per_residue[0] 

611 # Check we have no NaNs 

612 if np.isnan(rmsd_per_residue[0][0]): 

613 raise ValueError(f'We are having NaNs at frame {frame_number}') 

614 # Add last values to the list 

615 rmsd_per_residue_per_frame.append(rmsd_per_residue) 

616 # rmsd_per_residue_per_frame[:, frame] 

617 # Now update data for every residue 

618 # for index, residue_rmsd in enumerate(rmsd_per_residue): 

619 # current_rmsd = residue_rmsd[0] 

620 # # Get the current residue rmsd data 

621 # total_rmsd = rmsd_per_residue_per_frame[index] 

622 # total_rmsd['accumulated'] += current_rmsd 

623 # total_rmsd['maximum'] = max(total_rmsd['maximum'], current_rmsd) 

624 # Update previous coordinates 

625 previous_frame_trajectory = frame_trajectory 

626 # Update the frame_number 

627 frame_number += 1 

628 

629 # If the trajectory has only 1 or 2 frames then there is no test to do 

630 n_jumps = len(rmsd_per_residue_per_frame) 

631 if n_jumps <= 1: 

632 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True) 

633 return True 

634 

635 # Keep the overall maximum z score, and its residue and frame for the logs 

636 overall_max_z_score = 0 

637 overall_max_z_score_frame = None 

638 overall_max_z_score_residue = None 

639 

640 # Keep the overall maximum bypassed frames number 

641 overall_bypassed_frames = 0 

642 

643 # Keep the overall count of residues with outliers 

644 overall_outliered_residues = 0 

645 

646 # Add an extra breakline before the next log 

647 print() 

648 

649 # Now check there are not sudden jumps for each residue separattely 

650 for residue_number in range(n_residues): 

651 reprint(f' Residue {residue_number+1}') 

652 # Get the rmsd jumps for each frame for this specific residue 

653 rmsd_jumps = [ frame[residue_number] for frame in rmsd_per_residue_per_frame ] 

654 

655 # Get the maximum RMSD value and check it is a reasonable deviation from the average values 

656 # Otherwise, if it is an outlier, the test fails 

657 mean_rmsd_jump = np.mean(rmsd_jumps) 

658 stdv_rmsd_jump = np.std(rmsd_jumps) 

659 

660 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps 

661 # For this reason we allow the first frames to bypass the check 

662 # As soon as one frame is below the cutoff the bypass is finished for the following frames 

663 # Count the number of bypassed frames and warn the user in case there are any 

664 bypassed_frames = 0 

665 

666 # Capture outliers 

667 # If we capture more than 5 we stop searching 

668 outliers_count = 0 

669 max_z_score = 0 

670 max_z_score_frame = 0 

671 for i, rmsd_jump in enumerate(rmsd_jumps, 1): 

672 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump ) 

673 # Keep track of the maixmum z score 

674 if z_score > max_z_score: 

675 max_z_score = z_score 

676 max_z_score_frame = i 

677 # If z score bypassed the limit then report it 

678 if z_score > standard_deviations_cutoff: 

679 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet 

680 if i - 1 == bypassed_frames: 

681 bypassed_frames += 1 

682 continue 

683 # Otherwise we consider this as an outlier and thus the test has failed 

684 # However we keep checking just to find and report the highest outlier 

685 outliers_count += 1 

686 

687 # Update the overall bypassed frames if we overcomed it 

688 if overall_bypassed_frames < bypassed_frames: 

689 overall_bypassed_frames = bypassed_frames 

690 

691 # Update the overall max z score if we overcome it 

692 if max_z_score > overall_max_z_score: 

693 overall_max_z_score = max_z_score 

694 overall_max_z_score_frame = max_z_score_frame 

695 overall_max_z_score_residue = residue_number 

696 

697 # If there were any outlier then add one to the overall count 

698 if outliers_count > 0: 

699 overall_outliered_residues += 1 

700 

701 # Always print the overall maximum z score and its frames and residue 

702 overall_max_z_score_residue_label = pt_trajectory.top.residue(overall_max_z_score_residue) 

703 print(f' Maximum z score {overall_max_z_score} reported for residue {overall_max_z_score_residue_label} between frames {overall_max_z_score_frame} and {overall_max_z_score_frame + 1}') 

704 

705 # If there were any outlier then the check has failed 

706 if overall_outliered_residues > 0: 

707 # Add a warning an return True since the test failed in case we have mercy 

708 message = 'RMSD check has failed: there may be sudden jumps along the trajectory' 

709 if TRAJECTORY_INTEGRITY_FLAG in mercy: 

710 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, message) 

711 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False) 

712 return False 

713 # Otherwise kill the process right away 

714 raise TestFailure(message) 

715 

716 # Warn the user if we had bypassed frames 

717 if overall_bypassed_frames > 0: 

718 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, f'First {overall_bypassed_frames} frames may be not equilibrated') 

719 

720 print(' Test has passed successfully') 

721 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True) 

722 return True