Coverage for model_workflow/analyses/rmsd

1import mdtraj as mdt

2import pytraj as pt

3import numpy as np

4import time

5import math

6import sys

8# Visual output tools

9from tqdm import tqdm

10import plotext as plt

12from model_workflow.utils.auxiliar import delete_previous_log, reprint, TestFailure, warn

13from model_workflow.utils.constants import TRAJECTORY_INTEGRITY_FLAG

14from model_workflow.utils.pyt_spells import get_pytraj_trajectory

15from model_workflow.utils.type_hints import *

17# Check if the output is going to a terminal or not

18is_terminal = sys.stdout.isatty()

20# LORE

21# This test was originaly intended to use a RMSD jump cutoff based on number of atoms and timestep

22# However, after a deep study, it was observed that simulations with similar features may show very different RMSD jumps

23# For this reason now we comptue RMSD jumps along the whole trajectory and check that the biggest jump is not an outlier

24# The outlier is defined according to how many times the standard deviation far from the mean is a value

26# Look for sudden raises of RMSd values from one frame to another

27# To do so, we check the RMSD of every frame using its previous frame as reference

28def check_trajectory_integrity (

29 input_structure_filename : str,

30 input_trajectory_filename : str,

31 structure : 'Structure',

32 pbc_selection : 'Selection',

33 mercy : List[str],

34 trust: List[str],

35 register : 'Register',

36 #time_length : float,

37 check_selection : str,

38 # DANI: He visto saltos 'correctos' pasar de 6

39 # DANI: He visto saltos 'incorrectos' no bajar de 10

40 standard_deviations_cutoff : float,

41 snapshots : int) -> bool:

43 # Skip the test if we trust

44 if TRAJECTORY_INTEGRITY_FLAG in trust:

45 return True

47 # Skip the test if it is already passed according to the register

48 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None):

49 return True

51 # Remove old warnings

52 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG)

54 # Parse the selection in VMD selection syntax

55 parsed_selection = structure.select(check_selection, syntax='vmd')

57 # If there is nothing to check then warn the user and stop here

58 if not parsed_selection:

59 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis')

61 # Discard PBC residues from the selection to be checked

62 parsed_selection -= pbc_selection

64 # If there is nothing to check then warn the user and stop here

65 if not parsed_selection:

66 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction')

67 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na')

68 return True

70 print('Checking trajectory integrity')

72 # Load the trajectory frame by frame

73 trajectory = mdt.iterload(input_trajectory_filename, top=input_structure_filename, chunk=1)

75 # Save the previous frame any time

76 previous_frame = next(trajectory)

78 # Save all RMSD jumps

79 rmsd_jumps = []

81 # Initialize progress bar first

82 pbar = tqdm(enumerate(trajectory, 1), total=snapshots, desc=' Frame', unit='frame', initial=1)

84 for f, frame in pbar:

85 # Calculate RMSD value between previous and current frame

86 rmsd_value = mdt.rmsd(frame, previous_frame, atom_indices=parsed_selection.atom_indices)[0]

87 rmsd_jumps.append(rmsd_value)

89 # Update the previous frame as the current one

90 previous_frame = frame

91 time.sleep(0.1) # Needed for progress bar to be print at the correct place

93 # If the trajectory has only 1 or 2 frames then there is no test to do

94 if len(rmsd_jumps) <= 1:

95 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)

96 return True

98 # Get the maximum RMSD value and check it is a reasonable deviation from the average values

99 # Otherwise, if it is an outlier, the test fails

100 mean_rmsd_jump = np.mean(rmsd_jumps)

101 stdv_rmsd_jump = np.std(rmsd_jumps)

102

103 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps

104 # For this reason we allow the first frames to bypass the check

105 # As soon as one frame is below the cutoff the bypass is finished for the following frames

106 # Count the number of bypassed frames and warn the user in case there are any

107 bypassed_frames = 0

108

109 # Capture outliers

110 # If we capture more than 5 we stop searching

111 outliers_count = 0

112 max_z_score = 0

113 max_z_score_frame = 0

114 for i, rmsd_jump in enumerate(rmsd_jumps, 1):

115 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump )

116 # Keep track of the maixmum z score

117 if z_score > max_z_score:

118 max_z_score = z_score

119 max_z_score_frame = i

120 # If z score bypassed the limit then report it

121 if z_score > standard_deviations_cutoff:

122 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet

123 if i - 1 == bypassed_frames:

124 bypassed_frames += 1

125 continue

126 if outliers_count >= 4:

127 print(' etc...')

128 break

129 print(f' FAIL: Sudden RMSD jump between frames {i} and {i+1}. RMSD jump: {rmsd_jump:4f}')

130 outliers_count += 1

131

132 # Always print the maximum z score and its frames

133 print(f' Maximum z score {max_z_score:4f} reported between frames {max_z_score_frame} and {max_z_score_frame + 1}.\n'

134 f' Mean: {mean_rmsd_jump:4f}. Stdv: {stdv_rmsd_jump:4f}')

135

136 # Set if there was any error

137 error = None

138 # If there were any outlier then the check has failed

139 if outliers_count > 0:

140 error = 'RMSD check has failed: there may be sudden jumps along the trajectory'

141 # The message changes if the outliers are at the begining only

142 elif bypassed_frames > 0:

143 error = f'First {bypassed_frames} frames may be not equilibrated'

144

145 # In case there was a problem,

146 if error:

147 # Display a graph to show the distribution of sudden jumps along the trajectory

148 if is_terminal:

149 data = rmsd_jumps

150 if outliers_count == 0 and bypassed_frames > 0:

151 data = rmsd_jumps[0:bypassed_frames+100]

152 plt.scatter(data)

153 plt.title("RMSD jumps along the trajectory")

154 plt.xlabel('Frame')

155 plt.ylabel('RMSD jump')

156 n_ticks = 5

157 n_jumps = len(data)

158 tickstep = math.ceil(n_jumps / n_ticks)

159 xticks = [ t+1 for t in range(0, n_jumps, tickstep) ]

160 xlabels = [ str(t) for t in xticks ]

161 plt.xticks(xticks, xlabels)

162 plt.show()

163 # Add a warning an return True since the test failed in case we have mercy

164 if TRAJECTORY_INTEGRITY_FLAG in mercy:

165 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, error)

166 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False)

167 return False

168 # Otherwise kill the process

169 raise TestFailure(error)

170

171 print(' Test has passed successfully')

172 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)

173 return True

174

175# Look for sudden raises of RMSd values from one frame to another

176# To do so, we check the RMSD of every frame using its previous frame as reference

177# DANI: Dependemos de que cambien el comportamiento de MDtraj para que esto funcione

178# DANI: https://github.com/mdtraj/mdtraj/issues/1966

179def check_trajectory_integrity_per_fragment (

180 input_structure_filename : str,

181 input_trajectory_filename : str,

182 structure : 'Structure',

183 pbc_selection : 'Selection',

184 mercy : List[str],

185 trust: List[str],

186 register : 'Register',

187 #time_length : float,

188 check_selection : str,

189 # DANI: He visto saltos 'correctos' pasar de 6

190 # DANI: He visto saltos 'incorrectos' no bajar de 10

191 standard_deviations_cutoff : float) -> bool:

192

193 # Skip the test if we trust

194 if TRAJECTORY_INTEGRITY_FLAG in trust:

195 return True

196

197 # Skip the test if it is already passed according to the register

198 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None):

199 return True

200

201 # Remove old warnings

202 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG)

203

204 # Parse the selection in VMD selection syntax

205 parsed_selection = structure.select(check_selection, syntax='vmd')

206

207 # If there is nothing to check then warn the user and stop here

208 if not parsed_selection:

209 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis')

210

211 # Discard PBC residues from the selection to be checked

212 parsed_selection -= pbc_selection

213

214 # If there is nothing to check then warn the user and stop here

215 if not parsed_selection:

216 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction')

217 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na')

218 return True

219

220 # Get fragments out of the parsed selection

221 # Fragments will be analyzed independently

222 # A small fragment may cause a small RMSD perturbation when it is part of a large structure

223 # Note that jumps of partial fragments along boundaries are rare imaging problems

224 # Usually are whole fragments the ones which jump

225 fragments = list(structure.find_fragments(parsed_selection))

226

227 print(f'Checking trajectory integrity ({len(fragments)} fragments)')

228

229 # Load the trajectory frame by frame

230 trajectory = mdt.iterload(input_trajectory_filename, top=input_structure_filename, chunk=1)

231

232 # Save the previous frame any time

233 previous_frame = next(trajectory)

234

235 # Save all RMSD jumps

236 fragment_rmsd_jumps = { fragment: [] for fragment in fragments }

237

238 # Add an extra breakline before the first log

239 print()

240

241 # Iterate trajectory frames

242 for f, frame in enumerate(trajectory, 1):

243 # Update the current frame log

244 reprint(f' Frame {f}')

245

246 # Iterate over the different fragments

247 for fragment in fragment_rmsd_jumps:

248 # Calculate RMSD value between previous and current frame

249 # DANI: El centrado de MDtraj elimina el salto a través de las boundaries

250 # DANI: El precentered=True debería evitarlo, pero es ignorado si hay atom_indices

251 rmsd_value = mdt.rmsd(frame, previous_frame, atom_indices=fragment.atom_indices)[0]

252 fragment_rmsd_jumps[fragment].append(rmsd_value)

253

254 # Update the previous frame as the current one

255 previous_frame = frame

256

257 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps

258 # For this reason we allow the first frames to bypass the check

259 # As soon as one frame is below the cutoff the bypass is finished for the following frames

260 # Count the maximum number of bypassed frames and warn the user in case there are any

261 max_bypassed_frames = 0

262

263 # Capture outliers

264 # If we capture more than 5 we stop searching

265 outliers_count = 0

266 max_z_score = 0

267 max_z_score_frame = 0

268

269 # Iterate over the different fragments

270 for fragment, rmsd_jumps in fragment_rmsd_jumps.items():

271

272 # If the trajectory has only 1 or 2 frames then there is no test to do

273 if len(rmsd_jumps) <= 1:

274 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)

275 return True

276

277 # Get the maximum RMSD value and check it is a reasonable deviation from the average values

278 # Otherwise, if it is an outlier, the test fails

279 mean_rmsd_jump = np.mean(rmsd_jumps)

280 stdv_rmsd_jump = np.std(rmsd_jumps)

281

282 # Count the number of bypassed frames for this fragment

283 bypassed_frames = 0

284

285 fragment_max_z_score = 0

286 fragment_max_z_score_frame = 0

287

288 for i, rmsd_jump in enumerate(rmsd_jumps, 1):

289 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump )

290 # Keep track of the maixmum z score for this fragment

291 if z_score > fragment_max_z_score:

292 fragment_max_z_score = z_score

293 fragment_max_z_score_frame = i

294 # Keep track of the maixmum z score

295 if z_score > max_z_score:

296 max_z_score = z_score

297 max_z_score_frame = i

298 # If z score bypassed the limit then report it

299 if z_score > standard_deviations_cutoff:

300 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet

301 if i - 1 == bypassed_frames:

302 bypassed_frames += 1

303 continue

304 if outliers_count >= 4:

305 print(' etc...')

306 break

307 print(f' FAIL: Sudden RMSD jump between frames {i} and {i+1}')

308 outliers_count += 1

309

310 print(f'Fragment {structure.name_selection(fragment)} -> {fragment_max_z_score} in frame {fragment_max_z_score_frame}')

311

312 # Update the maximum number of bypassed frames

313 if bypassed_frames > max_bypassed_frames:

314 max_bypassed_frames = bypassed_frames

315

316 # If there were any outlier then the check has failed

317 if outliers_count > 0:

318 # Add a warning an return True since the test failed in case we have mercy

319 message = 'RMSD check has failed: there may be sudden jumps along the trajectory'

320 if TRAJECTORY_INTEGRITY_FLAG in mercy:

321 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, message)

322 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False)

323 return False

324 # Otherwise kill the process right away

325 raise TestFailure(message)

326

327 # Always print the maximum z score and its frames

328 print(f' Maximum z score {max_z_score} reported between frames {max_z_score_frame} and {max_z_score_frame + 1}')

329

330 # Warn the user if we had bypassed frames

331 if max_bypassed_frames > 0:

332 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, f'First {max_bypassed_frames} frames may be not equilibrated')

333

334 print(' Test has passed successfully')

335 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)

336 return True

337

338# Look for sudden raises of RMSd values from one frame to another

339# To do so, we check the RMSD of every frame using its previous frame as reference

340# DANI: Dependemos de que cambien el comportamiento de MDtraj para que esto funcione

341# DANI: https://github.com/mdtraj/mdtraj/issues/1966

342def check_trajectory_integrity_per_fragment_2 (

343 input_structure_filename : str,

344 input_trajectory_filename : str,

345 structure : 'Structure',

346 pbc_selection : 'Selection',

347 mercy : List[str],

348 trust: List[str],

349 register : 'Register',

350 #time_length : float,

351 check_selection : str,

352 # DANI: He visto saltos 'correctos' pasar de 6

353 # DANI: He visto saltos 'incorrectos' no bajar de 10

354 standard_deviations_cutoff : float) -> bool:

355

356 # Skip the test if we trust

357 if TRAJECTORY_INTEGRITY_FLAG in trust:

358 return True

359

360 # Skip the test if it is already passed according to the register

361 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None):

362 return True

363

364 # Remove old warnings

365 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG)

366

367 # Parse the selection in VMD selection syntax

368 parsed_selection = structure.select(check_selection, syntax='vmd')

369

370 # If there is nothing to check then warn the user and stop here

371 if not parsed_selection:

372 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis')

373

374 # Discard PBC residues from the selection to be checked

375 parsed_selection -= pbc_selection

376

377 # If there is nothing to check then warn the user and stop here

378 if not parsed_selection:

379 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction')

380 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na')

381 return True

382

383 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps

384 # For this reason we allow the first frames to bypass the check

385 # As soon as one frame is below the cutoff the bypass is finished for the following frames

386 # Count the maximum number of bypassed frames and warn the user in case there are any

387 max_bypassed_frames = 0

388

389 # Get fragments out of the parsed selection

390 # Fragments will be analyzed independently

391 # A small fragment may cause a small RMSD perturbation when it is part of a large structure

392 # Note that jumps of partial fragments along boundaries are rare imaging problems

393 # Usually are whole fragments the ones which jump

394 fragments = list(structure.find_fragments(parsed_selection))

395

396 print(f'Checking trajectory integrity')

397

398 # Iterate over the different fragments

399 for f, fragment in enumerate(fragments, 1):

400 fragment_name = structure.name_selection(fragment)

401 print(f'Fragment {f}/{len(fragments)} -> {fragment_name}')

402

403 # Load the trajectory frame by frame

404 trajectory = mdt.iterload(

405 input_trajectory_filename,

406 top=input_structure_filename,

407 atom_indices=fragment.atom_indices,

408 chunk=1)

409

410 # Save all RMSD jumps

411 rmsd_jumps = []

412

413 # Save the previous frame any time

414 previous_frame = next(trajectory)

415

416 # Add an extra breakline before the first log

417 print()

418

419 # Iterate trajectory frames

420 for f, frame in enumerate(trajectory, 1):

421 # Update the current frame log

422 reprint(f' Frame {f}')

423

424 # Calculate RMSD value between previous and current frame

425 # DANI: El centrado de MDtraj elimina el salto a través de las boundaries

426 # DANI: El precentered=True debería evitarlo, pero es ignorado si hay atom_indices

427 rmsd_value = mdt.rmsd(frame, previous_frame, precentered=True)[0]

428 rmsd_jumps.append(rmsd_value)

429

430 # Update the previous frame as the current one

431 previous_frame = frame

432

433 # If the trajectory has only 1 or 2 frames then there is no test to do

434 if len(rmsd_jumps) <= 1:

435 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)

436 return True

437

438 # Get the maximum RMSD value and check it is a reasonable deviation from the average values

439 # Otherwise, if it is an outlier, the test fails

440 mean_rmsd_jump = np.mean(rmsd_jumps)

441 stdv_rmsd_jump = np.std(rmsd_jumps)

442

443 # Count the number of bypassed frames for this fragment

444 bypassed_frames = 0

445

446 # Capture outliers

447 # If we capture more than 5 we stop searching

448 outliers_count = 0

449 max_z_score = 0

450 max_z_score_frame = 0

451

452 for i, rmsd_jump in enumerate(rmsd_jumps, 1):

453 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump )

454 # Keep track of the maixmum z score

455 if z_score > max_z_score:

456 max_z_score = z_score

457 max_z_score_frame = i

458 # If z score bypassed the limit then report it

459 if z_score > standard_deviations_cutoff:

460 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet

461 if i - 1 == bypassed_frames:

462 bypassed_frames += 1

463 continue

464 if outliers_count >= 4:

465 print(' etc...')

466 break

467 print(f' FAIL: Sudden RMSD jump between frames {i} and {i+1}')

468 outliers_count += 1

469

470 # Always print the maximum z score and its frames

471 print(f' Maximum z score {max_z_score} reported between frames {max_z_score_frame} and {max_z_score_frame + 1}')

472

473 # Update the maximum number of bypassed frames

474 if bypassed_frames > max_bypassed_frames:

475 max_bypassed_frames = bypassed_frames

476

477 # If there were any outlier then the check has failed

478 if outliers_count > 0:

479 # Add a warning an return True since the test failed in case we have mercy

480 message = 'RMSD check has failed: there may be sudden jumps along the trajectory'

481 if TRAJECTORY_INTEGRITY_FLAG in mercy:

482 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, message)

483 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False)

484 return False

485 # Otherwise kill the process right away

486 raise TestFailure(message)

487

488 # Warn the user if we had bypassed frames

489 if max_bypassed_frames > 0:

490 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, f'First {max_bypassed_frames} frames may be not equilibrated')

491

492 print(' Test has passed successfully')

493 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)

494 return True

495

496# Compute every residue RMSD to check if there are sudden jumps along the trajectory

497# HARDCODE: This function is not fully implemented but enabled manually for specific cases

498def check_trajectory_integrity_per_residue (

499 input_structure_filename : str,

500 input_trajectory_filename : str,

501 structure : 'Structure',

502 pbc_selection : 'Selection',

503 mercy : List[str],

504 trust: List[str],

505 register : 'Register',

506 #time_length : float,

507 check_selection : str,

508 # DANI: He visto saltos 'correctos' pasar de 11

509 # DANI: He visto saltos 'incorrectos' no bajar de 14

510 standard_deviations_cutoff : float):

511

512 # HARDCODE: The default value does not work for a single residue

513 standard_deviations_cutoff = 12

514

515 # Skip the test if we trust

516 if TRAJECTORY_INTEGRITY_FLAG in trust:

517 return True

518

519 # Skip the test if it is already passed according to the register

520 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None):

521 return True

522

523 # Remove old warnings

524 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG)

525

526 # Parse the selection in VMD selection syntax

527 parsed_selection = structure.select(check_selection, syntax='vmd')

528

529 # If there is nothing to check then warn the user and stop here

530 if not parsed_selection:

531 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis')

532

533 # Discard PBC residues from the selection to be checked

534 parsed_selection -= pbc_selection

535

536 # If there is nothing to check then warn the user and stop here

537 if not parsed_selection:

538 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction')

539 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na')

540 return True

541

542 # We must filter out residues which only have 1 atom (e.g. ions)

543 # This is because sometimes pytraj does not return results for them and then the number of results and residues does not match

544 # More info: https://github.com/Amber-MD/pytraj/issues/1580

545 ion_atom_indices = []

546 for residue in structure.residues:

547 if len(residue.atom_indices) == 1:

548 ion_atom_indices += residue.atom_indices

549 ions_selection = structure.select_atom_indices(ion_atom_indices)

550 parsed_selection -= ions_selection

551

552 # Parse the selection to a pytraj mask

553 pytraj_selection = parsed_selection.to_pytraj()

554

555 # Calculate the residue indices of the overall structure remaining in the filtered trajectory

556 residue_indices = structure.get_selection_residue_indices(parsed_selection)

557 n_residues = len(residue_indices)

558

559 print('Checking trajectory integrity per residue')

560

561 # Parse the trajectory into pytraj and apply the mask

562 # NEVER FORGET: The pytraj iterload does not accept a mask, but we apply the mask later in the analysis

563 pt_trajectory = get_pytraj_trajectory(input_structure_filename, input_trajectory_filename, atom_selection = parsed_selection)

564

565 # Make sure the expected output number of residues to match with the pytraj results

566 # These numbers may not match when ions are included so we better check

567 # NEVER FORGET: The pytraj TrajectoryIterator is not an iterator

568 first_frame = pt_trajectory[0:1]

569 # DANI: When the 'resname' argument is missing it prints "Error: Range::SetRange(None): Range is -1 for None"

570 # DANI: However there is no problem and the analysis runs flawlessly

571 # DANI: For this reason we call this function with no resname and then we remove the log

572 data_sample = pt.rmsd_perres(first_frame, ref=first_frame, perres_mask=pytraj_selection)

573 # We remove the previous error log

574 delete_previous_log()

575 # We remove the first result, which is meant to be the whole rmsd and whose key is 'RMSD_00001'

576 del data_sample[0]

577 if n_residues != len(data_sample):

578 raise ValueError(f'Number of target residues ({n_residues}) does not match number of residues in data ({len(data_sample)})')

579

580 # Saving all RMSD jumps may take a lot of memory

581 # Instead we will store the sum of values and the maximum

582 # This way we can caluclate the average value at the end and check if the maximum is too far from it

583 rmsd_per_residue_per_frame = []

584

585 # Add an extra breakline before the first log

586 print()

587

588 # Iterate trajectory frames

589 previous_frame_trajectory = first_frame

590 frame_number = 1

591 for frame in pt_trajectory:

592 # Update the current frame log

593 reprint(f' Frame {frame_number}')

594 # Set a pytraj trajectory out of a pytraj frame

595 frame_trajectory = pt.Trajectory(top=pt_trajectory.top)

596 frame_trajectory.append(frame)

597 # Run the analysis in pytraj

598 # The result data is a custom pytraj class: pytraj.datasets.datasetlist.DatasetList

599 # This class has keys but its attributes can not be accessed through the key

600 # They must be accessed thorugh the index

601 # DANI: When the 'resname' argument is missing it prints "Error: Range::SetRange(None): Range is -1 for None"

602 # DANI: However there is no problem and the analysis runs flawlessly

603 # DANI: Adding resrage as a list/range was tried and did not work, only string works

604 # DANI: Adding a string resrange however strongly impacts the speed when this function is called repeatedly

605 # DANI: For this reason we call this function with no resname and then we remove the log

606 rmsd_per_residue = pt.rmsd_perres(frame_trajectory, ref=previous_frame_trajectory, mask=pytraj_selection)

607 # We remove the previous error log

608 delete_previous_log()

609 # We remove the first result, which is meant to be the whole rmsd and whose key is 'RMSD_00001'

610 del rmsd_per_residue[0]

611 # Check we have no NaNs

612 if np.isnan(rmsd_per_residue[0][0]):

613 raise ValueError(f'We are having NaNs at frame {frame_number}')

614 # Add last values to the list

615 rmsd_per_residue_per_frame.append(rmsd_per_residue)

616 # rmsd_per_residue_per_frame[:, frame]

617 # Now update data for every residue

618 # for index, residue_rmsd in enumerate(rmsd_per_residue):

619 # current_rmsd = residue_rmsd[0]

620 # # Get the current residue rmsd data

621 # total_rmsd = rmsd_per_residue_per_frame[index]

622 # total_rmsd['accumulated'] += current_rmsd

623 # total_rmsd['maximum'] = max(total_rmsd['maximum'], current_rmsd)

624 # Update previous coordinates

625 previous_frame_trajectory = frame_trajectory

626 # Update the frame_number

627 frame_number += 1

628

629 # If the trajectory has only 1 or 2 frames then there is no test to do

630 n_jumps = len(rmsd_per_residue_per_frame)

631 if n_jumps <= 1:

632 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)

633 return True

634

635 # Keep the overall maximum z score, and its residue and frame for the logs

636 overall_max_z_score = 0

637 overall_max_z_score_frame = None

638 overall_max_z_score_residue = None

639

640 # Keep the overall maximum bypassed frames number

641 overall_bypassed_frames = 0

642

643 # Keep the overall count of residues with outliers

644 overall_outliered_residues = 0

645

646 # Add an extra breakline before the next log

647 print()

648

649 # Now check there are not sudden jumps for each residue separattely

650 for residue_number in range(n_residues):

651 reprint(f' Residue {residue_number+1}')

652 # Get the rmsd jumps for each frame for this specific residue

653 rmsd_jumps = [ frame[residue_number] for frame in rmsd_per_residue_per_frame ]

654

655 # Get the maximum RMSD value and check it is a reasonable deviation from the average values

656 # Otherwise, if it is an outlier, the test fails

657 mean_rmsd_jump = np.mean(rmsd_jumps)

658 stdv_rmsd_jump = np.std(rmsd_jumps)

659

660 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps

661 # For this reason we allow the first frames to bypass the check

662 # As soon as one frame is below the cutoff the bypass is finished for the following frames

663 # Count the number of bypassed frames and warn the user in case there are any

664 bypassed_frames = 0

665

666 # Capture outliers

667 # If we capture more than 5 we stop searching

668 outliers_count = 0

669 max_z_score = 0

670 max_z_score_frame = 0

671 for i, rmsd_jump in enumerate(rmsd_jumps, 1):

672 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump )

673 # Keep track of the maixmum z score

674 if z_score > max_z_score:

675 max_z_score = z_score

676 max_z_score_frame = i

677 # If z score bypassed the limit then report it

678 if z_score > standard_deviations_cutoff:

679 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet

680 if i - 1 == bypassed_frames:

681 bypassed_frames += 1

682 continue

683 # Otherwise we consider this as an outlier and thus the test has failed

684 # However we keep checking just to find and report the highest outlier

685 outliers_count += 1

686

687 # Update the overall bypassed frames if we overcomed it

688 if overall_bypassed_frames < bypassed_frames:

689 overall_bypassed_frames = bypassed_frames

690

691 # Update the overall max z score if we overcome it

692 if max_z_score > overall_max_z_score:

693 overall_max_z_score = max_z_score

694 overall_max_z_score_frame = max_z_score_frame

695 overall_max_z_score_residue = residue_number

696

697 # If there were any outlier then add one to the overall count

698 if outliers_count > 0:

699 overall_outliered_residues += 1

700

701 # Always print the overall maximum z score and its frames and residue

702 overall_max_z_score_residue_label = pt_trajectory.top.residue(overall_max_z_score_residue)

703 print(f' Maximum z score {overall_max_z_score} reported for residue {overall_max_z_score_residue_label} between frames {overall_max_z_score_frame} and {overall_max_z_score_frame + 1}')

704

705 # If there were any outlier then the check has failed

706 if overall_outliered_residues > 0:

707 # Add a warning an return True since the test failed in case we have mercy

708 message = 'RMSD check has failed: there may be sudden jumps along the trajectory'

709 if TRAJECTORY_INTEGRITY_FLAG in mercy:

710 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, message)

711 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False)

712 return False

713 # Otherwise kill the process right away

714 raise TestFailure(message)

715

716 # Warn the user if we had bypassed frames

717 if overall_bypassed_frames > 0:

718 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, f'First {overall_bypassed_frames} frames may be not equilibrated')

719

720 print(' Test has passed successfully')

721 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)

722 return True

Coverage for model_workflow/analyses/rmsd_check.py: 17%

328 statements