Coverage for model_workflow/analyses/rmsd_check.py: 17%
328 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1import mdtraj as mdt
2import pytraj as pt
3import numpy as np
4import time
5import math
6import sys
8# Visual output tools
9from tqdm import tqdm
10import plotext as plt
12from model_workflow.utils.auxiliar import delete_previous_log, reprint, TestFailure, warn
13from model_workflow.utils.constants import TRAJECTORY_INTEGRITY_FLAG
14from model_workflow.utils.pyt_spells import get_pytraj_trajectory
15from model_workflow.utils.type_hints import *
17# Check if the output is going to a terminal or not
18is_terminal = sys.stdout.isatty()
20# LORE
21# This test was originaly intended to use a RMSD jump cutoff based on number of atoms and timestep
22# However, after a deep study, it was observed that simulations with similar features may show very different RMSD jumps
23# For this reason now we comptue RMSD jumps along the whole trajectory and check that the biggest jump is not an outlier
24# The outlier is defined according to how many times the standard deviation far from the mean is a value
26# Look for sudden raises of RMSd values from one frame to another
27# To do so, we check the RMSD of every frame using its previous frame as reference
28def check_trajectory_integrity (
29 input_structure_filename : str,
30 input_trajectory_filename : str,
31 structure : 'Structure',
32 pbc_selection : 'Selection',
33 mercy : List[str],
34 trust: List[str],
35 register : 'Register',
36 #time_length : float,
37 check_selection : str,
38 # DANI: He visto saltos 'correctos' pasar de 6
39 # DANI: He visto saltos 'incorrectos' no bajar de 10
40 standard_deviations_cutoff : float,
41 snapshots : int) -> bool:
43 # Skip the test if we trust
44 if TRAJECTORY_INTEGRITY_FLAG in trust:
45 return True
47 # Skip the test if it is already passed according to the register
48 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None):
49 return True
51 # Remove old warnings
52 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG)
54 # Parse the selection in VMD selection syntax
55 parsed_selection = structure.select(check_selection, syntax='vmd')
57 # If there is nothing to check then warn the user and stop here
58 if not parsed_selection:
59 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis')
61 # Discard PBC residues from the selection to be checked
62 parsed_selection -= pbc_selection
64 # If there is nothing to check then warn the user and stop here
65 if not parsed_selection:
66 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction')
67 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na')
68 return True
70 print('Checking trajectory integrity')
72 # Load the trajectory frame by frame
73 trajectory = mdt.iterload(input_trajectory_filename, top=input_structure_filename, chunk=1)
75 # Save the previous frame any time
76 previous_frame = next(trajectory)
78 # Save all RMSD jumps
79 rmsd_jumps = []
81 # Initialize progress bar first
82 pbar = tqdm(enumerate(trajectory, 1), total=snapshots, desc=' Frame', unit='frame', initial=1)
84 for f, frame in pbar:
85 # Calculate RMSD value between previous and current frame
86 rmsd_value = mdt.rmsd(frame, previous_frame, atom_indices=parsed_selection.atom_indices)[0]
87 rmsd_jumps.append(rmsd_value)
89 # Update the previous frame as the current one
90 previous_frame = frame
91 time.sleep(0.1) # Needed for progress bar to be print at the correct place
93 # If the trajectory has only 1 or 2 frames then there is no test to do
94 if len(rmsd_jumps) <= 1:
95 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)
96 return True
98 # Get the maximum RMSD value and check it is a reasonable deviation from the average values
99 # Otherwise, if it is an outlier, the test fails
100 mean_rmsd_jump = np.mean(rmsd_jumps)
101 stdv_rmsd_jump = np.std(rmsd_jumps)
103 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps
104 # For this reason we allow the first frames to bypass the check
105 # As soon as one frame is below the cutoff the bypass is finished for the following frames
106 # Count the number of bypassed frames and warn the user in case there are any
107 bypassed_frames = 0
109 # Capture outliers
110 # If we capture more than 5 we stop searching
111 outliers_count = 0
112 max_z_score = 0
113 max_z_score_frame = 0
114 for i, rmsd_jump in enumerate(rmsd_jumps, 1):
115 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump )
116 # Keep track of the maixmum z score
117 if z_score > max_z_score:
118 max_z_score = z_score
119 max_z_score_frame = i
120 # If z score bypassed the limit then report it
121 if z_score > standard_deviations_cutoff:
122 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet
123 if i - 1 == bypassed_frames:
124 bypassed_frames += 1
125 continue
126 if outliers_count >= 4:
127 print(' etc...')
128 break
129 print(f' FAIL: Sudden RMSD jump between frames {i} and {i+1}. RMSD jump: {rmsd_jump:4f}')
130 outliers_count += 1
132 # Always print the maximum z score and its frames
133 print(f' Maximum z score {max_z_score:4f} reported between frames {max_z_score_frame} and {max_z_score_frame + 1}.\n'
134 f' Mean: {mean_rmsd_jump:4f}. Stdv: {stdv_rmsd_jump:4f}')
136 # Set if there was any error
137 error = None
138 # If there were any outlier then the check has failed
139 if outliers_count > 0:
140 error = 'RMSD check has failed: there may be sudden jumps along the trajectory'
141 # The message changes if the outliers are at the begining only
142 elif bypassed_frames > 0:
143 error = f'First {bypassed_frames} frames may be not equilibrated'
145 # In case there was a problem,
146 if error:
147 # Display a graph to show the distribution of sudden jumps along the trajectory
148 if is_terminal:
149 data = rmsd_jumps
150 if outliers_count == 0 and bypassed_frames > 0:
151 data = rmsd_jumps[0:bypassed_frames+100]
152 plt.scatter(data)
153 plt.title("RMSD jumps along the trajectory")
154 plt.xlabel('Frame')
155 plt.ylabel('RMSD jump')
156 n_ticks = 5
157 n_jumps = len(data)
158 tickstep = math.ceil(n_jumps / n_ticks)
159 xticks = [ t+1 for t in range(0, n_jumps, tickstep) ]
160 xlabels = [ str(t) for t in xticks ]
161 plt.xticks(xticks, xlabels)
162 plt.show()
163 # Add a warning an return True since the test failed in case we have mercy
164 if TRAJECTORY_INTEGRITY_FLAG in mercy:
165 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, error)
166 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False)
167 return False
168 # Otherwise kill the process
169 raise TestFailure(error)
171 print(' Test has passed successfully')
172 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)
173 return True
175# Look for sudden raises of RMSd values from one frame to another
176# To do so, we check the RMSD of every frame using its previous frame as reference
177# DANI: Dependemos de que cambien el comportamiento de MDtraj para que esto funcione
178# DANI: https://github.com/mdtraj/mdtraj/issues/1966
179def check_trajectory_integrity_per_fragment (
180 input_structure_filename : str,
181 input_trajectory_filename : str,
182 structure : 'Structure',
183 pbc_selection : 'Selection',
184 mercy : List[str],
185 trust: List[str],
186 register : 'Register',
187 #time_length : float,
188 check_selection : str,
189 # DANI: He visto saltos 'correctos' pasar de 6
190 # DANI: He visto saltos 'incorrectos' no bajar de 10
191 standard_deviations_cutoff : float) -> bool:
193 # Skip the test if we trust
194 if TRAJECTORY_INTEGRITY_FLAG in trust:
195 return True
197 # Skip the test if it is already passed according to the register
198 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None):
199 return True
201 # Remove old warnings
202 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG)
204 # Parse the selection in VMD selection syntax
205 parsed_selection = structure.select(check_selection, syntax='vmd')
207 # If there is nothing to check then warn the user and stop here
208 if not parsed_selection:
209 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis')
211 # Discard PBC residues from the selection to be checked
212 parsed_selection -= pbc_selection
214 # If there is nothing to check then warn the user and stop here
215 if not parsed_selection:
216 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction')
217 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na')
218 return True
220 # Get fragments out of the parsed selection
221 # Fragments will be analyzed independently
222 # A small fragment may cause a small RMSD perturbation when it is part of a large structure
223 # Note that jumps of partial fragments along boundaries are rare imaging problems
224 # Usually are whole fragments the ones which jump
225 fragments = list(structure.find_fragments(parsed_selection))
227 print(f'Checking trajectory integrity ({len(fragments)} fragments)')
229 # Load the trajectory frame by frame
230 trajectory = mdt.iterload(input_trajectory_filename, top=input_structure_filename, chunk=1)
232 # Save the previous frame any time
233 previous_frame = next(trajectory)
235 # Save all RMSD jumps
236 fragment_rmsd_jumps = { fragment: [] for fragment in fragments }
238 # Add an extra breakline before the first log
239 print()
241 # Iterate trajectory frames
242 for f, frame in enumerate(trajectory, 1):
243 # Update the current frame log
244 reprint(f' Frame {f}')
246 # Iterate over the different fragments
247 for fragment in fragment_rmsd_jumps:
248 # Calculate RMSD value between previous and current frame
249 # DANI: El centrado de MDtraj elimina el salto a través de las boundaries
250 # DANI: El precentered=True debería evitarlo, pero es ignorado si hay atom_indices
251 rmsd_value = mdt.rmsd(frame, previous_frame, atom_indices=fragment.atom_indices)[0]
252 fragment_rmsd_jumps[fragment].append(rmsd_value)
254 # Update the previous frame as the current one
255 previous_frame = frame
257 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps
258 # For this reason we allow the first frames to bypass the check
259 # As soon as one frame is below the cutoff the bypass is finished for the following frames
260 # Count the maximum number of bypassed frames and warn the user in case there are any
261 max_bypassed_frames = 0
263 # Capture outliers
264 # If we capture more than 5 we stop searching
265 outliers_count = 0
266 max_z_score = 0
267 max_z_score_frame = 0
269 # Iterate over the different fragments
270 for fragment, rmsd_jumps in fragment_rmsd_jumps.items():
272 # If the trajectory has only 1 or 2 frames then there is no test to do
273 if len(rmsd_jumps) <= 1:
274 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)
275 return True
277 # Get the maximum RMSD value and check it is a reasonable deviation from the average values
278 # Otherwise, if it is an outlier, the test fails
279 mean_rmsd_jump = np.mean(rmsd_jumps)
280 stdv_rmsd_jump = np.std(rmsd_jumps)
282 # Count the number of bypassed frames for this fragment
283 bypassed_frames = 0
285 fragment_max_z_score = 0
286 fragment_max_z_score_frame = 0
288 for i, rmsd_jump in enumerate(rmsd_jumps, 1):
289 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump )
290 # Keep track of the maixmum z score for this fragment
291 if z_score > fragment_max_z_score:
292 fragment_max_z_score = z_score
293 fragment_max_z_score_frame = i
294 # Keep track of the maixmum z score
295 if z_score > max_z_score:
296 max_z_score = z_score
297 max_z_score_frame = i
298 # If z score bypassed the limit then report it
299 if z_score > standard_deviations_cutoff:
300 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet
301 if i - 1 == bypassed_frames:
302 bypassed_frames += 1
303 continue
304 if outliers_count >= 4:
305 print(' etc...')
306 break
307 print(f' FAIL: Sudden RMSD jump between frames {i} and {i+1}')
308 outliers_count += 1
310 print(f'Fragment {structure.name_selection(fragment)} -> {fragment_max_z_score} in frame {fragment_max_z_score_frame}')
312 # Update the maximum number of bypassed frames
313 if bypassed_frames > max_bypassed_frames:
314 max_bypassed_frames = bypassed_frames
316 # If there were any outlier then the check has failed
317 if outliers_count > 0:
318 # Add a warning an return True since the test failed in case we have mercy
319 message = 'RMSD check has failed: there may be sudden jumps along the trajectory'
320 if TRAJECTORY_INTEGRITY_FLAG in mercy:
321 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, message)
322 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False)
323 return False
324 # Otherwise kill the process right away
325 raise TestFailure(message)
327 # Always print the maximum z score and its frames
328 print(f' Maximum z score {max_z_score} reported between frames {max_z_score_frame} and {max_z_score_frame + 1}')
330 # Warn the user if we had bypassed frames
331 if max_bypassed_frames > 0:
332 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, f'First {max_bypassed_frames} frames may be not equilibrated')
334 print(' Test has passed successfully')
335 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)
336 return True
338# Look for sudden raises of RMSd values from one frame to another
339# To do so, we check the RMSD of every frame using its previous frame as reference
340# DANI: Dependemos de que cambien el comportamiento de MDtraj para que esto funcione
341# DANI: https://github.com/mdtraj/mdtraj/issues/1966
342def check_trajectory_integrity_per_fragment_2 (
343 input_structure_filename : str,
344 input_trajectory_filename : str,
345 structure : 'Structure',
346 pbc_selection : 'Selection',
347 mercy : List[str],
348 trust: List[str],
349 register : 'Register',
350 #time_length : float,
351 check_selection : str,
352 # DANI: He visto saltos 'correctos' pasar de 6
353 # DANI: He visto saltos 'incorrectos' no bajar de 10
354 standard_deviations_cutoff : float) -> bool:
356 # Skip the test if we trust
357 if TRAJECTORY_INTEGRITY_FLAG in trust:
358 return True
360 # Skip the test if it is already passed according to the register
361 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None):
362 return True
364 # Remove old warnings
365 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG)
367 # Parse the selection in VMD selection syntax
368 parsed_selection = structure.select(check_selection, syntax='vmd')
370 # If there is nothing to check then warn the user and stop here
371 if not parsed_selection:
372 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis')
374 # Discard PBC residues from the selection to be checked
375 parsed_selection -= pbc_selection
377 # If there is nothing to check then warn the user and stop here
378 if not parsed_selection:
379 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction')
380 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na')
381 return True
383 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps
384 # For this reason we allow the first frames to bypass the check
385 # As soon as one frame is below the cutoff the bypass is finished for the following frames
386 # Count the maximum number of bypassed frames and warn the user in case there are any
387 max_bypassed_frames = 0
389 # Get fragments out of the parsed selection
390 # Fragments will be analyzed independently
391 # A small fragment may cause a small RMSD perturbation when it is part of a large structure
392 # Note that jumps of partial fragments along boundaries are rare imaging problems
393 # Usually are whole fragments the ones which jump
394 fragments = list(structure.find_fragments(parsed_selection))
396 print(f'Checking trajectory integrity')
398 # Iterate over the different fragments
399 for f, fragment in enumerate(fragments, 1):
400 fragment_name = structure.name_selection(fragment)
401 print(f'Fragment {f}/{len(fragments)} -> {fragment_name}')
403 # Load the trajectory frame by frame
404 trajectory = mdt.iterload(
405 input_trajectory_filename,
406 top=input_structure_filename,
407 atom_indices=fragment.atom_indices,
408 chunk=1)
410 # Save all RMSD jumps
411 rmsd_jumps = []
413 # Save the previous frame any time
414 previous_frame = next(trajectory)
416 # Add an extra breakline before the first log
417 print()
419 # Iterate trajectory frames
420 for f, frame in enumerate(trajectory, 1):
421 # Update the current frame log
422 reprint(f' Frame {f}')
424 # Calculate RMSD value between previous and current frame
425 # DANI: El centrado de MDtraj elimina el salto a través de las boundaries
426 # DANI: El precentered=True debería evitarlo, pero es ignorado si hay atom_indices
427 rmsd_value = mdt.rmsd(frame, previous_frame, precentered=True)[0]
428 rmsd_jumps.append(rmsd_value)
430 # Update the previous frame as the current one
431 previous_frame = frame
433 # If the trajectory has only 1 or 2 frames then there is no test to do
434 if len(rmsd_jumps) <= 1:
435 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)
436 return True
438 # Get the maximum RMSD value and check it is a reasonable deviation from the average values
439 # Otherwise, if it is an outlier, the test fails
440 mean_rmsd_jump = np.mean(rmsd_jumps)
441 stdv_rmsd_jump = np.std(rmsd_jumps)
443 # Count the number of bypassed frames for this fragment
444 bypassed_frames = 0
446 # Capture outliers
447 # If we capture more than 5 we stop searching
448 outliers_count = 0
449 max_z_score = 0
450 max_z_score_frame = 0
452 for i, rmsd_jump in enumerate(rmsd_jumps, 1):
453 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump )
454 # Keep track of the maixmum z score
455 if z_score > max_z_score:
456 max_z_score = z_score
457 max_z_score_frame = i
458 # If z score bypassed the limit then report it
459 if z_score > standard_deviations_cutoff:
460 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet
461 if i - 1 == bypassed_frames:
462 bypassed_frames += 1
463 continue
464 if outliers_count >= 4:
465 print(' etc...')
466 break
467 print(f' FAIL: Sudden RMSD jump between frames {i} and {i+1}')
468 outliers_count += 1
470 # Always print the maximum z score and its frames
471 print(f' Maximum z score {max_z_score} reported between frames {max_z_score_frame} and {max_z_score_frame + 1}')
473 # Update the maximum number of bypassed frames
474 if bypassed_frames > max_bypassed_frames:
475 max_bypassed_frames = bypassed_frames
477 # If there were any outlier then the check has failed
478 if outliers_count > 0:
479 # Add a warning an return True since the test failed in case we have mercy
480 message = 'RMSD check has failed: there may be sudden jumps along the trajectory'
481 if TRAJECTORY_INTEGRITY_FLAG in mercy:
482 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, message)
483 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False)
484 return False
485 # Otherwise kill the process right away
486 raise TestFailure(message)
488 # Warn the user if we had bypassed frames
489 if max_bypassed_frames > 0:
490 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, f'First {max_bypassed_frames} frames may be not equilibrated')
492 print(' Test has passed successfully')
493 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)
494 return True
496# Compute every residue RMSD to check if there are sudden jumps along the trajectory
497# HARDCODE: This function is not fully implemented but enabled manually for specific cases
498def check_trajectory_integrity_per_residue (
499 input_structure_filename : str,
500 input_trajectory_filename : str,
501 structure : 'Structure',
502 pbc_selection : 'Selection',
503 mercy : List[str],
504 trust: List[str],
505 register : 'Register',
506 #time_length : float,
507 check_selection : str,
508 # DANI: He visto saltos 'correctos' pasar de 11
509 # DANI: He visto saltos 'incorrectos' no bajar de 14
510 standard_deviations_cutoff : float):
512 # HARDCODE: The default value does not work for a single residue
513 standard_deviations_cutoff = 12
515 # Skip the test if we trust
516 if TRAJECTORY_INTEGRITY_FLAG in trust:
517 return True
519 # Skip the test if it is already passed according to the register
520 if register.tests.get(TRAJECTORY_INTEGRITY_FLAG, None):
521 return True
523 # Remove old warnings
524 register.remove_warnings(TRAJECTORY_INTEGRITY_FLAG)
526 # Parse the selection in VMD selection syntax
527 parsed_selection = structure.select(check_selection, syntax='vmd')
529 # If there is nothing to check then warn the user and stop here
530 if not parsed_selection:
531 raise Exception('WARNING: There are not atoms to be analyzed for the RMSD analysis')
533 # Discard PBC residues from the selection to be checked
534 parsed_selection -= pbc_selection
536 # If there is nothing to check then warn the user and stop here
537 if not parsed_selection:
538 warn('There are no atoms to be analyzed for the RMSD checking after PBC substraction')
539 register.update_test(TRAJECTORY_INTEGRITY_FLAG, 'na')
540 return True
542 # We must filter out residues which only have 1 atom (e.g. ions)
543 # This is because sometimes pytraj does not return results for them and then the number of results and residues does not match
544 # More info: https://github.com/Amber-MD/pytraj/issues/1580
545 ion_atom_indices = []
546 for residue in structure.residues:
547 if len(residue.atom_indices) == 1:
548 ion_atom_indices += residue.atom_indices
549 ions_selection = structure.select_atom_indices(ion_atom_indices)
550 parsed_selection -= ions_selection
552 # Parse the selection to a pytraj mask
553 pytraj_selection = parsed_selection.to_pytraj()
555 # Calculate the residue indices of the overall structure remaining in the filtered trajectory
556 residue_indices = structure.get_selection_residue_indices(parsed_selection)
557 n_residues = len(residue_indices)
559 print('Checking trajectory integrity per residue')
561 # Parse the trajectory into pytraj and apply the mask
562 # NEVER FORGET: The pytraj iterload does not accept a mask, but we apply the mask later in the analysis
563 pt_trajectory = get_pytraj_trajectory(input_structure_filename, input_trajectory_filename, atom_selection = parsed_selection)
565 # Make sure the expected output number of residues to match with the pytraj results
566 # These numbers may not match when ions are included so we better check
567 # NEVER FORGET: The pytraj TrajectoryIterator is not an iterator
568 first_frame = pt_trajectory[0:1]
569 # DANI: When the 'resname' argument is missing it prints "Error: Range::SetRange(None): Range is -1 for None"
570 # DANI: However there is no problem and the analysis runs flawlessly
571 # DANI: For this reason we call this function with no resname and then we remove the log
572 data_sample = pt.rmsd_perres(first_frame, ref=first_frame, perres_mask=pytraj_selection)
573 # We remove the previous error log
574 delete_previous_log()
575 # We remove the first result, which is meant to be the whole rmsd and whose key is 'RMSD_00001'
576 del data_sample[0]
577 if n_residues != len(data_sample):
578 raise ValueError(f'Number of target residues ({n_residues}) does not match number of residues in data ({len(data_sample)})')
580 # Saving all RMSD jumps may take a lot of memory
581 # Instead we will store the sum of values and the maximum
582 # This way we can caluclate the average value at the end and check if the maximum is too far from it
583 rmsd_per_residue_per_frame = []
585 # Add an extra breakline before the first log
586 print()
588 # Iterate trajectory frames
589 previous_frame_trajectory = first_frame
590 frame_number = 1
591 for frame in pt_trajectory:
592 # Update the current frame log
593 reprint(f' Frame {frame_number}')
594 # Set a pytraj trajectory out of a pytraj frame
595 frame_trajectory = pt.Trajectory(top=pt_trajectory.top)
596 frame_trajectory.append(frame)
597 # Run the analysis in pytraj
598 # The result data is a custom pytraj class: pytraj.datasets.datasetlist.DatasetList
599 # This class has keys but its attributes can not be accessed through the key
600 # They must be accessed thorugh the index
601 # DANI: When the 'resname' argument is missing it prints "Error: Range::SetRange(None): Range is -1 for None"
602 # DANI: However there is no problem and the analysis runs flawlessly
603 # DANI: Adding resrage as a list/range was tried and did not work, only string works
604 # DANI: Adding a string resrange however strongly impacts the speed when this function is called repeatedly
605 # DANI: For this reason we call this function with no resname and then we remove the log
606 rmsd_per_residue = pt.rmsd_perres(frame_trajectory, ref=previous_frame_trajectory, mask=pytraj_selection)
607 # We remove the previous error log
608 delete_previous_log()
609 # We remove the first result, which is meant to be the whole rmsd and whose key is 'RMSD_00001'
610 del rmsd_per_residue[0]
611 # Check we have no NaNs
612 if np.isnan(rmsd_per_residue[0][0]):
613 raise ValueError(f'We are having NaNs at frame {frame_number}')
614 # Add last values to the list
615 rmsd_per_residue_per_frame.append(rmsd_per_residue)
616 # rmsd_per_residue_per_frame[:, frame]
617 # Now update data for every residue
618 # for index, residue_rmsd in enumerate(rmsd_per_residue):
619 # current_rmsd = residue_rmsd[0]
620 # # Get the current residue rmsd data
621 # total_rmsd = rmsd_per_residue_per_frame[index]
622 # total_rmsd['accumulated'] += current_rmsd
623 # total_rmsd['maximum'] = max(total_rmsd['maximum'], current_rmsd)
624 # Update previous coordinates
625 previous_frame_trajectory = frame_trajectory
626 # Update the frame_number
627 frame_number += 1
629 # If the trajectory has only 1 or 2 frames then there is no test to do
630 n_jumps = len(rmsd_per_residue_per_frame)
631 if n_jumps <= 1:
632 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)
633 return True
635 # Keep the overall maximum z score, and its residue and frame for the logs
636 overall_max_z_score = 0
637 overall_max_z_score_frame = None
638 overall_max_z_score_residue = None
640 # Keep the overall maximum bypassed frames number
641 overall_bypassed_frames = 0
643 # Keep the overall count of residues with outliers
644 overall_outliered_residues = 0
646 # Add an extra breakline before the next log
647 print()
649 # Now check there are not sudden jumps for each residue separattely
650 for residue_number in range(n_residues):
651 reprint(f' Residue {residue_number+1}')
652 # Get the rmsd jumps for each frame for this specific residue
653 rmsd_jumps = [ frame[residue_number] for frame in rmsd_per_residue_per_frame ]
655 # Get the maximum RMSD value and check it is a reasonable deviation from the average values
656 # Otherwise, if it is an outlier, the test fails
657 mean_rmsd_jump = np.mean(rmsd_jumps)
658 stdv_rmsd_jump = np.std(rmsd_jumps)
660 # First frames may not be perfectly equilibrated and thus have stronger RMSD jumps
661 # For this reason we allow the first frames to bypass the check
662 # As soon as one frame is below the cutoff the bypass is finished for the following frames
663 # Count the number of bypassed frames and warn the user in case there are any
664 bypassed_frames = 0
666 # Capture outliers
667 # If we capture more than 5 we stop searching
668 outliers_count = 0
669 max_z_score = 0
670 max_z_score_frame = 0
671 for i, rmsd_jump in enumerate(rmsd_jumps, 1):
672 z_score = abs( (rmsd_jump - mean_rmsd_jump) / stdv_rmsd_jump )
673 # Keep track of the maixmum z score
674 if z_score > max_z_score:
675 max_z_score = z_score
676 max_z_score_frame = i
677 # If z score bypassed the limit then report it
678 if z_score > standard_deviations_cutoff:
679 # If there are as many bypassed frames as the index then it means no frame has passed the cutoff yet
680 if i - 1 == bypassed_frames:
681 bypassed_frames += 1
682 continue
683 # Otherwise we consider this as an outlier and thus the test has failed
684 # However we keep checking just to find and report the highest outlier
685 outliers_count += 1
687 # Update the overall bypassed frames if we overcomed it
688 if overall_bypassed_frames < bypassed_frames:
689 overall_bypassed_frames = bypassed_frames
691 # Update the overall max z score if we overcome it
692 if max_z_score > overall_max_z_score:
693 overall_max_z_score = max_z_score
694 overall_max_z_score_frame = max_z_score_frame
695 overall_max_z_score_residue = residue_number
697 # If there were any outlier then add one to the overall count
698 if outliers_count > 0:
699 overall_outliered_residues += 1
701 # Always print the overall maximum z score and its frames and residue
702 overall_max_z_score_residue_label = pt_trajectory.top.residue(overall_max_z_score_residue)
703 print(f' Maximum z score {overall_max_z_score} reported for residue {overall_max_z_score_residue_label} between frames {overall_max_z_score_frame} and {overall_max_z_score_frame + 1}')
705 # If there were any outlier then the check has failed
706 if overall_outliered_residues > 0:
707 # Add a warning an return True since the test failed in case we have mercy
708 message = 'RMSD check has failed: there may be sudden jumps along the trajectory'
709 if TRAJECTORY_INTEGRITY_FLAG in mercy:
710 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, message)
711 register.update_test(TRAJECTORY_INTEGRITY_FLAG, False)
712 return False
713 # Otherwise kill the process right away
714 raise TestFailure(message)
716 # Warn the user if we had bypassed frames
717 if overall_bypassed_frames > 0:
718 register.add_warning(TRAJECTORY_INTEGRITY_FLAG, f'First {overall_bypassed_frames} frames may be not equilibrated')
720 print(' Test has passed successfully')
721 register.update_test(TRAJECTORY_INTEGRITY_FLAG, True)
722 return True