Coverage for model_workflow/analyses/rmsd_per_residue.py: 84%

32 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1import pytraj as pt 

2 

3from distutils.version import StrictVersion 

4 

5from model_workflow.utils.pyt_spells import get_reduced_pytraj_trajectory 

6from model_workflow.utils.auxiliar import save_json 

7from model_workflow.utils.constants import OUTPUT_RMSD_PERRES_FILENAME 

8from model_workflow.utils.type_hints import * 

9 

10 

11def rmsd_per_residue ( 

12 structure_file : str, 

13 trajectory_file : 'File', 

14 output_directory : str, 

15 structure : 'Structure', 

16 pbc_selection : 'Selection', 

17 snapshots : int, 

18 frames_limit : int = 100): 

19 """Perform the RMSD analysis for each residue.""" 

20 

21 # Set the main output filepath 

22 output_analysis_filepath = f'{output_directory}/{OUTPUT_RMSD_PERRES_FILENAME}' 

23 

24 # Parse the trajectory intro pytraj 

25 # Reduce it in case it exceeds the frames limit 

26 pt_trajectory, frame_step, frames_count = get_reduced_pytraj_trajectory( 

27 structure_file.path, trajectory_file.path, snapshots, frames_limit) 

28 

29 # We must exclude here PBC residues from the analysis 

30 # PBC residues may jump through boundaries thus having non-sense high RMSD values 

31 # Filter the trajectory with atoms to be included in the analysis 

32 filter_in_selection = structure.invert_selection(pbc_selection) 

33 

34 # Calculate the residue indices of the overall structure remaining in the filtered trajectory 

35 residue_indices = structure.get_selection_residue_indices(filter_in_selection) 

36 # Count the number of residues 

37 residue_count = len(residue_indices) 

38 

39 # Make sure we have more than 1 residue left at this point 

40 # Otherwise it makes not sense running this analysis for a single residue 

41 if residue_count <= 1: 

42 tail = 'no residues at all' if residue_count == 0 else 'one residue only' 

43 print(' The RMSD per residue analysis will be skipped since it would be applied to ' + tail) 

44 return 

45 

46 # Convert the selection to a pytraj mask 

47 pytraj_selection = filter_in_selection.to_pytraj() 

48 # Apply the mask to the trajectory 

49 filtered_pt_trajectory = pt_trajectory[pytraj_selection] 

50 # WARNING: This extra line prevents the error "Segment violation (core dumped)" in some pdbs 

51 # This happens with some random pdbs which pytraj considers to have 0 Mols 

52 # More info: https://github.com/Amber-MD/cpptraj/pull/820 

53 # IMPORTANT: This is critical for pytraj <= 2.0.5 but it makes the code fail for pytraj 2.0.6 

54 if StrictVersion(pt.__version__) <= StrictVersion('2.0.5'): 

55 filtered_pt_trajectory.top.start_new_mol() 

56 

57 # Run the analysis in pytraj 

58 # The result data is a custom pytraj class: pytraj.datasets.datasetlist.DatasetList 

59 # This class has keys but its attributes can not be accessed through the key 

60 # They must be accessed through the index 

61 # IMPORTANT 

62 # Note that the argument 'resrange' may be redundant, since we always target for all residues 

63 # If this argument is not passed then it prints "Error: Range::SetRange(None): Range is -1 for None" 

64 # However, most of the times there is no problem and the analysis runs flawlessly anyway 

65 # We started to have problems with some ions but then we had no results at all with coarse grain 

66 # More info: https://github.com/Amber-MD/pytraj/issues/1580 

67 resrange = f'1-{structure.residue_count + 1}' 

68 data = pt.rmsd_perres(filtered_pt_trajectory, resrange=resrange) 

69 

70 # We remove the first result, which is meant to be the whole rmsd and whose key is 'RMSD_00001' 

71 del data[0] 

72 

73 # Check the expected output number of residues to match with the pytraj results 

74 if residue_count != len(data): 

75 raise ValueError(f'Number of residues in structure ({residue_count}) does not match number of residues in RMSD-perres results ({len(data)})') 

76 

77 # Mine the analysis data 

78 whole_structure_residues_count = len(structure.residues) 

79 rmsd_per_residue = [ None ] * whole_structure_residues_count 

80 for index, residue_data in enumerate(data): 

81 # Get the actual residue index of the current data 

82 residue_index = residue_indices[index] 

83 # Add current data to its corresponding position in the 'per residue' list 

84 rmsd_per_residue[residue_index] = list(residue_data) 

85 

86 # Export the analysis in json format 

87 output_analysis = { 'step': frame_step, 'rmsdpr': rmsd_per_residue } 

88 save_json(output_analysis, output_analysis_filepath)