Coverage for model_workflow/analyses/sasa.py: 87%

70 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1from model_workflow.tools.xvg_parse import xvg_parse 

2from model_workflow.tools.get_pdb_frames import get_pdb_frames 

3from model_workflow.utils.auxiliar import save_json 

4from model_workflow.utils.constants import GROMACS_EXECUTABLE, OUTPUT_SASA_FILENAME 

5from model_workflow.utils.type_hints import * 

6 

7import os 

8import numpy 

9from subprocess import run, PIPE, Popen 

10 

11# This is a residual file produced by the sasa analysis 

12# It must be deleted after each 

13area_filename = 'area.xvg' 

14 

15def sasa( 

16 structure_file: 'File', 

17 trajectory_file: 'File', 

18 output_directory: str, 

19 structure : 'Structure', 

20 pbc_residues : List[int], 

21 snapshots : int, 

22 frames_limit : int = 100, 

23): 

24 """Perform the Solvent Accessible Surface Analysis.""" 

25 

26 # If all residues are to be excluded since the whole system is in PCB then stop here 

27 if len(pbc_residues) == len(structure.residues): 

28 print(' No residues to run the analysis') 

29 return 

30 

31 # Set the output filepath 

32 output_analysis_filepath = f'{output_directory}/{OUTPUT_SASA_FILENAME}' 

33 

34 # For this analysis me must filter out hydrogens 

35 heavy_atoms_selection = '( not name "H.*" )' 

36 

37 # Execute the atom selection over the structure 

38 structure_selection = structure.select(heavy_atoms_selection, syntax='vmd') 

39 

40 # At this point the number of residues between the original and the non-hydrogens structure should match 

41 # We rely on this, so we must check 

42 filtered_structure = structure.filter(structure_selection) 

43 filtered_residues_count = len(filtered_structure.residues) 

44 original_residues_count = len(structure.residues) 

45 if filtered_residues_count != original_residues_count: 

46 raise ValueError('The number of residues does not match after filtering out hydrogens') 

47 

48 # Convert the structure selection to a ndx file 

49 selection_name = 'sasa' 

50 ndx_selection = structure_selection.to_ndx(selection_name) 

51 ndx_filename = 'indices.ndx' 

52 with open(ndx_filename, 'w') as file: 

53 file.write(ndx_selection) 

54 

55 # We must exclude PBC residues (e.g. membranes) from sasa results 

56 # PBC residues close to the boundary will always have unrealistic sasa values 

57 # WARNING: These results must be exlucded from the analysis afterwards, but the membrane can not be excluded from the structure 

58 # Otherwise, those residues which are covered by the membrane would be exposed to the solvent during the analysis 

59 skipped_residue_indices = pbc_residues 

60 

61 # Calculate the sasa for each frame 

62 sasa_per_frame = [] 

63 frames, step, count = get_pdb_frames(structure_file.path, trajectory_file.path, snapshots, frames_limit) 

64 for f, current_frame in enumerate(frames): 

65 

66 # Run the sasa analysis over the current frame 

67 # WARNING: We want the SASA per residue, and this could be obtained replacing '-oa' per -'or' 

68 # WARNING: However, residues are not enumerated the same in Gromacs and other tools (e.g. pytraj) 

69 # For this reason we must always rely on atom numeration, which is the same along different tools 

70 current_frame_sasa = 'sasa' + str(f) + '.xvg' 

71 logs = run([ 

72 GROMACS_EXECUTABLE, 

73 "sasa", 

74 "-s", 

75 current_frame, 

76 '-oa', 

77 current_frame_sasa, 

78 "-n", 

79 ndx_filename, 

80 "-surface", 

81 "0", 

82 '-quiet' 

83 ], stdout=PIPE, stderr=PIPE).stdout.decode() 

84 

85 # In case the output file does not exist at this point it means something went wrong with Gromacs 

86 if not os.path.exists(current_frame_sasa): 

87 print(logs) 

88 raise SystemExit('Something went wrong with Gromacs') 

89 

90 # Mine the sasa results (.xvg file) 

91 # Hydrogen areas are not recorded in the xvg file 

92 sasa = xvg_parse(current_frame_sasa, ['n', 'area', 'sd']) 

93 # Restructure data by adding all atoms sas per residue 

94 atom_numbers = sasa['n'] 

95 atom_areas = sasa['area'] 

96 sas_per_residues = [0.0] * len(structure.residues) 

97 for atom_number, atom_area in zip(atom_numbers, atom_areas): 

98 atom_index = int(atom_number) - 1 

99 atom = structure.atoms[atom_index] 

100 residue_index = atom.residue_index 

101 sas_per_residues[residue_index] += atom_area 

102 sasa_per_frame.append(sas_per_residues) 

103 # Delete current frame files before going for the next frame 

104 run([ 

105 "rm", 

106 current_frame_sasa, 

107 area_filename, 

108 ], stdout=PIPE).stdout.decode() 

109 

110 # Remove the indices file since it is not required anymore 

111 os.remove(ndx_filename) 

112 

113 # Format output data 

114 # Sasa values must be separated by residue and then ordered by frame 

115 saspf = [] 

116 means = [] 

117 stdvs = [] 

118 for r, residue in enumerate(structure.residues): 

119 # Skip membrane residues 

120 if r in skipped_residue_indices: 

121 saspf.append(None) 

122 means.append(None) 

123 stdvs.append(None) 

124 continue 

125 # Get the number of atoms in current residue 

126 atom_count = len(residue.atoms) 

127 # Harvest its sasa along each frame 

128 residue_saspf = [] 

129 for frame in sasa_per_frame: 

130 # IMPORTANT: The original SASA value is modified to be normalized 

131 # We divide the value by the number of atoms 

132 frame_sas = frame[r] 

133 normalized_frame_sas = frame_sas / atom_count 

134 # To make is standard with the rest of analyses we pass the results from nm² to A² 

135 standard_frame_sas = normalized_frame_sas * 100 

136 residue_saspf.append(standard_frame_sas) 

137 # Add current resiude sas per frame to the overall list 

138 saspf.append(residue_saspf) 

139 # Calculate the mean and standard deviation of the residue sasa values 

140 mean = numpy.mean(residue_saspf) 

141 means.append(mean) 

142 stdv = numpy.std(residue_saspf) 

143 stdvs.append(stdv) 

144 # Set the content of the analysis output file 

145 output = { 

146 'step': step, 

147 'saspf': saspf, 

148 'means': means, 

149 'stdvs': stdvs, 

150 } 

151 

152 # Export the analysis in json format 

153 save_json(output, output_analysis_filepath)