Coverage for model_workflow/analyses/sasa.py: 87%
70 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1from model_workflow.tools.xvg_parse import xvg_parse
2from model_workflow.tools.get_pdb_frames import get_pdb_frames
3from model_workflow.utils.auxiliar import save_json
4from model_workflow.utils.constants import GROMACS_EXECUTABLE, OUTPUT_SASA_FILENAME
5from model_workflow.utils.type_hints import *
7import os
8import numpy
9from subprocess import run, PIPE, Popen
11# This is a residual file produced by the sasa analysis
12# It must be deleted after each
13area_filename = 'area.xvg'
15def sasa(
16 structure_file: 'File',
17 trajectory_file: 'File',
18 output_directory: str,
19 structure : 'Structure',
20 pbc_residues : List[int],
21 snapshots : int,
22 frames_limit : int = 100,
23):
24 """Perform the Solvent Accessible Surface Analysis."""
26 # If all residues are to be excluded since the whole system is in PCB then stop here
27 if len(pbc_residues) == len(structure.residues):
28 print(' No residues to run the analysis')
29 return
31 # Set the output filepath
32 output_analysis_filepath = f'{output_directory}/{OUTPUT_SASA_FILENAME}'
34 # For this analysis me must filter out hydrogens
35 heavy_atoms_selection = '( not name "H.*" )'
37 # Execute the atom selection over the structure
38 structure_selection = structure.select(heavy_atoms_selection, syntax='vmd')
40 # At this point the number of residues between the original and the non-hydrogens structure should match
41 # We rely on this, so we must check
42 filtered_structure = structure.filter(structure_selection)
43 filtered_residues_count = len(filtered_structure.residues)
44 original_residues_count = len(structure.residues)
45 if filtered_residues_count != original_residues_count:
46 raise ValueError('The number of residues does not match after filtering out hydrogens')
48 # Convert the structure selection to a ndx file
49 selection_name = 'sasa'
50 ndx_selection = structure_selection.to_ndx(selection_name)
51 ndx_filename = 'indices.ndx'
52 with open(ndx_filename, 'w') as file:
53 file.write(ndx_selection)
55 # We must exclude PBC residues (e.g. membranes) from sasa results
56 # PBC residues close to the boundary will always have unrealistic sasa values
57 # WARNING: These results must be exlucded from the analysis afterwards, but the membrane can not be excluded from the structure
58 # Otherwise, those residues which are covered by the membrane would be exposed to the solvent during the analysis
59 skipped_residue_indices = pbc_residues
61 # Calculate the sasa for each frame
62 sasa_per_frame = []
63 frames, step, count = get_pdb_frames(structure_file.path, trajectory_file.path, snapshots, frames_limit)
64 for f, current_frame in enumerate(frames):
66 # Run the sasa analysis over the current frame
67 # WARNING: We want the SASA per residue, and this could be obtained replacing '-oa' per -'or'
68 # WARNING: However, residues are not enumerated the same in Gromacs and other tools (e.g. pytraj)
69 # For this reason we must always rely on atom numeration, which is the same along different tools
70 current_frame_sasa = 'sasa' + str(f) + '.xvg'
71 logs = run([
72 GROMACS_EXECUTABLE,
73 "sasa",
74 "-s",
75 current_frame,
76 '-oa',
77 current_frame_sasa,
78 "-n",
79 ndx_filename,
80 "-surface",
81 "0",
82 '-quiet'
83 ], stdout=PIPE, stderr=PIPE).stdout.decode()
85 # In case the output file does not exist at this point it means something went wrong with Gromacs
86 if not os.path.exists(current_frame_sasa):
87 print(logs)
88 raise SystemExit('Something went wrong with Gromacs')
90 # Mine the sasa results (.xvg file)
91 # Hydrogen areas are not recorded in the xvg file
92 sasa = xvg_parse(current_frame_sasa, ['n', 'area', 'sd'])
93 # Restructure data by adding all atoms sas per residue
94 atom_numbers = sasa['n']
95 atom_areas = sasa['area']
96 sas_per_residues = [0.0] * len(structure.residues)
97 for atom_number, atom_area in zip(atom_numbers, atom_areas):
98 atom_index = int(atom_number) - 1
99 atom = structure.atoms[atom_index]
100 residue_index = atom.residue_index
101 sas_per_residues[residue_index] += atom_area
102 sasa_per_frame.append(sas_per_residues)
103 # Delete current frame files before going for the next frame
104 run([
105 "rm",
106 current_frame_sasa,
107 area_filename,
108 ], stdout=PIPE).stdout.decode()
110 # Remove the indices file since it is not required anymore
111 os.remove(ndx_filename)
113 # Format output data
114 # Sasa values must be separated by residue and then ordered by frame
115 saspf = []
116 means = []
117 stdvs = []
118 for r, residue in enumerate(structure.residues):
119 # Skip membrane residues
120 if r in skipped_residue_indices:
121 saspf.append(None)
122 means.append(None)
123 stdvs.append(None)
124 continue
125 # Get the number of atoms in current residue
126 atom_count = len(residue.atoms)
127 # Harvest its sasa along each frame
128 residue_saspf = []
129 for frame in sasa_per_frame:
130 # IMPORTANT: The original SASA value is modified to be normalized
131 # We divide the value by the number of atoms
132 frame_sas = frame[r]
133 normalized_frame_sas = frame_sas / atom_count
134 # To make is standard with the rest of analyses we pass the results from nm² to A²
135 standard_frame_sas = normalized_frame_sas * 100
136 residue_saspf.append(standard_frame_sas)
137 # Add current resiude sas per frame to the overall list
138 saspf.append(residue_saspf)
139 # Calculate the mean and standard deviation of the residue sasa values
140 mean = numpy.mean(residue_saspf)
141 means.append(mean)
142 stdv = numpy.std(residue_saspf)
143 stdvs.append(stdv)
144 # Set the content of the analysis output file
145 output = {
146 'step': step,
147 'saspf': saspf,
148 'means': means,
149 'stdvs': stdvs,
150 }
152 # Export the analysis in json format
153 save_json(output, output_analysis_filepath)