Coverage for model_workflow/utils/remote.py: 65%
108 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1import urllib.request
2import json
3from tqdm import tqdm
4from model_workflow.utils.auxiliar import load_json, save_json, InputError
5from model_workflow.utils.type_hints import *
class Remote:
    """Client for one project of a remote database REST API.

    The project endpoint is built from the database URL and the project
    accession. Every other request (files, topology, structure, trajectory,
    inputs, analyses) is issued against that endpoint.
    """

    def __init__(self, database_url: str, accession: str):
        """Store the inputs, build the project URL and check the project exists.

        Args:
            database_url: Base URL of the database. It is concatenated as-is
                with 'rest/current/projects/', so it is expected to already
                end with '/' (NOTE(review): confirm against callers).
            accession: Identifier of the remote project.

        Raises:
            InputError: If the API answers 404 for the project.
            Exception: If the project data cannot be requested for any other reason.
        """
        # Save input arguments
        self.database_url = database_url
        self.accession = accession
        # Set the URL
        self.url = f'{database_url}rest/current/projects/{accession}'
        # Set internal variables (lazy caches, None means "not requested yet")
        self._project_data = None
        self._available_files = None
        # Download project data to make sure we have database access and the project exists
        self.get_project_data()

    # Get project data
    # This is only used to make sure the project exists by now
    def get_project_data(self):
        """Return the project data, requesting it to the API only once.

        Returns:
            The parsed JSON response of the project endpoint.

        Raises:
            InputError: If the API answers 404 (project not found).
            Exception: On any other HTTP error or request/parsing failure.
        """
        # Return the internal value if we already have it
        if self._project_data is not None:
            return self._project_data
        # Otherwise request the project data to the API
        try:
            # The context manager makes sure the response is closed
            with urllib.request.urlopen(self.url) as response:
                self._project_data = json.loads(response.read())
        except urllib.error.HTTPError as error:
            # Try to provide comprehensive error logs depending on the error
            # If project was not found
            if error.code == 404:
                raise InputError(f'Remote project "{self.accession}" not found') from error
            # If we don't know the error then simply say something went wrong
            raise Exception(
                f'Error when downloading project data: {self.url} with error: {error}'
            ) from error
        except Exception as error:
            # Non HTTP failures (e.g. connection problems or malformed JSON)
            raise Exception(
                f'Something went wrong when requesting project data: {self.url} with error: {error}'
            ) from error
        # Return the value also the first time it is requested
        return self._project_data

    # Get available files in the remote project
    def get_available_files(self):
        """Return the parsed response of the project '/files' endpoint (cached).

        Raises:
            Exception: If the request or the JSON parsing fails.
        """
        # Return the internal value if we already have it
        if self._available_files is not None:
            return self._available_files
        # Otherwise request the available files to the API
        request_url = self.url + '/files'
        try:
            with urllib.request.urlopen(request_url) as response:
                self._available_files = json.loads(response.read())
        except Exception as error:
            raise Exception('Something went wrong when requesting available files: ' + request_url) from error
        return self._available_files
    available_files = property(get_available_files, None, None, "Remote available files (read only)")

    # Download a specific file
    def download_file(self, output_file: 'File'):
        """Download the remote file named as the output file.

        Args:
            output_file: Target file; its 'filename' selects the remote file
                and its 'path' is where the content is written.

        Raises:
            Exception: If the API answers with an HTTP error (404 gets a
                dedicated "missing file" message).
        """
        request_url = f'{self.url}/files/{output_file.filename}'
        print(f'Downloading file "{output_file.filename}" ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except urllib.error.HTTPError as error:
            # Try to provide comprehensive error logs depending on the error
            # If file was not found
            if error.code == 404:
                raise Exception(f'Missing remote file "{output_file.filename}"') from error
            # If we don't know the error then simply say something went wrong
            raise Exception(
                f'Something went wrong when downloading file "{output_file.filename}": ' + request_url
            ) from error

    # Download the project standard topology
    def download_standard_topology(self, output_file: 'File'):
        """Download the '/topology' endpoint content to the output file path.

        Raises:
            Exception: If the download fails.
        """
        request_url = self.url + '/topology'
        print(f'Downloading standard topology ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the standard topology: ' + request_url) from error

    # Download the standard structure
    def download_standard_structure(self, output_file: 'File'):
        """Download the '/structure' endpoint content to the output file path.

        Raises:
            Exception: If the download fails.
        """
        request_url = self.url + '/structure'
        print(f'Downloading standard structure ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the standard structure: ' + request_url) from error

    # Download the main trajectory
    def download_trajectory(self,
        output_file: 'File',
        frame_selection: Optional[str] = None,
        atom_selection: Optional[str] = None,
        format: Optional[str] = None
    ):
        """Download the main trajectory to the output file path showing a progress bar.

        Args:
            output_file: Target file; the trajectory is written to its 'path'.
            frame_selection: Optional value for the 'frames' query argument.
            atom_selection: Optional value for the 'atoms' query argument.
            format: Optional value for the 'format' query argument.

        Raises:
            Exception: If the download fails.
        """
        if [frame_selection, atom_selection, format] == [None, None, 'xtc']:
            # If we dont have a specific request, we can download the main trajectory
            # directly from the trajectory.xtc file so it is faster
            request_url = f'{self.url}/files/trajectory.xtc'
        else:
            # Set the base URL
            request_url = self.url + '/trajectory'
            # Additional arguments to be included in the URL
            arguments = []
            if frame_selection:
                arguments.append(f'frames={frame_selection}')
            if atom_selection:
                arguments.append(f'atoms={atom_selection}')
            if format:
                arguments.append(f'format={format}')
            if arguments:
                request_url += '?' + '&'.join(arguments)
        # Send the request
        print(f'Downloading main trajectory ({output_file.path})')
        try:
            with tqdm(unit = 'B', unit_scale = True, unit_divisor = 1024,
                miniters = 1, desc = ' Progress', leave=False) as t:
                urllib.request.urlretrieve(request_url, output_file.path, reporthook = my_hook(t))
        except Exception as error:
            raise Exception('Something went wrong when downloading the main trajectory: ' + request_url) from error

    # Download the inputs file
    def download_inputs_file(self, output_file: 'File'):
        """Download the '/inputs' endpoint content to the output file path.

        When the output file format is 'json' the JSON format is requested
        and the downloaded file is rewritten with indentation.

        Raises:
            Exception: If the download fails.
        """
        request_url = self.url + '/inputs'
        # In case this is a json file we must specify the format in the query
        is_json = output_file.format == 'json'
        if is_json:
            request_url += '?format=json'
        # Send the request
        print(f'Downloading inputs file ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the inputs file: ' + request_url) from error
        # If this is a json file then rewrite the inputs file in a pretty formatted way (with indentation)
        if is_json:
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent = 4)

    # Get analysis data
    def download_analysis_data(self, analysis_type: str, output_file: 'File'):
        """Download the data of one analysis and rewrite it as indented JSON.

        Args:
            analysis_type: Name of the analysis, appended to the '/analyses/' endpoint.
            output_file: Target file; the data is written to its 'path'.

        Raises:
            Exception: If the download or the JSON rewrite fails.
        """
        request_url = f'{self.url}/analyses/{analysis_type}'
        print(f'Downloading {analysis_type} analysis data\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
            # Rewrite the downloaded JSON with indentation
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent=4)
        except Exception as error:
            raise Exception(f'Something went wrong when retrieving {analysis_type} analysis: {request_url}') from error
150# from https://gist.github.com/leimao/37ff6e990b3226c2c9670a2cd1e4a6f5
def my_hook(t):
    """Build a urlretrieve 'reporthook' which feeds a tqdm-like progress bar.

    The returned callback keeps track of the last reported block count so the
    bar is advanced only by the newly transferred amount on every call.
    Remember to close the bar once done (easiest using the `with` syntax).

    Example
    -------
    >>> with tqdm(...) as t:
    ...     urllib.request.urlretrieve(..., reporthook=my_hook(t))
    """
    # Number of blocks reported in the previous call
    previous_blocks = 0

    def update_to(b=1, bsize=1, tsize=None):
        """Advance the bar according to the transfer progress.

        b : int, optional
            Number of blocks transferred so far [default: 1].
        bsize : int, optional
            Size of each block (in tqdm units) [default: 1].
        tsize : int, optional
            Total size (in tqdm units). If [default: None] remains unchanged.
        """
        nonlocal previous_blocks
        # Set the bar total only when the total size is reported
        if tsize is not None:
            t.total = tsize
        # Advance only by the blocks transferred since the previous call
        new_blocks = b - previous_blocks
        t.update(new_blocks * bsize)
        previous_blocks = b

    return update_to