Coverage for mddb_workflow/utils/remote.py: 69%
108 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
1import urllib.request
2import json
3from tqdm import tqdm
4from mddb_workflow.utils.auxiliar import load_json, save_json, InputError
5from mddb_workflow.utils.type_hints import *
class Remote:
    """Handler for one project of a remote database REST API.

    The project endpoint is built as
    ``{database_url}rest/current/projects/{accession}``. Note that
    ``database_url`` is concatenated as-is, so it is expected to already end
    with a slash.

    Creating an instance immediately requests the project data, so a missing
    project or an unreachable database is reported at construction time.
    """

    def __init__(self, database_url: str, accession: str):
        # Save input arguments
        self.database_url = database_url
        self.accession = accession
        # Set the URL
        self.url = f'{database_url}rest/current/projects/{accession}'
        # Set internal variables (lazy caches filled on first request)
        self._project_data = None
        self._available_files = None
        # Download project data to make sure we have database access and the project exists
        self.get_project_data()

    def get_project_data(self):
        """Return the project data, requesting it to the API only once.

        This is only used to make sure the project exists by now.

        Raises:
            InputError: if the API answers 404 for this accession.
            Exception: for any other failure while requesting or parsing the data.
        """
        # Return the internal value if we already have it
        if self._project_data is not None:
            return self._project_data
        # Otherwise request the project data to the API
        try:
            # The context manager makes sure the response is closed
            with urllib.request.urlopen(self.url) as response:
                self._project_data = json.loads(response.read())
        except urllib.error.HTTPError as error:
            # Try to provide comprehensive error logs depending on the error
            # If project was not found
            if error.code == 404:
                raise InputError(f'Remote project "{self.accession}" not found')
            # If we don't know the error then simply say something went wrong
            raise Exception(
                f'Error when downloading project data: {self.url} with error: {error}'
            ) from error
        except Exception as error:
            # Any non-HTTP failure (e.g. malformed URL, connection error, invalid JSON)
            raise Exception(
                f'Something went wrong when requesting project data: {self.url} with error: {error}'
            ) from error
        # Return the freshly downloaded value, same as cached calls do
        return self._project_data

    def get_available_files(self):
        """Return the available files in the remote project, requesting them only once.

        Raises:
            Exception: if the request or the parsing of its response fails.
        """
        # Return the internal value if we already have it
        if self._available_files is not None:
            return self._available_files
        # Otherwise request the available files to the API
        request_url = self.url + '/files'
        try:
            with urllib.request.urlopen(request_url) as response:
                self._available_files = json.loads(response.read())
        except Exception as error:
            raise Exception(
                'Something went wrong when requesting available files: ' + request_url
            ) from error
        return self._available_files
    available_files = property(get_available_files, None, None, "Remote available files (read only)")

    def download_file(self, output_file: 'File'):
        """Download a specific file from the project/files endpoint.

        The remote file is selected by ``output_file.filename`` and it is
        written to ``output_file.path``.

        Raises:
            Exception: if the API answers with an HTTP error (404 is reported
                as a missing remote file).
        """
        request_url = f'{self.url}/files/{output_file.filename}'
        print(f'Downloading file "{output_file.filename}" ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except urllib.error.HTTPError as error:
            if error.code == 404:
                raise Exception(f'Missing remote file "{output_file.filename}"') from error
            # If we don't know the error then simply say something went wrong
            raise Exception(
                f'Something went wrong when downloading file "{output_file.filename}": ' + request_url
            ) from error

    def download_standard_topology(self, output_file: 'File'):
        """Download the project standard topology to ``output_file.path``.

        Raises:
            Exception: if the download fails for any reason.
        """
        request_url = self.url + '/topology'
        print(f'Downloading standard topology ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception(
                f'Something went wrong when downloading the standard topology: {request_url} with error: {error}'
            ) from error

    def download_standard_structure(self, output_file: 'File'):
        """Download the standard structure to ``output_file.path``.

        Raises:
            Exception: if the download fails for any reason.
        """
        request_url = self.url + '/structure'
        print(f'Downloading standard structure ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception(
                f'Something went wrong when downloading the standard structure: {request_url} with error: {error}'
            ) from error

    def download_trajectory(
        self,
        output_file: 'File',
        frame_selection: Optional[str] = None,
        atom_selection: Optional[str] = None,
        format: Optional[str] = None,
    ):
        """Download the main trajectory to ``output_file.path``.

        Args:
            output_file: file object whose ``path`` receives the download.
            frame_selection: optional value sent as the ``frames`` query argument.
            atom_selection: optional value sent as the ``atoms`` query argument.
            format: optional value sent as the ``format`` query argument.

        Raises:
            Exception: if the download fails for any reason.
        """
        if frame_selection is None and atom_selection is None and format == 'xtc':
            # If we dont have a specific request, we can download the main trajectory
            # directly from the trajectory.xtc file so it is faster
            request_url = f'{self.url}/files/trajectory.xtc'
        else:
            # Set the base URL
            request_url = self.url + '/trajectory'
            # Additional arguments to be included in the URL
            # NOTE: values are inserted verbatim, with no URL encoding applied here
            arguments = []
            if frame_selection:
                arguments.append(f'frames={frame_selection}')
            if atom_selection:
                arguments.append(f'atoms={atom_selection}')
            if format:
                arguments.append(f'format={format}')
            if arguments:
                request_url += '?' + '&'.join(arguments)
        # Send the request
        print(f'Downloading main trajectory ({output_file.path})')
        try:
            with tqdm(unit='B', unit_scale=True, unit_divisor=1024,
                      miniters=1, desc=' Progress', leave=False) as t:
                urllib.request.urlretrieve(request_url, output_file.path, reporthook=my_hook(t))
        except Exception as error:
            raise Exception(
                f'Something went wrong when downloading the main trajectory: {request_url} with error: {error}'
            ) from error

    def download_inputs_file(self, output_file: 'File'):
        """Download the inputs file to ``output_file.path``.

        When ``output_file.format`` is ``'json'`` the JSON format is requested
        explicitly and the downloaded file is rewritten with indentation.

        Raises:
            Exception: if the download fails for any reason.
        """
        request_url = self.url + '/inputs'
        # In case this is a json file we must specify the format in the query
        is_json = output_file.format == 'json'
        if is_json:
            request_url += '?format=json'
        # Send the request
        print(f'Downloading inputs file ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception(
                'Something went wrong when downloading the inputs file: ' + request_url
            ) from error
        # If this is a json file then rewrite the inputs file in a pretty formatted way (with indentation)
        if is_json:
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent=4)

    def download_analysis_data(self, analysis_type: str, output_file: 'File'):
        """Download the data of one analysis and save it as indented JSON.

        Args:
            analysis_type: name appended to the ``/analyses/`` endpoint.
            output_file: file object whose ``path`` receives the download.

        Raises:
            Exception: if the download or the JSON rewrite fails.
        """
        request_url = f'{self.url}/analyses/{analysis_type}'
        print(f'Downloading {analysis_type} analysis data\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
            # Format JSON if needed
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent=4)
        except Exception as error:
            raise Exception(
                f'Something went wrong when retrieving {analysis_type} analysis: {request_url} with error: {error}'
            ) from error
149# from https://gist.github.com/leimao/37ff6e990b3226c2c9670a2cd1e4a6f5
def my_hook(t):
    """Build a report hook that forwards download progress to a tqdm instance.

    The returned callable is meant to be passed as ``reporthook`` to
    ``urllib.request.urlretrieve``. Remember to close the tqdm instance once
    the download is over (easiest using the ``with`` syntax).

    Example::

        with tqdm(...) as t:
            urllib.request.urlretrieve(..., reporthook=my_hook(t))
    """
    # Block count reported in the previous call
    previous_block = 0

    def update_to(b=1, bsize=1, tsize=None):
        """Advance the progress bar by the blocks received since the last call.

        b : int, optional
            Number of blocks transferred so far [default: 1].
        bsize : int, optional
            Size of each block (in tqdm units) [default: 1].
        tsize : int, optional
            Total size (in tqdm units). If [default: None] remains unchanged.
        """
        nonlocal previous_block
        if tsize is not None:
            t.total = tsize
        # tqdm expects increments, while the hook receives the cumulative block count
        blocks_advanced = b - previous_block
        t.update(blocks_advanced * bsize)
        previous_block = b

    return update_to