Coverage for mddb_workflow/utils/remote.py: 69%

108 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1import urllib.request 

2import json 

3from tqdm import tqdm 

4from mddb_workflow.utils.auxiliar import load_json, save_json, InputError 

5from mddb_workflow.utils.type_hints import * 

6 

7 

class Remote:
    """Access one project of a remote database through its REST API.

    All requests are built on top of
    ``{database_url}rest/current/projects/{accession}``, so ``database_url``
    is expected to already end with a slash (NOTE(review): confirm with callers).
    Project data and the list of available files are requested once and then
    cached in the instance.
    """

    def __init__(self, database_url: str, accession: str):
        """Store the inputs and check the remote project is reachable.

        Raises:
            InputError: if the API answers 404 for the project.
            Exception: if the project data request fails for any other reason.
        """
        # Save input arguments
        self.database_url = database_url
        self.accession = accession
        # Set the URL
        self.url = f'{database_url}rest/current/projects/{accession}'
        # Set internal variables
        self._project_data = None
        self._available_files = None
        # Download project data to make sure we have database access and the project exists
        self.get_project_data()

    # Get project data
    # This is only used to make sure the project exists by now
    def get_project_data(self):
        """Return the project data, requesting it to the API only the first time.

        Raises:
            InputError: if the API answers 404 for the project.
            Exception: if the request fails for any other reason.
        """
        # Return the internal value if we already have it
        if self._project_data is not None:
            return self._project_data
        # Otherwise request the project data to the API
        try:
            # The context manager makes sure the connection is closed
            with urllib.request.urlopen(self.url) as response:
                self._project_data = json.loads(response.read())
        except urllib.error.HTTPError as error:
            # Try to provide comprehensive error logs depending on the error
            # If project was not found
            if error.code == 404:
                raise InputError(f'Remote project "{self.accession}" not found')
            # If we don't know the error then simply say something went wrong
            raise Exception(
                f'Error when downloading project data: {self.url} with error: {error}'
            ) from error
        except Exception as error:
            # Non-HTTP failures (e.g. unreachable host, malformed JSON) end up here
            raise Exception(
                f'Something went wrong when requesting project data: {self.url} with error: {error}'
            ) from error
        # Return the freshly downloaded data so the first call matches the cached ones
        return self._project_data

    # Get available files in the remote project
    def get_available_files(self):
        """Return the parsed response of the project ``/files`` endpoint (cached).

        Raises:
            Exception: if the request or the JSON parsing fails.
        """
        # Return the internal value if we already have it
        if self._available_files is not None:
            return self._available_files
        # Otherwise request the available files to the API
        request_url = self.url + '/files'
        try:
            with urllib.request.urlopen(request_url) as response:
                self._available_files = json.loads(response.read())
        except Exception as error:
            raise Exception('Something went wrong when requesting available files: ' + request_url) from error
        return self._available_files
    available_files = property(get_available_files, None, None, "Remote available files (read only)")

    def download_file(self, output_file: 'File'):
        """Download a specific file from the project/files endpoint.

        The remote file is selected by ``output_file.filename`` and written to
        ``output_file.path``.

        Raises:
            Exception: if the server answers with an HTTP error (a 404 is
                reported as a missing remote file).
        """
        request_url = f'{self.url}/files/{output_file.filename}'
        print(f'Downloading file "{output_file.filename}" ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except urllib.error.HTTPError as error:
            if error.code == 404:
                raise Exception(f'Missing remote file "{output_file.filename}"') from error
            # If we don't know the error then simply say something went wrong
            raise Exception(
                f'Something went wrong when downloading file "{output_file.filename}": ' + request_url
            ) from error

    # Download the project standard topology
    def download_standard_topology(self, output_file: 'File'):
        """Download the ``/topology`` endpoint content to ``output_file.path``.

        Raises:
            Exception: if the download fails.
        """
        request_url = self.url + '/topology'
        print(f'Downloading standard topology ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception(
                f'Something went wrong when downloading the standard topology: {request_url} with error: {error}'
            ) from error

    # Download the standard structure
    def download_standard_structure(self, output_file: 'File'):
        """Download the ``/structure`` endpoint content to ``output_file.path``.

        Raises:
            Exception: if the download fails.
        """
        request_url = self.url + '/structure'
        print(f'Downloading standard structure ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception(
                f'Something went wrong when downloading the standard structure: {request_url} with error: {error}'
            ) from error

    # Download the main trajectory
    def download_trajectory(self,
        output_file: 'File',
        frame_selection: Optional[str] = None,
        atom_selection: Optional[str] = None,
        format: Optional[str] = None
    ):
        """Download the main trajectory to ``output_file.path`` showing a progress bar.

        Args:
            output_file: target file; only its ``path`` is used here.
            frame_selection: value sent as the ``frames`` query argument, if any.
            atom_selection: value sent as the ``atoms`` query argument, if any.
            format: value sent as the ``format`` query argument, if any.

        Raises:
            Exception: if the download fails.
        """
        if [frame_selection, atom_selection, format] == [None, None, 'xtc']:
            # If we dont have a specific request, we can download the main trajectory
            # directly from the trajectory.xtc file so it is faster
            request_url = f'{self.url}/files/trajectory.xtc'
        else:
            # Set the base URL
            request_url = self.url + '/trajectory'
            # Additional arguments to be included in the URL
            # NOTE: values are inserted as they come, with no URL encoding
            arguments = []
            if frame_selection:
                arguments.append(f'frames={frame_selection}')
            if atom_selection:
                arguments.append(f'atoms={atom_selection}')
            if format:
                arguments.append(f'format={format}')
            if arguments:
                request_url += '?' + '&'.join(arguments)
        # Send the request
        print(f'Downloading main trajectory ({output_file.path})')
        try:
            with tqdm(unit='B', unit_scale=True, unit_divisor=1024,
                      miniters=1, desc=' Progress', leave=False) as t:
                urllib.request.urlretrieve(request_url, output_file.path, reporthook=my_hook(t))
        except Exception as error:
            raise Exception(
                f'Something went wrong when downloading the main trajectory: {request_url} with error: {error}'
            ) from error

    # Download the inputs file
    def download_inputs_file(self, output_file: 'File'):
        """Download the ``/inputs`` endpoint content to ``output_file.path``.

        When ``output_file.format`` is ``'json'`` the JSON format is requested
        and the downloaded file is rewritten with indentation.

        Raises:
            Exception: if the download fails.
        """
        request_url = self.url + '/inputs'
        # In case this is a json file we must specify the format in the query
        is_json = output_file.format == 'json'
        if is_json:
            request_url += '?format=json'
        # Send the request
        print(f'Downloading inputs file ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the inputs file: ' + request_url) from error
        # If this is a json file then rewrite the inputs file in a pretty formatted way (with indentation)
        if is_json:
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent=4)

    # Get analysis data
    def download_analysis_data(self, analysis_type: str, output_file: 'File'):
        """Download ``/analyses/{analysis_type}`` and rewrite it as indented JSON.

        Raises:
            Exception: if the download or the JSON rewrite fails.
        """
        request_url = f'{self.url}/analyses/{analysis_type}'
        print(f'Downloading {analysis_type} analysis data\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
            # Rewrite the downloaded JSON with indentation
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent=4)
        except Exception as error:
            raise Exception(
                f'Something went wrong when retrieving {analysis_type} analysis: {request_url} with error: {error}'
            ) from error

148 

149# from https://gist.github.com/leimao/37ff6e990b3226c2c9670a2cd1e4a6f5 

def my_hook(t):
    """Wrap a tqdm instance in a callback usable as ``urlretrieve`` reporthook.

    Don't forget to close() or __exit__()
    the tqdm instance once you're done with it (easiest using `with` syntax).

    Example
    -------
    >>> with tqdm(...) as t:
    ...     reporthook = my_hook(t)
    ...     urllib.request.urlretrieve(..., reporthook=reporthook)
    """
    # Number of blocks already reported to the progress bar
    last_b = 0

    def update_to(b=1, bsize=1, tsize=None):
        """Advance the progress bar by the data transferred since the last call.

        b : int, optional
            Number of blocks transferred so far [default: 1].
        bsize : int, optional
            Size of each block (in tqdm units) [default: 1].
        tsize : int, optional
            Total size (in tqdm units). If [default: None] remains unchanged.
        """
        nonlocal last_b
        # urlretrieve reports -1 when the total size is unknown: keep the current total then
        if tsize is not None and tsize >= 0:
            t.total = tsize
        # tqdm expects increments, while urlretrieve reports the accumulated block count
        t.update((b - last_b) * bsize)
        last_b = b

    return update_to