Coverage for model_workflow/utils/remote.py: 65%

108 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1import urllib.request 

2import json 

3from tqdm import tqdm 

4from model_workflow.utils.auxiliar import load_json, save_json, InputError 

5from model_workflow.utils.type_hints import * 

6 

class Remote:
    """Client for one project of a remote database REST API.

    Every request hangs from ``{database_url}rest/current/projects/{accession}``.
    Project data and the list of available files are requested once and then
    kept in memory for further calls.
    """

    def __init__ (self, database_url : str, accession : str):
        """Store the inputs, build the project URL and check the project is reachable.

        Args:
            database_url: Base URL of the database. It is concatenated as is
                with 'rest/current/projects/...', so it is expected to end with '/'.
            accession: Accession of the project in the remote database.

        Raises:
            InputError: If the API answers 404 for this project.
            Exception: If the project data request fails for any other reason.
        """
        # Save input arguments
        self.database_url = database_url
        self.accession = accession
        # Set the URL
        self.url = f'{database_url}rest/current/projects/{accession}'
        # Set internal variables
        self._project_data = None
        self._available_files = None
        # Download project data to make sure we have database access and the project exists
        self.get_project_data()

    # Get project data
    # This is only used to make sure the project exists by now
    def get_project_data (self):
        """Return the project data, requesting it to the API only the first time.

        Returns:
            The parsed JSON body returned by the project endpoint.

        Raises:
            InputError: If the API answers 404 (project not found).
            Exception: If the request or the JSON parsing fails for any other reason.
        """
        # Return the internal value if we already have it
        if self._project_data is not None:
            return self._project_data
        # Otherwise request the project data to the API
        try:
            # The context manager makes sure the connection is closed once read
            with urllib.request.urlopen(self.url) as response:
                self._project_data = json.loads(response.read())
        except urllib.error.HTTPError as error:
            # Try to provide comprehensive error logs depending on the error
            # If project was not found
            if error.code == 404:
                raise InputError(f'Remote project "{self.accession}" not found') from error
            # If we don't know the error then simply say something went wrong
            raise Exception(f'Error when downloading project data: {self.url} with error: {error}') from error
        except Exception as error:
            # Non-HTTP failures (e.g. connection problems, malformed JSON) end here
            raise Exception(f'Something went wrong when requesting project data: {self.url} with error: {error}') from error
        return self._project_data

    # Get available files in the remote project
    def get_available_files (self):
        """Return the parsed JSON answer of the project 'files' endpoint.

        The answer is requested only the first time and then kept in memory.

        Raises:
            Exception: If the request or the JSON parsing fails.
        """
        # Return the internal value if we already have it
        if self._available_files is not None:
            return self._available_files
        # Otherwise request the available files to the API
        request_url = self.url + '/files'
        try:
            with urllib.request.urlopen(request_url) as response:
                self._available_files = json.loads(response.read())
        except Exception as error:
            raise Exception('Something went wrong when requesting available files: ' + request_url) from error
        return self._available_files
    available_files = property(get_available_files, None, None, "Remote available files (read only)")

    # Download a specific file
    def download_file (self, output_file : 'File') -> None:
        """Download the remote file named after ``output_file.filename`` to ``output_file.path``.

        Raises:
            Exception: If the remote file is missing (404) or the download fails
                with any other HTTP error.
        """
        request_url = f'{self.url}/files/{output_file.filename}'
        print(f'Downloading file "{output_file.filename}" ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except urllib.error.HTTPError as error:
            # Try to provide comprehensive error logs depending on the error
            # If file was not found
            if error.code == 404:
                raise Exception(f'Missing remote file "{output_file.filename}"') from error
            # If we don't know the error then simply say something went wrong
            raise Exception(f'Something went wrong when downloading file "{output_file.filename}": ' + request_url) from error

    # Download the project standard topology
    def download_standard_topology (self, output_file : 'File') -> None:
        """Download the answer of the 'topology' endpoint to ``output_file.path``.

        Raises:
            Exception: If the download fails.
        """
        request_url = self.url + '/topology'
        print(f'Downloading standard topology ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the standard topology: ' + request_url) from error

    # Download the standard structure
    def download_standard_structure (self, output_file : 'File') -> None:
        """Download the answer of the 'structure' endpoint to ``output_file.path``.

        Raises:
            Exception: If the download fails.
        """
        request_url = self.url + '/structure'
        print(f'Downloading standard structure ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the standard structure: ' + request_url) from error

    # Download the main trajectory
    def download_trajectory (self,
        output_file : 'File',
        frame_selection : Optional[str] = None,
        atom_selection : Optional[str] = None,
        format : Optional[str] = None
    ) -> None:
        """Download the main trajectory to ``output_file.path`` showing a progress bar.

        Args:
            output_file: Object whose ``path`` attribute is the download destination.
            frame_selection: Value sent as the 'frames' query argument, if any.
            atom_selection: Value sent as the 'atoms' query argument, if any.
            format: Value sent as the 'format' query argument, if any.

        Raises:
            Exception: If the download fails.
        """
        if frame_selection is None and atom_selection is None and format == 'xtc':
            # If we dont have a specific request, we can download the main trajectory
            # directly from the trajectory.xtc file so it is faster
            # Note that this shortcut is taken only when the format is explicitly 'xtc'
            request_url = f'{self.url}/files/trajectory.xtc'
        else:
            # Set the base URL
            request_url = self.url + '/trajectory'
            # Additional arguments to be included in the URL
            arguments = []
            if frame_selection:
                arguments.append(f'frames={frame_selection}')
            if atom_selection:
                arguments.append(f'atoms={atom_selection}')
            if format:
                arguments.append(f'format={format}')
            if arguments:
                request_url += '?' + '&'.join(arguments)
        # Send the request
        print(f'Downloading main trajectory ({output_file.path})')
        try:
            with tqdm(unit = 'B', unit_scale = True, unit_divisor = 1024,
                miniters = 1, desc = ' Progress', leave=False) as t:
                urllib.request.urlretrieve(request_url, output_file.path, reporthook = my_hook(t))
        except Exception as error:
            raise Exception('Something went wrong when downloading the main trajectory: ' + request_url) from error

    # Download the inputs file
    def download_inputs_file (self, output_file : 'File') -> None:
        """Download the answer of the 'inputs' endpoint to ``output_file.path``.

        When ``output_file.format`` is 'json' the format is requested explicitly
        in the query and the downloaded file is rewritten with indentation.

        Raises:
            Exception: If the download fails.
        """
        request_url = self.url + '/inputs'
        # In case this is a json file we must specify the format in the query
        is_json = output_file.format == 'json'
        if is_json:
            request_url += '?format=json'
        # Send the request
        print(f'Downloading inputs file ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the inputs file: ' + request_url) from error
        # If this is a json file then rewrite the inputs file in a pretty formatted way (with indentation)
        if is_json:
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent = 4)

    # Get analysis data
    def download_analysis_data(self, analysis_type: str, output_file: 'File') -> None:
        """Download the data of one analysis to ``output_file.path`` and rewrite it as indented JSON.

        Args:
            analysis_type: Name of the analysis, appended to the 'analyses' endpoint.
            output_file: Object whose ``path`` attribute is the download destination.

        Raises:
            Exception: If the download or the JSON rewriting fails.
        """
        request_url = f'{self.url}/analyses/{analysis_type}'
        print(f'Downloading {analysis_type} analysis data\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
            # Format JSON if needed
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent=4)
        except Exception as error:
            raise Exception(f'Something went wrong when retrieving {analysis_type} analysis: {request_url}') from error

149 

150# from https://gist.github.com/leimao/37ff6e990b3226c2c9670a2cd1e4a6f5 

def my_hook(t):
    """Build a ``reporthook`` callback that feeds a tqdm-like progress bar.

    The returned function has the signature ``urllib.request.urlretrieve``
    expects for its ``reporthook`` argument. The caller remains responsible
    for closing the bar (easiest using `with` syntax).

    Example
    -------
    >>> with tqdm(...) as t:
    ...     urllib.request.urlretrieve(..., reporthook=my_hook(t))
    """
    # Number of blocks reported in the previous call
    blocks_seen = 0

    def update_to(b=1, bsize=1, tsize=None):
        """Advance the bar by the data transferred since the previous call.

        b : int, optional
            Number of blocks transferred so far [default: 1].
        bsize : int, optional
            Size of each block (in tqdm units) [default: 1].
        tsize : int, optional
            Total size (in tqdm units). If [default: None] remains unchanged.
        """
        nonlocal blocks_seen
        if tsize is not None:
            t.total = tsize
        # The bar expects an increment, while the hook receives a running block count
        increment = (b - blocks_seen) * bsize
        t.update(increment)
        blocks_seen = b

    return update_to