Coverage for model_workflow/utils/remote.py: 65%

1import urllib.request

2import json

3from tqdm import tqdm

4from model_workflow.utils.auxiliar import load_json, save_json, InputError

5from model_workflow.utils.type_hints import *

class Remote:
    """Handler for a single project hosted in a remote database REST API.

    Every request URL is built on top of the project URL:
    ``{database_url}rest/current/projects/{accession}``
    """

    def __init__ (self, database_url : str, accession : str):
        """Save the inputs, build the project URL and request the project data.

        Raises:
            InputError: if the API answers 404 for the project URL.
            Exception: if the project data request fails for any other reason.
        """
        # Save input arguments
        self.database_url = database_url
        self.accession = accession
        # Set the URL
        self.url = f'{database_url}rest/current/projects/{accession}'
        # Set internal variables (caches filled on first request)
        self._project_data = None
        self._available_files = None
        # Download project data to make sure we have database access and the project exists
        self.get_project_data()

    # Get project data
    # This is only used to make sure the project exists by now
    def get_project_data (self):
        """Return the project data, requesting it to the API only the first time.

        Returns:
            The JSON-decoded response body of the project URL.

        Raises:
            InputError: if the API answers 404 (project not found).
            Exception: on any other HTTP error or request/parsing failure.
        """
        # Return the internal value if we already have it
        if self._project_data is not None:
            return self._project_data
        # Otherwise request the project data to the API
        try:
            with urllib.request.urlopen(self.url) as response:
                self._project_data = json.loads(response.read())
        except urllib.error.HTTPError as error:
            # Try to provide comprehensive error logs depending on the error
            # If project was not found
            if error.code == 404:
                raise InputError(f'Remote project "{self.accession}" not found') from error
            # If we don't know the error then simply say something went wrong
            raise Exception(
                f'Error when downloading project data: {self.url} with error: {error}'
            ) from error
        except Exception as error:
            # Non-HTTP failures (e.g. connection problems, invalid JSON) end up here
            raise Exception(
                f'Something went wrong when requesting project data: {self.url} with error: {error}'
            ) from error
        return self._project_data

    # Get available files in the remote project
    def get_available_files (self):
        """Return the available files, requesting them to the API only the first time.

        Returns:
            The JSON-decoded response body of the project ``/files`` endpoint.

        Raises:
            Exception: if the request or the JSON parsing fails.
        """
        # Return the internal value if we already have it
        if self._available_files is not None:
            return self._available_files
        # Otherwise request the available files to the API
        request_url = self.url + '/files'
        try:
            with urllib.request.urlopen(request_url) as response:
                self._available_files = json.loads(response.read())
        except Exception as error:
            raise Exception('Something went wrong when requesting available files: ' + request_url) from error
        return self._available_files
    available_files = property(get_available_files, None, None, "Remote available files (read only)")

    # Download a specific file
    def download_file (self, output_file : 'File'):
        """Download the remote file named ``output_file.filename`` to ``output_file.path``.

        Raises:
            Exception: if the API answers with an HTTP error
                (a dedicated message is used for 404).
        """
        request_url = f'{self.url}/files/{output_file.filename}'
        print(f'Downloading file "{output_file.filename}" ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except urllib.error.HTTPError as error:
            # Try to provide comprehensive error logs depending on the error
            # If file was not found
            if error.code == 404:
                raise Exception(f'Missing remote file "{output_file.filename}"') from error
            # If we don't know the error then simply say something went wrong
            raise Exception(
                f'Something went wrong when downloading file "{output_file.filename}": ' + request_url
            ) from error

    # Download the project standard topology
    def download_standard_topology (self, output_file : 'File'):
        """Download the project ``/topology`` endpoint to ``output_file.path``.

        Raises:
            Exception: if the download fails.
        """
        request_url = self.url + '/topology'
        print(f'Downloading standard topology ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the standard topology: ' + request_url) from error

    # Download the standard structure
    def download_standard_structure (self, output_file : 'File'):
        """Download the project ``/structure`` endpoint to ``output_file.path``.

        Raises:
            Exception: if the download fails.
        """
        request_url = self.url + '/structure'
        print(f'Downloading standard structure ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the standard structure: ' + request_url) from error

    # Download the main trajectory
    def download_trajectory (self,
        output_file : 'File',
        frame_selection : Optional[str] = None,
        atom_selection : Optional[str] = None,
        format : Optional[str] = None
    ):
        """Download the main trajectory to ``output_file.path`` showing a progress bar.

        Args:
            output_file: object whose ``path`` attribute is the download destination.
            frame_selection: value sent as the ``frames`` query argument, if any.
            atom_selection: value sent as the ``atoms`` query argument, if any.
            format: value sent as the ``format`` query argument, if any.

        Raises:
            Exception: if the download fails.
        """
        no_selection = frame_selection is None and atom_selection is None
        if no_selection and format == 'xtc':
            # If we dont have a specific request, we can download the main trajectory
            # directly from the trajectory.xtc file so it is faster
            request_url = f'{self.url}/files/trajectory.xtc'
        else:
            # Set the base URL
            request_url = self.url + '/trajectory'
            # Additional arguments to be included in the URL
            # NOTE: values are inserted verbatim, they are not URL-encoded here
            arguments = []
            if frame_selection:
                arguments.append(f'frames={frame_selection}')
            if atom_selection:
                arguments.append(f'atoms={atom_selection}')
            if format:
                arguments.append(f'format={format}')
            if arguments:
                request_url += '?' + '&'.join(arguments)
        # Send the request
        print(f'Downloading main trajectory ({output_file.path})')
        try:
            with tqdm(unit = 'B', unit_scale = True, unit_divisor = 1024,
                miniters = 1, desc = ' Progress', leave=False) as t:
                urllib.request.urlretrieve(request_url, output_file.path, reporthook = my_hook(t))
        except Exception as error:
            raise Exception('Something went wrong when downloading the main trajectory: ' + request_url) from error

    # Download the inputs file
    def download_inputs_file (self, output_file : 'File'):
        """Download the project ``/inputs`` endpoint to ``output_file.path``.

        When ``output_file.format`` is ``'json'`` the JSON format is requested
        and the downloaded file is rewritten with a 4-space indentation.

        Raises:
            Exception: if the download fails.
        """
        request_url = self.url + '/inputs'
        # In case this is a json file we must specify the format in the query
        is_json = output_file.format == 'json'
        if is_json:
            request_url += '?format=json'
        # Send the request
        print(f'Downloading inputs file ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the inputs file: ' + request_url) from error
        # If this is a json file then rewrite the inputs file in a pretty formatted way (with indentation)
        if is_json:
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent = 4)

    # Get analysis data
    def download_analysis_data(self, analysis_type: str, output_file: 'File'):
        """Download ``/analyses/{analysis_type}`` to ``output_file.path`` and re-indent it.

        Raises:
            Exception: if the download or the JSON rewrite fails.
        """
        request_url = f'{self.url}/analyses/{analysis_type}'
        print(f'Downloading {analysis_type} analysis data\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
            # Rewrite the downloaded JSON with indentation
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent=4)
        except Exception as error:
            raise Exception(f'Something went wrong when retrieving {analysis_type} analysis: {request_url}') from error

149

150# from https://gist.github.com/leimao/37ff6e990b3226c2c9670a2cd1e4a6f5

def my_hook(t):
    """Build a ``reporthook`` callback that forwards download progress to ``t``.

    ``t`` is expected to be a tqdm-like object exposing a ``total`` attribute
    and an ``update`` method. Remember to close the tqdm instance once the
    download is over (easiest using ``with`` syntax).

    Example
    -------
    >>> with tqdm(...) as t:
    ...     reporthook = my_hook(t)
    ...     urllib.request.urlretrieve(..., reporthook=reporthook)
    """
    # Number of blocks already reported to the progress bar
    previous_blocks = 0

    def update_to(b=1, bsize=1, tsize=None):
        """
        b : int, optional
            Number of blocks transferred so far [default: 1].
        bsize : int, optional
            Size of each block (in tqdm units) [default: 1].
        tsize : int, optional
            Total size (in tqdm units). If [default: None] remains unchanged.
        """
        nonlocal previous_blocks
        if tsize is not None:
            t.total = tsize
        # Only report the blocks transferred since the previous call
        new_blocks = b - previous_blocks
        t.update(new_blocks * bsize)
        previous_blocks = b

    return update_to