Coverage for mddb_workflow/utils/remote.py: 69%

1import urllib.request

2import json

3from tqdm import tqdm

4from mddb_workflow.utils.auxiliar import load_json, save_json, InputError

5from mddb_workflow.utils.type_hints import *

class Remote:
    """Handler for a single project hosted in a remote database REST API.

    All requests are addressed relative to
    ``{database_url}rest/current/projects/{accession}``. Note that the URL is
    built by plain concatenation, so ``database_url`` must already end with a
    slash.
    """

    def __init__(self, database_url: str, accession: str):
        """Store the target and check that the remote project is reachable.

        Args:
            database_url: Base URL of the database, including the trailing slash.
            accession: Identifier of the project inside the database.

        Raises:
            InputError: If the API answers 404 for the project.
            Exception: If the project data cannot be requested for any other reason.
        """
        # Save input arguments
        self.database_url = database_url
        self.accession = accession
        # Set the URL
        self.url = f'{database_url}rest/current/projects/{accession}'
        # Set internal variables
        self._project_data = None
        self._available_files = None
        # Download project data to make sure we have database access and the project exists
        self.get_project_data()

    def get_project_data(self):
        """Return the project data, requesting it to the API only the first time.

        This is only used to make sure the project exists by now.

        Returns:
            The parsed JSON body of the project endpoint.

        Raises:
            InputError: If the API answers 404 for the project.
            Exception: If the request or the JSON parsing fails for any other reason.
        """
        # Return the internal value if we already have it
        if self._project_data is not None:
            return self._project_data
        # Otherwise request the project data to the API
        try:
            # The context manager makes sure the connection is closed after reading
            with urllib.request.urlopen(self.url) as response:
                self._project_data = json.loads(response.read())
        except urllib.error.HTTPError as error:
            # Try to provide comprehensive error logs depending on the error
            # If project was not found
            if error.code == 404:
                raise InputError(f'Remote project "{self.accession}" not found') from error
            # If we don't know the error then simply say something went wrong
            raise Exception(f'Error when downloading project data: {self.url} with error: {error}') from error
        except Exception as error:
            # Connection problems, malformed JSON, etc.
            raise Exception(f'Something went wrong when requesting project data: {self.url} with error: {error}') from error
        return self._project_data

    def get_available_files(self):
        """Return the files available in the remote project.

        The value is requested to the ``/files`` endpoint the first time and
        then kept in memory for further calls.

        Returns:
            The parsed JSON body of the files endpoint.

        Raises:
            Exception: If the request or the JSON parsing fails.
        """
        # Return the internal value if we already have it
        if self._available_files is not None:
            return self._available_files
        # Otherwise request the available files to the API
        request_url = self.url + '/files'
        try:
            with urllib.request.urlopen(request_url) as response:
                self._available_files = json.loads(response.read())
        except Exception as error:
            raise Exception('Something went wrong when requesting available files: ' + request_url) from error
        return self._available_files
    available_files = property(get_available_files, None, None, "Remote available files (read only)")

    def download_file(self, output_file: 'File'):
        """Download a specific file from the project/files endpoint.

        The remote file is selected by ``output_file.filename`` and it is
        written to ``output_file.path``.

        Raises:
            Exception: If the API answers with an HTTP error (404 means the
                remote file is missing). Other errors are not intercepted here.
        """
        request_url = f'{self.url}/files/{output_file.filename}'
        print(f'Downloading file "{output_file.filename}" ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except urllib.error.HTTPError as error:
            if error.code == 404:
                raise Exception(f'Missing remote file "{output_file.filename}"') from error
            # If we don't know the error then simply say something went wrong
            raise Exception(f'Something went wrong when downloading file "{output_file.filename}": ' + request_url) from error

    def download_standard_topology(self, output_file: 'File'):
        """Download the project standard topology to ``output_file.path``.

        Raises:
            Exception: If the download fails for any reason.
        """
        request_url = self.url + '/topology'
        print(f'Downloading standard topology ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception(f'Something went wrong when downloading the standard topology: {request_url} with error: {error}') from error

    def download_standard_structure(self, output_file: 'File'):
        """Download the standard structure to ``output_file.path``.

        Raises:
            Exception: If the download fails for any reason.
        """
        request_url = self.url + '/structure'
        print(f'Downloading standard structure ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception(f'Something went wrong when downloading the standard structure: {request_url} with error: {error}') from error

    def download_trajectory(self,
        output_file: 'File',
        frame_selection: Optional[str] = None,
        atom_selection: Optional[str] = None,
        format: Optional[str] = None
    ):
        """Download the main trajectory to ``output_file.path``.

        Args:
            output_file: File whose path receives the downloaded trajectory.
            frame_selection: Value sent as the ``frames`` query argument, if any.
            atom_selection: Value sent as the ``atoms`` query argument, if any.
            format: Value sent as the ``format`` query argument, if any.

        Raises:
            Exception: If the download fails for any reason.
        """
        if [frame_selection, atom_selection, format] == [None, None, 'xtc']:
            # If we dont have a specific request, we can download the main trajectory
            # directly from the trajectory.xtc file so it is faster
            request_url = f'{self.url}/files/trajectory.xtc'
        else:
            # Set the base URL
            request_url = self.url + '/trajectory'
            # Additional arguments to be included in the URL
            arguments = []
            if frame_selection:
                arguments.append(f'frames={frame_selection}')
            if atom_selection:
                arguments.append(f'atoms={atom_selection}')
            if format:
                arguments.append(f'format={format}')
            if arguments:
                request_url += '?' + '&'.join(arguments)
        # Send the request
        print(f'Downloading main trajectory ({output_file.path})')
        try:
            # The progress bar is fed by urlretrieve through the report hook
            with tqdm(unit = 'B', unit_scale = True, unit_divisor = 1024,
                miniters = 1, desc = ' Progress', leave=False) as t:
                urllib.request.urlretrieve(request_url, output_file.path, reporthook = my_hook(t))
        except Exception as error:
            raise Exception(f'Something went wrong when downloading the main trajectory: {request_url} with error: {error}') from error

    def download_inputs_file(self, output_file: 'File'):
        """Download the inputs file to ``output_file.path``.

        When ``output_file.format`` is ``'json'`` the JSON format is requested
        explicitly and the downloaded file is rewritten with indentation.

        Raises:
            Exception: If the download fails for any reason.
        """
        request_url = self.url + '/inputs'
        # In case this is a json file we must specify the format in the query
        is_json = output_file.format == 'json'
        if is_json:
            request_url += '?format=json'
        # Send the request
        print(f'Downloading inputs file ({output_file.path})\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
        except Exception as error:
            raise Exception('Something went wrong when downloading the inputs file: ' + request_url) from error
        # If this is a json file then rewrite the inputs file in a pretty formatted way (with indentation)
        if is_json:
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent = 4)

    def download_analysis_data(self, analysis_type: str, output_file: 'File'):
        """Download the data of one analysis and rewrite it as indented JSON.

        Args:
            analysis_type: Name of the analysis, appended to the ``/analyses/`` endpoint.
            output_file: File whose path receives the downloaded data.

        Raises:
            Exception: If the download or the JSON rewriting fails.
        """
        request_url = f'{self.url}/analyses/{analysis_type}'
        print(f'Downloading {analysis_type} analysis data\n')
        try:
            urllib.request.urlretrieve(request_url, output_file.path)
            # Format JSON if needed
            file_content = load_json(output_file.path)
            save_json(file_content, output_file.path, indent=4)
        except Exception as error:
            raise Exception(f'Something went wrong when retrieving {analysis_type} analysis: {request_url} with error: {error}') from error

148

149# from https://gist.github.com/leimao/37ff6e990b3226c2c9670a2cd1e4a6f5

def my_hook(t):
    """Wrap a tqdm instance into a report hook for ``urlretrieve``.

    Don't forget to close() or __exit__() the tqdm instance once you're done
    with it (easiest using `with` syntax).

    Example::

        with tqdm(...) as t:
            reporthook = my_hook(t)
            urllib.request.urlretrieve(..., reporthook=reporthook)

    Args:
        t: Progress bar to be updated. Its ``total`` attribute is assigned and
            its ``update`` method is called.

    Returns:
        A callable accepting (blocks transferred, block size, total size).
    """
    # Number of blocks reported in the previous call
    last_block = 0

    def update_to(b=1, bsize=1, tsize=None):
        """Advance the progress bar by the amount transferred since the last call.

        b : int, optional
            Number of blocks transferred so far [default: 1].
        bsize : int, optional
            Size of each block (in tqdm units) [default: 1].
        tsize : int, optional
            Total size (in tqdm units). If [default: None] remains unchanged.
        """
        nonlocal last_block
        if tsize is not None:
            t.total = tsize
        # tqdm expects increments, while the hook receives accumulated blocks
        t.update((b - last_block) * bsize)
        last_block = b

    return update_to