Coverage for mddb_workflow/tools/provenance.py: 32%

28 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1import zipfile 

2import sqlite3 

3 

4from mddb_workflow.utils.auxiliar import InputError, save_json 

5from mddb_workflow.utils.constants import OUTPUT_PROVENANCE_FILENAME 

6from mddb_workflow.utils.type_hints import * 

7 

def produce_provenance(
    output_directory: str,
    aiida_data_file: Optional['File'],
):
    """Produce a provenance file containing AiiDA data adapted for our database.

    The AiiDA export is a zip-compressed sqlite database. Extract it into the
    output directory, mine the relevant tables and dump them as JSON.

    Args:
        output_directory: directory where the zip is extracted and the JSON
            provenance file is written.
        aiida_data_file: project File wrapping the AiiDA export, or None when
            there is no AiiDA data.

    Raises:
        InputError: if the provided file is not zip-compressed.
        RuntimeError: if decompression fails for any other reason.
    """
    # Nothing to do when no AiiDA data was provided or the file does not exist
    # NOTE(review): 'exists' is read as an attribute/property, not called — assumed intended
    if not aiida_data_file or not aiida_data_file.exists:
        print(' There is no AiiDA data')
        return

    # The AiiDA exported file is a zip-compressed sqlite file,
    # so the first step is extracting all zip contents
    try:
        with zipfile.ZipFile(aiida_data_file.path, 'r') as zip_ref:
            zip_ref.extractall(output_directory)
    except zipfile.BadZipFile as error:
        raise InputError('AiiDA data file must be a zip-compressed sqlite.\n' +
            ' The file you provided is not zip-compressed: ' + aiida_data_file.path) from error
    # Catch Exception (never a bare 'except') so KeyboardInterrupt/SystemExit
    # propagate, and chain the cause so the original traceback is not lost
    except Exception as error:
        raise RuntimeError(f'Something went wrong while decompressing zip file {aiida_data_file.path}') from error

    # Now find the db file among decompressed files and parse it
    # NOTE(review): assumes the database inside the export is always named
    # 'db.sqlite3' — TODO confirm this holds for every AiiDA export version
    db_filepath = f'{output_directory}/db.sqlite3'

    # Setup the database; close the connection even if mining fails
    connection = sqlite3.connect(db_filepath)
    try:
        cursor = connection.cursor()

        # Mine the target tables
        # NOTE: the day this was written the remaining tables were empty
        target_tables = { 'db_dbcomputer', 'db_dbuser', 'db_dbnode', 'db_dblink' }
        tables = {}
        for table_name in target_tables:
            # Table names come from the hard-coded set above, so f-string
            # interpolation into the SQL statement is safe here
            cursor.execute(f'select * from {table_name}')
            headers = [desc[0] for desc in cursor.description]
            all_rows = cursor.fetchall()
            tables[table_name] = { 'headers': headers, 'rows': all_rows }
    finally:
        connection.close()

    # Save data in JSON format
    output_filepath = f'{output_directory}/{OUTPUT_PROVENANCE_FILENAME}'
    save_json(tables, output_filepath)