Coverage for mddb_workflow / tools / get_lipids.py: 87%
53 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
1import requests
2from functools import lru_cache
3from mddb_workflow.utils.auxiliar import warn
4from mddb_workflow.utils.type_hints import *
def generate_lipid_references(inchikeys: dict[str, 'InChIKeyData']) -> dict[str, dict]:
    """Collect SwissLipids and LIPID MAPS matches for a set of InChI keys.

    Every key is searched in both databases, first with the full key and,
    when that yields nothing, with only its first block (no stereochemistry).
    Keys found in at least one database are reported in the returned mapping.
    Along the way the database result is compared with the classification
    stored for the molecule, and a warning is emitted when they disagree.

    Args:
        inchikeys: Dictionary mapping InChI keys to InChIKeyData objects. The
            attributes ``classification``, ``moltype``, ``molname`` and
            ``resindices`` of each object are read.

    Returns:
        dict: For every InChI key found in SwissLipids and/or LIPID MAPS, a
        dictionary with the keys ``'swisslipids'`` and ``'lipidmaps'`` holding
        the respective lookup results. An empty dictionary is returned when
        the initial SwissLipids probe raises.

    """
    # Probe SwissLipids once up front; any failure aborts the whole search
    try:
        is_in_swisslipids('test')
    except Exception as err:
        warn(f'There was a problem connecting to the SwissLipids database: {err}')
        return {}

    lipid_references = {}
    for inchikey, mol_data in inchikeys.items():
        # Full key first; fall back to the connection layer only
        swisslipids_hit = is_in_swisslipids(inchikey) or is_in_swisslipids(inchikey, only_first_layer=True)
        lipidmaps_hit = is_in_LIPID_MAPS(inchikey) or is_in_LIPID_MAPS(inchikey, only_first_layer=True)

        if not (swisslipids_hit or lipidmaps_hit):
            # Unknown to both databases: a 'fatty' classification is suspicious
            if any('fatty' in labels for labels in mol_data.classification):
                warn(f'The InChIKey {inchikey} of {mol_data.molname} is '
                     f'classified as fatty but is not a lipid.\n'
                     f'Resindices: {str(mol_data.resindices)}')
            continue

        # Known lipid: keep the raw results of both databases
        lipid_references[inchikey] = {'swisslipids': swisslipids_hit, 'lipidmaps': lipidmaps_hit}

        # QUALITY CHECK: a database lipid should be classified as fatty/steroid
        labels = mol_data.classification
        mentions_fatty = any('fatty' in entry for entry in labels)
        if not mentions_fatty and not any('steroid' in entry for entry in labels):
            warn(f'The {mol_data.moltype} {mol_data.molname} is classified as {labels}, '
                 f'but the InChIKey "{inchikey}" may be from a lipid.')

    return lipid_references
@lru_cache(maxsize=None)
def is_in_LIPID_MAPS(inchikey, only_first_layer=False) -> dict:
    """Search an InChI key in LIPID MAPS.

    REST documentation: https://www.lipidmaps.org/resources/rest

    Args:
        inchikey: InChI key to look up.
        only_first_layer: When True, only the first 14 characters of the key
            are sent, so the search is restricted to the first block of the
            InChI key. To see the InChIKey layers:
            https://www.inchi-trust.org/about-the-inchi-standard/
            or https://www.rhea-db.org/help/inchi-inchikey#What_is_an_InChIKey_

    Returns:
        The decoded JSON answer when the service replies with status 200 and
        something other than an empty list. False when nothing was found or
        when the service replied with any other status code.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection problem or timeout).

    Note:
        Results are memoized with ``lru_cache``, so a False produced by a
        transient HTTP error is also remembered for the rest of the process.
    """
    headers = {'accept': 'json'}
    # NOTE(review): an earlier comment here said the 'physchem' output item is
    # the only one returning data for partial keys, yet the request below asks
    # for the 'all' output item -- confirm which one is intended.
    key = inchikey[:14] if only_first_layer else inchikey
    url = f"https://www.lipidmaps.org/rest/compound/inchi_key/{key}/all"
    # Bounded wait: without a timeout a stalled server blocks the workflow forever
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        print(f"Error for {inchikey}: {response.status_code}")
        # Explicit False (instead of an implicit None) to match the other lookups
        return False
    js = response.json()
    # LIPID MAPS answers with an empty list when there is no match
    return js if js != [] else False
def get_swisslipids_info(entity_id) -> dict:
    """Get information about a SwissLipids entry.

    Args:
        entity_id: Identifier of the SwissLipids entity to retrieve.

    Returns:
        The decoded JSON answer when the service replies with status 200,
        False for any other status code.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection problem or timeout).
    """
    headers = {'accept': 'json'}
    url = f"https://www.swisslipids.org/api/index.php/entity/{entity_id}"
    # Bounded wait: without a timeout a stalled server blocks the workflow forever
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return False
    return response.json()
@lru_cache(maxsize=None)
def is_in_swisslipids(inchikey, only_first_layer=False) -> dict:
    """Search an InChI key in SwissLipids.

    Documentation: https://www.swisslipids.org/#/api.

    Args:
        inchikey: InChI key (or any search term) to look up.
        only_first_layer: When True, only the first 14 characters of the key
            are used as search term, i.e. the first block of the InChI key.

    Returns:
        The first search hit as a dictionary, extended with a ``'synonyms'``
        entry taken from the detailed entity record (an empty list when the
        detailed record is unavailable). False when the service replies with
        a status other than 200 or the search has no hits.

    Raises:
        requests.RequestException: If a request cannot be completed
            (connection problem or timeout).

    Note:
        Results are memoized with ``lru_cache``; the returned dictionary is
        the cached object itself, so callers should not modify it.
    """
    key = inchikey[:14] if only_first_layer else inchikey
    headers = {'accept': 'json'}
    url = f"https://www.swisslipids.org/api/index.php/search?term={key}"
    # Bounded wait: without a timeout a stalled server blocks the workflow forever
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return False
    hits = response.json()
    # An empty answer means "not found"; indexing it would raise IndexError
    if not hits:
        return False
    data = hits[0]
    detailed_data = get_swisslipids_info(data['entity_id'])
    # The detail lookup returns False on HTTP errors, so guard before .get()
    if isinstance(detailed_data, dict):
        data['synonyms'] = detailed_data.get('synonyms', [])
    else:
        data['synonyms'] = []
    return data