Coverage for mddb_workflow/tools/generate_lipid_references.py: 89%
61 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
1import requests
2from functools import lru_cache
3from mddb_workflow.utils.auxiliar import save_json
4from mddb_workflow.utils.auxiliar import warn
5from mddb_workflow.utils.type_hints import *
def generate_lipid_references(inchikeys: dict, output_filepath: str) -> list[dict]:
    """Identify lipid residues by querying SwissLipids and LIPID MAPS.

    Every InChIKey in ``inchikeys['key_2_name']`` is looked up in both
    databases, first as-is and, if that finds nothing, again using only the
    first layer of the key (i.e. without stereochemistry). Keys found in at
    least one database are treated as lipids.

    Args:
        inchikeys: Dictionary holding a ``'key_2_name'`` mapping from
            InChIKey to a residue record. The record is read for its
            ``'inchi'``, ``'resname'``, ``'resindices'``, ``'fragments'``
            and ``'classification'`` entries.
        output_filepath: Destination handed to ``save_json`` for the list of
            database references (InChIKey, InChI and both database hits).

    Returns:
        A list with one dictionary per lipid, in the format needed for
        generate_residue_mapping. For example:
            [{'name': 'CHL1',
              'residue_indices': [935, 936, 937, ...],
              'fragments': [],
              'match': {'ref': {'inchikey': 'HVYWMOMLDIMFJA-DPAQBDIFSA-N'}}}, ...]
        ``None`` is returned instead (and nothing is saved) when the initial
        SwissLipids probe raises.
    """
    # Probe SwissLipids once up front so a run without internet access ends
    # with a warning instead of failing on the first real lookup
    try:
        is_in_swisslipids('test')
    except Exception as e:
        warn(f'There was a problem connecting to the SwissLipids database: {e}')
        return None

    references = []
    residue_map = []
    for inchikey, res_data in inchikeys['key_2_name'].items():
        swisslipids_hit = is_in_swisslipids(inchikey)
        lipidmaps_hit = is_in_LIPID_MAPS(inchikey)
        # Whatever was not found is retried without stereochemistry
        swisslipids_hit = swisslipids_hit or is_in_swisslipids(inchikey, only_first_layer=True)
        lipidmaps_hit = lipidmaps_hit or is_in_LIPID_MAPS(inchikey, only_first_layer=True)

        if not (swisslipids_hit or lipidmaps_hit):
            # Unknown to both databases: flag it if it was nevertheless
            # classified as fatty, then move on to the next key
            if any('fatty' in classes for classes in res_data['classification']):
                warn(f'The InChIKey {inchikey} of {str(res_data["resname"])} is '
                    f'classified as fatty but is not a lipid.\n'
                    f'Resindices: {str(res_data["resindices"])}')
            continue

        # Raw database answers, written to the output file at the end
        references.append({
            'inchikey': inchikey,
            'inchi': res_data['inchi'],
            'swisslipids': swisslipids_hit,
            'lipidmaps': lipidmaps_hit,
        })
        # Format needed for generate_residue_mapping
        residue_map.append({
            'name': list(res_data['resname'])[0],
            'residue_indices': [int(index) for index in res_data['resindices']],
            'fragments': res_data['fragments'],
            'match': {'ref': {'inchikey': inchikey}},
        })

        # QUALITY CHECK: a residue matched as a lipid is expected to be
        # classified as fatty or steroid
        cls = res_data['classification']
        lipid_like = (any('fatty' in classes for classes in cls)
                      or any('steroid' in classes for classes in cls))
        if not lipid_like:
            warn(f'The residue {str(res_data["resname"])} is classified as {cls}, but the InChIKey "{inchikey}" is a lipid.')

    save_json(references, output_filepath)
    return residue_map
@lru_cache(maxsize=None)
def is_in_LIPID_MAPS(inchikey, only_first_layer=False, timeout=30.0) -> dict:
    """Search an InChIKey in LIPID MAPS.

    Args:
        inchikey: InChIKey to look up.
        only_first_layer: When True, only the first 14 characters of the key
            are sent, so the search ignores the remaining layers.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking forever.

    Returns:
        The decoded JSON answer when the service replies with status 200 and
        a non-empty result; ``False`` when the result is an empty list or the
        service replies with any other status (the status is printed).

    Note:
        Results, including ``False``, are memoised by ``lru_cache`` for the
        lifetime of the process. Exceptions raised by ``requests`` propagate
        to the caller and are not cached.
    """
    headers = {'accept': 'json'}
    # REST documentation: https://www.lipidmaps.org/resources/rest
    # To see InChIKey layers:
    # https://www.inchi-trust.org/about-the-inchi-standard/
    # Or https://www.rhea-db.org/help/inchi-inchikey#What_is_an_InChIKey_
    # NOTE(review): an earlier comment here stated that 'physchem' is the only
    # output item that returns data for a first-layer key, yet the request
    # below asks for 'all' - confirm which one is intended.
    key = inchikey[:14] if only_first_layer else inchikey
    url = f"https://www.lipidmaps.org/rest/compound/inchi_key/{key}/all"
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Error for {inchikey}: {response.status_code}")
        # Explicit False (instead of falling through to None) keeps the
        # "not found" value uniform with the other lookup helpers
        return False
    js = response.json()
    return js if js != [] else False
@lru_cache(maxsize=None)
def is_in_swisslipids(inchikey, only_first_layer=False,
                      protonation=True, timeout=30.0) -> dict:
    """Search an InChIKey in SwissLipids. Documentation: https://www.swisslipids.org/#/api

    Args:
        inchikey: InChIKey to look up.
        only_first_layer: When True, only the first 14 characters of the key
            are used as search term.
        protonation: Currently unused; kept so existing callers keep working.
        timeout: Seconds to wait for the search request before ``requests``
            raises a timeout error instead of blocking forever.

    Returns:
        The first search result, extended with a ``'synonyms'`` entry taken
        from the detailed entity record (an empty list when that record is
        unavailable); ``False`` when the service does not reply with status
        200 or the search returns no results.

    Note:
        Results, including ``False``, are memoised by ``lru_cache`` for the
        lifetime of the process. Exceptions raised by ``requests`` propagate
        to the caller and are not cached.
    """
    key = inchikey[:14] if only_first_layer else inchikey
    headers = {'accept': 'json'}
    url = f"https://www.swisslipids.org/api/index.php/search?term={key}"
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return False
    results = response.json()
    # A successful reply with no hits would otherwise fail on results[0]
    if not results:
        return False
    data = results[0]
    # The entity lookup returns False on failure, so guard before using .get
    detailed_data = get_swisslipids_info(data['entity_id'])
    data['synonyms'] = detailed_data.get('synonyms', []) if detailed_data else []
    return data
def get_swisslipids_info(entity_id, timeout=30.0) -> dict:
    """Get information about a SwissLipids entry.

    Args:
        entity_id: Identifier of the entry, inserted into the entity URL.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking forever.

    Returns:
        The decoded JSON answer when the service replies with status 200,
        otherwise ``False``.
    """
    headers = {'accept': 'json'}
    url = f"https://www.swisslipids.org/api/index.php/entity/{entity_id}"
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return False
    return response.json()