Coverage for mddb_workflow / tools / get_lipids.py: 87%

53 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1import requests 

2from functools import lru_cache 

3from mddb_workflow.utils.auxiliar import warn 

4from mddb_workflow.utils.type_hints import * 

5 

6 

def generate_lipid_references(inchikeys: dict[str, 'InChIKeyData']) -> dict[str, dict]:
    """Collect SwissLipids and LIPID MAPS hits for a collection of InChI keys.

    Every InChI key is searched in both databases: first with the full key
    and, if that finds nothing, again with ``only_first_layer=True`` (no
    stereochemistry, only the connection layer). The outcome is compared
    against the classification stored in each InChIKeyData object and a
    warning is issued when the two disagree. The InChIKeyData objects are
    only read by this function.

    Args:
        inchikeys: Dictionary mapping InChI keys to InChIKeyData objects. The
            ``classification``, ``moltype``, ``molname`` and ``resindices``
            attributes of each object are read.

    Returns:
        dict: One entry per InChI key found in at least one database, of the
        form ``{'swisslipids': <result>, 'lipidmaps': <result>}`` where a
        falsy result means that database had no match. An empty dict is
        returned when the initial SwissLipids probe raises.

    """
    # Probe SwissLipids once up front: if it cannot be reached there is no
    # point in looping, so warn and hand back an empty result instead.
    try:
        is_in_swisslipids('test')
    except Exception as e:
        warn(f'There was a problem connecting to the SwissLipids database: {e}')
        return {}

    lipid_references = {}
    for inchikey, mol_data in inchikeys.items():
        # Full key first; fall back to the key without stereochemistry
        swisslipids_hit = is_in_swisslipids(inchikey)
        if not swisslipids_hit:
            swisslipids_hit = is_in_swisslipids(inchikey, only_first_layer=True)
        lipidmaps_hit = is_in_LIPID_MAPS(inchikey)
        if not lipidmaps_hit:
            lipidmaps_hit = is_in_LIPID_MAPS(inchikey, only_first_layer=True)

        classification = mol_data.classification

        if swisslipids_hit or lipidmaps_hit:
            lipid_references[inchikey] = {
                'swisslipids': swisslipids_hit,
                'lipidmaps': lipidmaps_hit,
            }
            # Quality check: a database hit is expected to come with a
            # fatty/steroid classification
            has_lipid_class = (
                any('fatty' in classes for classes in classification)
                or any('steroid' in classes for classes in classification)
            )
            if not has_lipid_class:
                warn(f'The {mol_data.moltype} {mol_data.molname} is classified as {classification}, '
                     f'but the InChIKey "{inchikey}" may be from a lipid.')
            continue

        # Quality check: not in any database, yet classified as fatty
        if any('fatty' in classes for classes in classification):
            warn(f'The InChIKey {inchikey} of {mol_data.molname} is '
                 f'classified as fatty but is not a lipid.\n'
                 f'Resindices: {str(mol_data.resindices)}')

    return lipid_references

53 

54 

@lru_cache(maxsize=None)
def is_in_LIPID_MAPS(inchikey, only_first_layer=False) -> dict:
    """Search an InChI key in the LIPID MAPS REST service.

    Args:
        inchikey: InChI key to look up.
        only_first_layer: When True, only the first 14 characters of the key
            are sent, i.e. the search ignores stereochemistry and uses only
            the connection layer.

    Returns:
        The decoded JSON payload when the service answers with status 200 and
        a non-empty body; ``False`` in every other case (empty result or a
        non-200 status).

    Raises:
        requests.RequestException: If the request cannot be completed,
            including when the timeout expires.

    """
    # REST documentation: https://www.lipidmaps.org/resources/rest
    # To see InChIKey layers:
    # https://www.inchi-trust.org/about-the-inchi-standard/
    # Or https://www.rhea-db.org/help/inchi-inchikey#What_is_an_InChIKey_
    # NOTE(review): an earlier comment here stated that `physchem` is the only
    # output item returning data for a first-layer-only key, yet the URL below
    # requests `all` - confirm which output item is intended.
    # NOTE: results are memoised by lru_cache, including the False returned
    # for a non-200 answer, so a failed lookup is not retried in this process.
    headers = {'accept': 'json'}
    key = inchikey[:14] if only_first_layer else inchikey
    url = f"https://www.lipidmaps.org/rest/compound/inchi_key/{key}/all"
    # Without a timeout requests may wait forever on an unresponsive server
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        print(f"Error for {inchikey}: {response.status_code}")
        # Return False explicitly so every "not found" path yields the same value
        return False
    js = response.json()
    if js != []:
        return js
    return False

76 

77 

def get_swisslipids_info(entity_id) -> dict:
    """Get information about a SwissLipids entry.

    Args:
        entity_id: Identifier of the SwissLipids entity to fetch.

    Returns:
        The decoded JSON payload when the service answers with status 200,
        ``False`` otherwise.

    Raises:
        requests.RequestException: If the request cannot be completed,
            including when the timeout expires.

    """
    headers = {'accept': 'json'}
    url = f"https://www.swisslipids.org/api/index.php/entity/{entity_id}"
    # Without a timeout requests may wait forever on an unresponsive server
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return False
    return response.json()

87 

88 

@lru_cache(maxsize=None)
def is_in_swisslipids(inchikey, only_first_layer=False) -> dict:
    """Search an InChI key in SwissLipids.

    Documentation: https://www.swisslipids.org/#/api.

    Args:
        inchikey: InChI key to look up.
        only_first_layer: When True, only the first 14 characters of the key
            are sent, i.e. the search ignores stereochemistry and uses only
            the connection layer.

    Returns:
        The first search result, extended with a ``'synonyms'`` entry taken
        from the detailed record of that entity (an empty list when the
        detailed record is unavailable or has none). ``False`` when the
        service does not answer with status 200 or returns no results.

    Raises:
        requests.RequestException: If a request cannot be completed,
            including when the timeout expires.

    """
    # NOTE: results are memoised by lru_cache, including the False returned
    # for a failed lookup, so it is not retried within this process.
    key = inchikey[:14] if only_first_layer else inchikey
    headers = {'accept': 'json'}
    url = f"https://www.swisslipids.org/api/index.php/search?term={key}"
    # Without a timeout requests may wait forever on an unresponsive server
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return False

    results = response.json()
    # A 200 answer with an empty result list means "not found"; indexing it
    # would raise instead of reporting the miss
    if not results:
        return False

    data = results[0]
    detailed_data = get_swisslipids_info(data['entity_id'])
    # get_swisslipids_info returns False when the detail request fails; keep
    # the search hit and simply report no synonyms in that case
    data['synonyms'] = detailed_data.get('synonyms', []) if detailed_data else []
    return data