Coverage for mddb_workflow/tools/generate_lipid_references.py: 89%

61 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1import requests 

2from functools import lru_cache 

3from mddb_workflow.utils.auxiliar import save_json 

4from mddb_workflow.utils.auxiliar import warn 

5from mddb_workflow.utils.type_hints import * 

6 

def generate_lipid_references(inchikeys: dict,
                              output_filepath: str,
                              ) -> list[dict]:
    """Look up residues in the lipid databases and write the lipid references.

    Every InChIKey found under ``inchikeys['key_2_name']`` is searched in
    SwissLipids and in LIPID MAPS, first with the full key and, for each
    database that misses, again with ``only_first_layer=True`` (i.e. without
    stereochemistry). Keys found in at least one database are stored as lipid
    references, which are written with ``save_json`` to ``output_filepath``.

    Args:
        inchikeys: Dictionary whose 'key_2_name' entry maps every InChIKey to
            its residue data. The fields read here are 'inchi', 'resname',
            'resindices', 'fragments' and 'classification'.
        output_filepath: Destination passed to ``save_json`` for the lipid
            references.

    Returns:
        A list of dictionaries with the lipid matches and their residue
        indices, in the format needed for generate_residue_mapping. For
        example:
            [{'name': 'CHL1',
              'residue_indices': [935, 936, 937, ...],
              'fragments': [],
              'match': {'ref': {'inchikey': 'HVYWMOMLDIMFJA-DPAQBDIFSA-N'}}}, ...]
        None is returned instead (and nothing is saved) when the initial
        SwissLipids probe raises, e.g. because there is no internet.
    """
    # Probe SwissLipids once up front: without a connection the request raises
    # and we bail out with a warning rather than failing in the middle of the loop
    try:
        is_in_swisslipids('test')
    except Exception as e:
        warn(f'There was a problem connecting to the SwissLipids database: {e}')
        return None

    references = []
    residue_map = []
    for inchikey, res_data in inchikeys['key_2_name'].items():
        swiss_hit = is_in_swisslipids(inchikey)
        maps_hit = is_in_LIPID_MAPS(inchikey)
        # Any database that missed gets a second chance without stereochemistry
        swiss_hit = swiss_hit or is_in_swisslipids(inchikey, only_first_layer=True)
        maps_hit = maps_hit or is_in_LIPID_MAPS(inchikey, only_first_layer=True)

        if not (swiss_hit or maps_hit):
            # Not a known lipid: flag it when the classification says it looks fatty
            if any('fatty' in classes for classes in res_data['classification']):
                warn(f'The InChIKey {inchikey} of {str(res_data["resname"])} is '
                     f'classified as fatty but is not a lipid.\n'
                     f'Resindices: {str(res_data["resindices"])}')
            continue

        # Found in at least one database: keep the raw database answers
        reference = {
            'inchikey': inchikey,
            'inchi': res_data['inchi'],
            'swisslipids': swiss_hit,
            'lipidmaps': maps_hit,
        }
        references.append(reference)

        # Format needed for generate_residue_mapping
        map_entry = {
            'name': list(res_data['resname'])[0],
            'residue_indices': [int(index) for index in res_data['resindices']],
            'fragments': res_data['fragments'],
            'match': {'ref': {'inchikey': inchikey}},
        }
        residue_map.append(map_entry)

        # QUALITY CHECK: a residue matched as a lipid is expected to be
        # classified as fatty or steroid; warn when it is neither
        cls = res_data['classification']
        if not any('fatty' in classes for classes in cls) and \
           not any('steroid' in classes for classes in cls):
            warn(f'The residue {str(res_data["resname"])} is classified as {cls}, but the InChIKey "{inchikey}" is a lipid.')

    save_json(references, output_filepath)
    return residue_map

72 

@lru_cache(maxsize=None)
def is_in_LIPID_MAPS(inchikey, only_first_layer=False) -> dict:
    """Search an InChIKey in LIPID MAPS.

    REST documentation: https://www.lipidmaps.org/resources/rest

    Args:
        inchikey: The InChIKey to search for.
        only_first_layer: When True, only the first 14 characters of the key
            are queried instead of the whole key.

    Returns:
        The decoded JSON answer when LIPID MAPS replies with status 200 and a
        non-empty result. False when the result is an empty list or when the
        request fails with any other status code (the status is printed).
        Results, including False, are memoized by ``lru_cache``.
    """
    headers = {'accept': 'json'}
    # Output item = physchem, is the only one that returns data for the inchi key
    # for only the two first layers (main and atom connection)
    # NOTE(review): the URL below requests the `all` output item, not
    # `physchem` -- confirm which one is intended.
    # To see InChiKey layers:
    # https://www.inchi-trust.org/about-the-inchi-standard/
    # Or https://www.rhea-db.org/help/inchi-inchikey#What_is_an_InChIKey_
    key = inchikey[:14] if only_first_layer else inchikey
    url = f"https://www.lipidmaps.org/rest/compound/inchi_key/{key}/all"
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Error for {inchikey}: {response.status_code}")
        # Return False explicitly instead of falling off the end with None, so
        # that a failed request looks the same to callers as a miss, as it
        # does in the SwissLipids helpers
        return False
    js = response.json()
    # An empty list is treated as "not found"
    return js if js != [] else False

94 

@lru_cache(maxsize=None)
def is_in_swisslipids(inchikey, only_first_layer=False,
                      protonation=True) -> dict:
    """Search an InChIKey in SwissLipids.

    Documentation: https://www.swisslipids.org/#/api

    Args:
        inchikey: The InChIKey to search for.
        only_first_layer: When True, only the first 14 characters of the key
            are used as the search term instead of the whole key.
        protonation: Currently unused; kept so existing callers keep working.

    Returns:
        The first search result, extended with a 'synonyms' entry taken from
        the detailed entity record (an empty list when the record is not
        available or has no synonyms). False when the search does not reply
        with status 200 or replies with no results. Results, including False,
        are memoized by ``lru_cache``.
    """
    key = inchikey[:14] if only_first_layer else inchikey
    headers = {'accept': 'json'}
    url = f"https://www.swisslipids.org/api/index.php/search?term={key}"
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        return False
    results = response.json()
    # Guard against an empty result set, where taking the first element
    # would raise instead of reporting a miss
    if not results:
        return False
    data = results[0]
    detailed_data = get_swisslipids_info(data['entity_id'])
    # get_swisslipids_info returns False on a failed request, and False has
    # no .get(); fall back to no synonyms in that case
    data['synonyms'] = detailed_data.get('synonyms', []) if detailed_data else []
    return data

110 

def get_swisslipids_info(entity_id) -> dict:
    """Fetch the SwissLipids entity record for ``entity_id``.

    Returns the decoded JSON body when the request replies with status 200,
    and False for any other status code.
    """
    response = requests.get(
        f"https://www.swisslipids.org/api/index.php/entity/{entity_id}",
        headers={'accept': 'json'},
    )
    # Anything other than a plain success is reported as "no information"
    if response.status_code != 200:
        return False
    return response.json()