Coverage for model_workflow/tools/generate_membrane_mapping.py: 85%

65 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1import os 

2import numpy as np 

3from biobb_mem.fatslim.fatslim_membranes import fatslim_membranes, parse_index 

4from model_workflow.utils.auxiliar import save_json 

5from model_workflow.utils.type_hints import * 

6from contextlib import redirect_stdout 

7 

8def generate_membrane_mapping(lipid_map : List[dict], 

9 structure_file : 'File', 

10 universe: 'Universe', 

11 output_filepath: str, 

12 ) -> List[dict]: 

13 """ 

14 Generates a list of residue numbers of membrane components from a given structure and topology file. 

15 Args: 

16 structure (Structure): The molecular structure to analyze. 

17 standard_topology_file (File): The topology file in JSON format. 

18 structure_file (File): The structure file. 

19 debug (bool, optional): If True, additional debug information is returned. Defaults to False. 

20  

21 Returns: 

22 List[dict]: A list containing the membrane mapping. If debug is True, additional information is returned. 

23  

24 Raises: 

25 AssertionError: If the topology file is not in JSON format. 

26  

27 Notes: 

28 - The function identifies lipid and non-lipid residues based on InChI keys. 

29 - It classifies residues and checks for potential misclassifications. 

30 - Lipid residues are selected and neighboring lipids are found. 

31 - Clusters of lipids are identified, and clusters with more than 30 lipids are considered as membranes. 

32 - If debug is enabled, the function returns additional information including lipid residues, neighbors, counts, and clusters. 

33 """ 

34 

35 if not universe: raise RuntimeError('Missing universe') 

36 

37 # Prepare the membrane mapping OBJ/JSON 

38 mem_map_js = {'n_mems': 0, 'mems': {}, 'no_mem_lipid': {}} 

39 # if no lipids are found, we save the empty mapping and return 

40 if len(lipid_map) == 0: 

41 # no lipids found in the structure. 

42 save_json(mem_map_js, output_filepath) 

43 return mem_map_js 

44 

45 # Select only the lipids and potential membrane members 

46 lipid_ridx = [] 

47 for ref in lipid_map: 

48 lipid_ridx.extend(ref['resindices']) 

49 # if no lipids are found, we save the empty mapping and return 

50 if len(lipid_ridx) == 0: 

51 # no lipids found in the structure. 

52 save_json(mem_map_js, output_filepath) 

53 return mem_map_js 

54 glclipid_ridx = [grp for ref in lipid_map for grp in ref.get('resgroups', []) if len(grp) > 1] 

55 mem_candidates = universe.select_atoms(f'(resindex {" ".join(map(str,(lipid_ridx)))})') 

56 

57 # for better leaflet assignation we only use polar atoms 

58 charges = abs(np.array([atom.charge for atom in universe.atoms])) 

59 polar_atoms = [] 

60 for ridx in mem_candidates.residues.resindices: 

61 res = universe.residues[ridx] 

62 res_ch = charges[res.atoms.ix] 

63 max_ch_idx = np.argmax(res_ch) 

64 polar_atoms.append(res.atoms[max_ch_idx].index) 

65 polar_atoms = np.array(polar_atoms) 

66 headgroup_sel = f'(index {" ".join(map(str,(polar_atoms)))})' 

67 # Run FATSLiM to find the membranes 

68 prop = { 

69 'selection': headgroup_sel, 

70 'cutoff': 2.2, 

71 'ignore_no_box': True, 

72 'disable_logs': True, 

73 'return_hydrogen': True 

74 } 

75 output_ndx_path = "tmp_mem_map.ndx" 

76 print(' Running BioBB FATSLiM Membranes') 

77 with redirect_stdout(None): 

78 fatslim_membranes(input_top_path=structure_file.absolute_path, 

79 output_ndx_path=output_ndx_path, 

80 properties=prop) 

81 # Parse the output to get the membrane mapping 

82 mem_map = parse_index(output_ndx_path) 

83 os.remove(output_ndx_path) 

84 # Save the membrane mapping as a JSON file 

85 n_mems = len(mem_map)//2 

86 mem_map_js['n_mems'] = n_mems 

87 no_mem_lipids = set(mem_candidates.atoms.indices) 

88 for i in range(n_mems): 

89 # and they are not assigned to any membrane. FATSLiM indexes start at 1 

90 bot = (np.array(mem_map[f'membrane_{i+1}_leaflet_1'])-1).tolist() # JSON does not support numpy arrays 

91 top = (np.array(mem_map[f'membrane_{i+1}_leaflet_2'])-1).tolist() 

92 top_ridx = universe.atoms[top].residues.resindices 

93 bot_rdix = universe.atoms[bot].residues.resindices 

94 # Some polar atoms from the glucids are to far from the polar atoms of the lipids 

95 top = set(top) 

96 bot = set(bot) 

97 remove_ridx = [] 

98 for grp in glclipid_ridx: 

99 if np.in1d(grp, top_ridx).any(): 

100 top.update(universe.residues[grp].atoms.indices) 

101 remove_ridx.append(grp) 

102 continue 

103 if np.in1d(grp,bot_rdix).any(): 

104 bot.update(universe.residues[grp].atoms.indices) 

105 remove_ridx.append(grp) 

106 for grp in remove_ridx: 

107 glclipid_ridx.remove(grp) 

108 # Remove lipids not assigned to any membrane 

109 no_mem_lipids -= top 

110 no_mem_lipids -= bot 

111 # Check in which leaflets each of the polar atoms is and save them 

112 mem_map_js['mems'][(str(i))] = { 

113 'leaflets': { 

114 'bot': list(map(int, bot)), 

115 'top': list(map(int, top)), 

116 }, 

117 'polar_atoms': { 

118 'bot': polar_atoms[np.in1d(polar_atoms, list(bot))].tolist(), 

119 'top': polar_atoms[np.in1d(polar_atoms, list(top))].tolist(), 

120 } 

121 } 

122 

123 mem_map_js['no_mem_lipid'] = list(map(int, no_mem_lipids)) 

124 # Print the results and save the membrane mapping 

125 no_mem_lipids_str = f'{len(glclipid_ridx)} lipid/s not assigned to any membrane.' if len(glclipid_ridx)>0 else '' 

126 print(f'{n_mems} membrane/s found. ' + no_mem_lipids_str) 

127 save_json(mem_map_js, output_filepath) 

128 return mem_map_js 

129