Coverage for mddb_workflow/tools/residue_mapping.py: 67%

64 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1from mddb_workflow.utils.constants import NO_REFERABLE_FLAG, NOT_FOUND_FLAG 

2from mddb_workflow.utils.type_hints import * 

3 

def generate_residue_mapping(
    protein_map: list[dict],
    ligand_map: list[dict],
    lipid_map: list[dict],
    structure: 'Structure',
) -> dict:
    """Build the residue map from the protein, ligand and lipid maps.

    The result is formatted as both the standard topology and metadata
    generators expect it.
    Task: resmap

    Side effect: every entry of the three input maps is tagged in place with
    a 'type' key ('protein', 'ligand' or 'inchikey' respectively).

    Args:
        protein_map: Entries with 'residue_indices' and a 'match' dict.
            match['ref'] is None, one of the NO_REFERABLE_FLAG /
            NOT_FOUND_FLAG flags, or a dict with a 'uniprot' key.
            For dict references, match['map'] is iterated in lockstep with
            'residue_indices' and holds the reference residue number (or
            None) of each residue.
        ligand_map: Entries such as
            { 'name': pubchem_id, 'residue_indices': residue_indices,
              'match': { 'ref': { 'pubchem': pubchem_id } } }
        lipid_map: Entries shaped like the ligand ones, but whose reference
            dict is read through its 'inchikey' key.
        structure: Object whose `residues` attribute length sets the number
            of residues in the output lists.

    Returns:
        A dict with the keys 'references', 'reference_types',
        'residue_reference_indices' and 'residue_reference_numbers'.
        The first two are parallel lists with one item per distinct
        reference; the last two have one item per structure residue
        (None where nothing was assigned). When no reference was registered
        at all, the four values are None in order to save space.

    Raises:
        ValueError: If an entry ends up with a type other than 'protein',
            'ligand' or 'inchikey'.
    """
    # Reformat mapping data to the topology system
    # Add the reference type to each reference object
    # The tagging order (lipids, proteins, ligands) is kept from the original code
    for entries, entry_type in (
        (lipid_map, 'inchikey'),
        (protein_map, 'protein'),
        (ligand_map, 'ligand'),
    ):
        for data in entries:
            data['type'] = entry_type

    # Key holding the reference id inside the reference dict, for each type
    id_fields = {
        'protein': 'uniprot',
        'ligand': 'pubchem',
        'inchikey': 'inchikey',
    }

    # Get the count of residues from the structure
    residues_count = len(structure.residues)

    # Now format data
    reference_ids = []
    reference_types = []
    residue_reference_indices = [None] * residues_count
    residue_reference_numbers = [None] * residues_count

    def register_reference(reference_id, reference_type):
        """Return the index of a reference id, registering it if it is new."""
        # Note that several matches may belong to the same reference and thus have the same index
        try:
            return reference_ids.index(reference_id)
        except ValueError:
            reference_ids.append(reference_id)
            reference_types.append(reference_type)
            return len(reference_ids) - 1

    for data in protein_map + ligand_map + lipid_map:
        match = data['match']
        reference = match['ref']
        # If reference is missing at this point then it means we failed to find a matching reference
        if reference is None:
            continue

        # Check if we have the "no referable" or the "not found" flag
        matched_flag = None
        for flag in (NO_REFERABLE_FLAG, NOT_FOUND_FLAG):
            if reference == flag:
                matched_flag = flag
                break
        if matched_flag is not None:
            # Flags are always registered as 'protein', whatever map the entry came from
            reference_index = register_reference(matched_flag, 'protein')
            for residue_index in data['residue_indices']:
                residue_reference_indices[int(residue_index)] = reference_index
            continue

        # Get the reference type and, from it, the reference id
        reference_type = data['type']
        if reference_type not in id_fields:
            raise ValueError('Not supported type ' + reference_type)
        reference_id = reference[id_fields[reference_type]]

        # If we have a regular reference id (i.e. not a no referable / not found flag)
        reference_index = register_reference(reference_id, reference_type)

        # Set the topology reference index and number for each residue
        # Note that ligands do not have any residue reference numbering
        if reference_type == 'protein':
            # NOTE(review): zip stops at the shortest input, so a length mismatch
            # between 'residue_indices' and 'map' is silently ignored
            for residue_index, residue_number in zip(data['residue_indices'], match['map']):
                # Residues with no reference number are left unassigned
                if residue_number is None:
                    continue
                residue_index = int(residue_index)
                residue_reference_indices[residue_index] = reference_index
                residue_reference_numbers[residue_index] = residue_number
        else:
            for residue_index in data['residue_indices']:
                residue_reference_indices[int(residue_index)] = reference_index

    # If there are not references at the end then set all fields as None, in order to save space
    if not reference_ids:
        return {
            'references': None,
            'reference_types': None,
            'residue_reference_indices': None,
            'residue_reference_numbers': None,
        }

    return {
        'references': reference_ids,
        'reference_types': reference_types,
        'residue_reference_indices': residue_reference_indices,
        'residue_reference_numbers': residue_reference_numbers,
    }