Coverage for mddb_workflow / tools / residue_mapping.py: 77%

62 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 18:45 +0000

1from mddb_workflow.utils.constants import NO_REFERABLE_FLAG, NOT_FOUND_FLAG 

2from mddb_workflow.utils.type_hints import * 

3 

4 

def generate_residue_mapping(
    protein_map: list[dict],
    inchikey_map: list[dict],
    structure: 'Structure',
) -> dict:
    """Merge the protein and ligand (InChIKey) maps into a single residue map.

    Every entry of both input maps is tagged in place with a 'type' key
    ('inchikey' or 'protein') before being processed, so the caller's
    dictionaries are modified.

    Args:
        protein_map: Entries read through their 'match' key (which provides
            'ref' and, for regular references, 'map') and 'residue_indices'.
        inchikey_map: Entries read through their optional 'inchikey' key and
            'residue_indices'.
        structure: Object whose ``residues`` length sets the size of the
            per-residue lists.

    Returns:
        A dict with the keys 'references', 'reference_types',
        'residue_reference_indices' and 'residue_reference_numbers'.
        All four values are None when no reference was registered.

    Raises:
        ValueError: If an entry carries a type other than 'protein' or
            'inchikey'.
    """
    # Tag each entry with its reference type (this writes into the caller's dicts)
    for entry in inchikey_map:
        entry['type'] = 'inchikey'
    for entry in protein_map:
        entry['type'] = 'protein'

    # The per-residue lists have one slot per residue in the structure
    total_residues = len(structure.residues)

    known_ids = []
    known_types = []
    index_by_residue = [None] * total_residues
    number_by_residue = [None] * total_residues

    def register(ref_id, ref_type):
        """Return the position of ref_id among known references, adding it when new."""
        try:
            return known_ids.index(ref_id)
        except ValueError:
            known_ids.append(ref_id)
            known_types.append(ref_type)
            return len(known_ids) - 1

    for entry in protein_map + inchikey_map:
        kind = entry['type']
        is_protein = kind == 'protein'
        if is_protein:
            match = entry['match']
            # Several matches may point to the same reference and thus share its index
            reference = match['ref']
        else:
            reference = entry.get('inchikey', None)

        # A missing reference means no matching reference was found for this entry
        if reference is None:
            continue

        # Flagged entries ("no referable" / "not found") are registered under
        # the flag itself, always with the 'protein' type, and get no numbering
        flag = None
        if reference == NO_REFERABLE_FLAG:
            flag = NO_REFERABLE_FLAG
        elif reference == NOT_FOUND_FLAG:
            flag = NOT_FOUND_FLAG
        if flag is not None:
            slot = register(flag, 'protein')
            for residue_index in entry['residue_indices']:
                index_by_residue[residue_index] = slot
            continue

        # Regular reference: work out the id used to identify it
        if is_protein:
            ref_id = reference['uniprot']
        elif kind == 'inchikey':
            ref_id = reference
        else:
            raise ValueError('Not supported type ' + kind)
        slot = register(ref_id, kind)

        if is_protein:
            # Residues whose reference number is None are left unassigned
            for residue_index, residue_number in zip(entry['residue_indices'], match['map']):
                if residue_number is not None:
                    index_by_residue[residue_index] = slot
                    number_by_residue[residue_index] = residue_number
        else:
            # Ligands do not have any residue reference numbering
            for residue_index in entry['residue_indices']:
                index_by_residue[int(residue_index)] = slot

    # With no references at all, every field is set to None in order to save space
    if not known_ids:
        return {
            'references': None,
            'reference_types': None,
            'residue_reference_indices': None,
            'residue_reference_numbers': None
        }

    return {
        'references': known_ids,
        'reference_types': known_types,
        'residue_reference_indices': index_by_residue,
        'residue_reference_numbers': number_by_residue
    }