Coverage for mddb_workflow / tools / residue_mapping.py: 77%
62 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-03 18:45 +0000
1from mddb_workflow.utils.constants import NO_REFERABLE_FLAG, NOT_FOUND_FLAG
2from mddb_workflow.utils.type_hints import *
def _register_reference(
    reference_ids: list,
    reference_types: list,
    reference_id,
    reference_type: str,
) -> int:
    """Return the index of reference_id in reference_ids, appending it first if missing.

    When the id is new, reference_type is appended to reference_types as well,
    so both lists stay aligned by index.
    """
    if reference_id not in reference_ids:
        reference_ids.append(reference_id)
        reference_types.append(reference_type)
    return reference_ids.index(reference_id)


def generate_residue_mapping(
    protein_map: list[dict],
    inchikey_map: list[dict],
    structure: 'Structure',
) -> dict:
    """Build the residue map from both proteins and ligands maps.

    This is formatted as both the standard topology and metadata generators expect them.

    Args:
        protein_map: Protein mapping entries. Each entry is read through
            ``data['match']['ref']`` (``None``, one of the NO_REFERABLE_FLAG /
            NOT_FOUND_FLAG constants, or an object indexed with ``'uniprot'``),
            ``data['match']['map']`` (one reference residue number, or ``None``,
            per residue) and ``data['residue_indices']``.
        inchikey_map: Ligand mapping entries. Each entry is read through the
            optional ``'inchikey'`` key and ``data['residue_indices']``.
        structure: Object whose ``residues`` attribute supports ``len()``; its
            length sets the size of the per-residue lists.

    Returns:
        A dict with the keys ``'references'``, ``'reference_types'``,
        ``'residue_reference_indices'`` and ``'residue_reference_numbers'``.
        The first two are parallel lists of unique reference ids and their
        types. The last two have one slot per residue and hold ``None`` for
        residues without a reference. If no reference was registered at all,
        the four values are ``None``.

    Raises:
        ValueError: If an entry type is neither ``'protein'`` nor ``'inchikey'``.
            Since the type is assigned by this function, this is a safeguard
            which is not expected to trigger.

    Note:
        Every dict in both input lists is modified in place: a ``'type'`` key
        is written to it.
    """
    # Reformat mapping data to the topology system
    # Add the reference type to each reference object
    # Note that this writes into the dicts handed in by the caller
    for data in inchikey_map:
        data['type'] = 'inchikey'
    for data in protein_map:
        data['type'] = 'protein'

    # Get the count of residues from the structure
    residues_count = len(structure.residues)

    # Now format data
    reference_ids = []
    reference_types = []
    residue_reference_indices = [None] * residues_count
    residue_reference_numbers = [None] * residues_count

    for data in protein_map + inchikey_map:
        reference_type = data['type']
        if reference_type == 'protein':
            match = data['match']
            # Note that several matches may belong to the same reference and thus have the same index
            reference = match['ref']
        else:
            reference = data.get('inchikey', None)
        # If reference is missing at this point then it means we failed to find a matching reference
        if reference is None:
            continue

        # If we have the "no referable" or the "not found" flag
        # Flags are stored as references of type 'protein' and residues get no numbering
        if reference == NO_REFERABLE_FLAG or reference == NOT_FOUND_FLAG:
            # Store the constant itself, checking the "no referable" flag first
            flag = NO_REFERABLE_FLAG if reference == NO_REFERABLE_FLAG else NOT_FOUND_FLAG
            reference_index = _register_reference(reference_ids, reference_types, flag, 'protein')
            for residue_index in data['residue_indices']:
                residue_reference_indices[residue_index] = reference_index
            continue

        # Get the reference id
        if reference_type == 'protein':
            reference_id = reference['uniprot']
        elif reference_type == 'inchikey':
            reference_id = reference
        else:
            raise ValueError('Not supported type ' + reference_type)

        # If we have a regular reference id (i.e. not a no referable / not found flag)
        reference_index = _register_reference(
            reference_ids, reference_types, reference_id, reference_type)

        # Set the topology reference index and number for each residue
        # Note that ligands do not have any residue reference numbering
        if reference_type == 'protein':
            # Note that zip stops at the shortest of both sequences
            for residue_index, residue_number in zip(data['residue_indices'], match['map']):
                # Residues which are not aligned to the reference are left unset
                if residue_number is None:
                    continue
                residue_reference_indices[residue_index] = reference_index
                residue_reference_numbers[residue_index] = residue_number
        elif reference_type == 'inchikey':
            for residue_index in data['residue_indices']:
                residue_reference_indices[int(residue_index)] = reference_index

    # If there are not references at the end then set all fields as None, in order to save space
    if not reference_ids:
        reference_ids = None
        reference_types = None
        residue_reference_indices = None
        residue_reference_numbers = None

    return {
        'references': reference_ids,
        'reference_types': reference_types,
        'residue_reference_indices': residue_reference_indices,
        'residue_reference_numbers': residue_reference_numbers,
    }