Coverage for mddb_workflow/tools/residue_mapping.py: 67%
64 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 15:48 +0000
1from mddb_workflow.utils.constants import NO_REFERABLE_FLAG, NOT_FOUND_FLAG
2from mddb_workflow.utils.type_hints import *
# Key of the reference object which holds the reference id, for every supported reference type
_REFERENCE_ID_FIELDS = {
    'protein': 'uniprot',
    'ligand': 'pubchem',
    'inchikey': 'inchikey',
}


def _get_reference_index(
    reference_ids : list,
    reference_types : list,
    reference_id,
    reference_type : str,
) -> int:
    """Return the index of a reference id in the references list, registering it first if it is new."""
    if reference_id not in reference_ids:
        reference_ids.append(reference_id)
        reference_types.append(reference_type)
    return reference_ids.index(reference_id)


def generate_residue_mapping(
    protein_map : list[dict],
    ligand_map : list[dict],
    lipid_map : list[dict],
    structure : 'Structure',
) -> dict:
    """
    Build the residue map from the proteins, ligands and lipids maps.
    This is formatted as both the standard topology and metadata generators expect them.

    Task: resmap

    Args:
        protein_map: Protein entries. Each entry is read as
            { 'residue_indices': [...], 'match': { 'ref': <reference>, 'map': [...] } }
            where the reference is either None, one of the NO_REFERABLE_FLAG / NOT_FOUND_FLAG
            flags or an object with a 'uniprot' field, and 'map' holds one reference
            residue number (or None) per residue index.
        ligand_map: Ligand entries, e.g.
            { 'name': pubchem_id, 'residue_indices': residue_indices, 'match': { 'ref': { 'pubchem': pubchem_id } } }
        lipid_map: Lipid entries. Same layout as ligands, but the reference id is read
            from the 'inchikey' field of the reference.
        structure: Object whose 'residues' attribute length sets the length of the per-residue lists.

    Returns:
        A dict with the fields 'references', 'reference_types',
        'residue_reference_indices' and 'residue_reference_numbers'.
        The first two are parallel lists with one item per unique reference.
        The last two have one item per residue in the structure (None where not mapped).
        All four fields are None when there are no references at all.

    Raises:
        ValueError: If an entry has a not supported reference type.

    Note that a 'type' field is written in every input entry, so inputs are modified in place.
    """
    # Reformat mapping data to the topology system
    # Add the reference type to each reference object
    # The assignment order is kept as is, since it decides the final type
    # in case the very same entry object was passed in more than one map
    for data in lipid_map:
        data['type'] = 'inchikey'
    for data in protein_map:
        data['type'] = 'protein'
    for data in ligand_map:
        data['type'] = 'ligand'

    # Get the count of residues from the structure
    residues_count = len(structure.residues)

    # Now format data
    reference_ids = []
    reference_types = []
    residue_reference_indices = [None] * residues_count
    residue_reference_numbers = [None] * residues_count

    for data in protein_map + ligand_map + lipid_map:
        match = data['match']
        reference = match['ref']
        # If reference is missing at this point then it means we failed to find a matching reference
        if reference is None:
            continue

        # Check if we have the "no referable" or the "not found" flag instead of a reference
        matched_flag = next(
            (flag for flag in (NO_REFERABLE_FLAG, NOT_FOUND_FLAG) if reference == flag),
            None,
        )
        if matched_flag is not None:
            # Flags are listed as any other reference and they are always typed as protein
            # Flagged residues get a reference index, but not a reference number
            reference_index = _get_reference_index(
                reference_ids, reference_types, matched_flag, 'protein')
            for residue_index in data['residue_indices']:
                residue_reference_indices[residue_index] = reference_index
            continue

        # Get the reference type and, according to it, the reference id
        reference_type = data['type']
        id_field = _REFERENCE_ID_FIELDS.get(reference_type)
        if id_field is None:
            raise ValueError('Not supported type ' + reference_type)
        reference_id = reference[id_field]

        # Get the reference index
        # Note that several matches may belong to the same reference and thus have the same index
        # NOTE(review): references are told apart by id only, so two references of different
        # type with the same id would share index. Presumably this never happens: to be confirmed.
        reference_index = _get_reference_index(
            reference_ids, reference_types, reference_id, reference_type)

        # Set the topology reference index and number for each residue
        if reference_type == 'protein':
            for residue_index, residue_number in zip(data['residue_indices'], match['map']):
                # Residues not mapped to any reference residue are left as None
                if residue_number is None:
                    continue
                residue_reference_indices[residue_index] = reference_index
                residue_reference_numbers[residue_index] = residue_number
        else:
            # Note that ligands and lipids do not have any residue reference numbering
            # Residue indices are parsed to int here only, as it was already done
            # NOTE(review): presumably these indices may come as non-int values: to be confirmed
            for residue_index in data['residue_indices']:
                residue_reference_indices[int(residue_index)] = reference_index

    # If there are not references at the end then set all fields as None, in order to save space
    if not reference_ids:
        reference_ids = None
        reference_types = None
        residue_reference_indices = None
        residue_reference_numbers = None

    return {
        'references': reference_ids,
        'reference_types': reference_types,
        'residue_reference_indices': residue_reference_indices,
        'residue_reference_numbers': residue_reference_numbers,
    }