Coverage for model_workflow/tools/residue_mapping.py: 65%
60 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1from model_workflow.utils.constants import NO_REFERABLE_FLAG, NOT_FOUND_FLAG
2from model_workflow.utils.type_hints import *
def generate_residue_mapping(
    protein_map : List[dict],
    ligand_map : List[dict],
    structure : 'Structure',
) -> dict:
    """Build the residue map from both protein and ligand maps.

    The result is formatted as both the standard topology and metadata generators expect it.

    Args:
        protein_map: Protein mapping entries. Each entry is read through its
            'match' key (a dict holding 'ref' and, for regular protein
            references, 'map') and its 'residue_indices' key.
            Every entry is tagged in place with type = 'protein'.
        ligand_map: Ligand mapping entries, read through 'match' -> 'ref' and
            'residue_indices'. Every entry is tagged in place with type = 'ligand'.
        structure: Object whose 'residues' length sets the size of the per-residue lists.

    Returns:
        A dict with the keys 'references', 'reference_types',
        'residue_reference_indices' and 'residue_reference_numbers'.
        All four values are None when no reference was registered at all.

    Raises:
        ValueError: If an entry has a type other than 'protein' or 'ligand'.
            This is a defensive check since types are assigned right here.

    NOTE(review): flagged references are always registered with the 'protein' type,
    even when they come from the ligand map - confirm this is intended.
    NOTE(review): only the regular ligand branch coerces residue indices with int(),
    presumably because ligand indices may not arrive as plain ints - confirm whether
    the other branches need the same treatment.
    """
    # Add the reference type to each reference object
    # Note that this modifies the input dicts in place
    for data in protein_map:
        data['type'] = 'protein'
    for data in ligand_map:
        data['type'] = 'ligand'

    # Get the count of residues from the structure
    residues_count = len(structure.residues)

    # Now format data
    reference_ids = []
    reference_types = []
    residue_reference_indices = [None] * residues_count
    residue_reference_numbers = [None] * residues_count
    # Keep the position of every registered reference id to avoid searching the list every time
    reference_index_by_id = {}

    def get_reference_index(reference_id, reference_type : str) -> int:
        """Get the index of a reference id, registering it first if it is new."""
        reference_index = reference_index_by_id.get(reference_id)
        if reference_index is None:
            reference_index = len(reference_ids)
            reference_index_by_id[reference_id] = reference_index
            reference_ids.append(reference_id)
            reference_types.append(reference_type)
        return reference_index

    for data in protein_map + ligand_map:
        match = data['match']
        # Note that several matches may belong to the same reference and thus have the same index
        reference = match['ref']
        # If reference is missing at this point then it means we failed to find a matching reference
        if reference is None:
            continue
        # Check if we have the "no referable" flag or the "not found" flag
        if reference == NO_REFERABLE_FLAG:
            flag = NO_REFERABLE_FLAG
        elif reference == NOT_FOUND_FLAG:
            flag = NOT_FOUND_FLAG
        else:
            flag = None
        # Flags are registered as if they were references
        # Flagged residues get a reference index but never a reference number
        if flag is not None:
            reference_index = get_reference_index(flag, 'protein')
            for residue_index in data['residue_indices']:
                residue_reference_indices[residue_index] = reference_index
            continue
        # Get the reference id according to the reference type
        reference_type = data['type']
        if reference_type == 'protein':
            reference_id = reference['uniprot']
        elif reference_type == 'ligand':
            reference_id = reference['pubchem']
        else:
            raise ValueError('Not supported type ' + reference_type)
        # At this point we have a regular reference id (i.e. not a no referable / not found flag)
        reference_index = get_reference_index(reference_id, reference_type)
        # Set the topology reference number and index for each residue
        # Note that ligands do not have any residue reference numbering
        if reference_type == 'protein':
            for residue_index, residue_number in zip(data['residue_indices'], match['map']):
                # Residues with no number in the map are left unassigned
                if residue_number is None:
                    continue
                residue_reference_indices[residue_index] = reference_index
                residue_reference_numbers[residue_index] = residue_number
        else:
            for residue_index in data['residue_indices']:
                residue_reference_indices[int(residue_index)] = reference_index

    # If there are not references at the end then set all fields as None, in order to save space
    if not reference_ids:
        reference_ids = None
        reference_types = None
        residue_reference_indices = None
        residue_reference_numbers = None

    residue_map = {
        'references': reference_ids,
        'reference_types': reference_types,
        'residue_reference_indices': residue_reference_indices,
        'residue_reference_numbers': residue_reference_numbers
    }

    return residue_map