Coverage for model_workflow/tools/residue_mapping.py: 65%

60 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1from model_workflow.utils.constants import NO_REFERABLE_FLAG, NOT_FOUND_FLAG 

2from model_workflow.utils.type_hints import * 

3 

def generate_residue_mapping(
    protein_map: List[dict],
    ligand_map: List[dict],
    structure: 'Structure',
) -> dict:
    """Build the residue map from both the protein and the ligand maps.

    The result is formatted as both the standard topology and the metadata
    generators expect it.

    Args:
        protein_map: Protein mapping entries. Each entry is read through
            ``entry['match']['ref']``, ``entry['match']['map']`` and
            ``entry['residue_indices']``. A regular reference is read through
            its ``'uniprot'`` key.
        ligand_map: Ligand mapping entries. Each entry is read through
            ``entry['match']['ref']`` and ``entry['residue_indices']``.
            A regular reference is read through its ``'pubchem'`` key.
        structure: Object whose ``residues`` attribute supports ``len()``.
            Its length sets the size of the per-residue lists.

    Returns:
        A dict with the keys ``'references'``, ``'reference_types'``,
        ``'residue_reference_indices'`` and ``'residue_reference_numbers'``.
        All four values are ``None`` when no reference was registered.

    Raises:
        ValueError: If an entry type is neither ``'protein'`` nor ``'ligand'``.

    Note:
        Input entries are modified in place: a ``'type'`` key is written
        to every entry of both maps.
    """
    # Add the reference type to each reference object
    for data in protein_map:
        data['type'] = 'protein'
    for data in ligand_map:
        data['type'] = 'ligand'

    # Get the count of residues from the structure
    residues_count = len(structure.residues)

    # Output fields, reformatted to the topology system
    reference_ids = []
    reference_types = []
    residue_reference_indices = [None] * residues_count
    residue_reference_numbers = [None] * residues_count

    def _reference_index_for(reference_id, reference_type) -> int:
        """Register the reference if it is new and return its index."""
        if reference_id not in reference_ids:
            reference_ids.append(reference_id)
            reference_types.append(reference_type)
        return reference_ids.index(reference_id)

    for data in protein_map + ligand_map:
        match = data['match']
        # Note that several matches may belong to the same reference
        # and thus share the same reference index
        reference = match['ref']
        # A missing reference means we failed to find a matching reference
        if reference is None:
            continue
        # Flagged entries get a reference index but no residue numbering.
        # Both flags are recorded with the 'protein' type.
        if reference == NO_REFERABLE_FLAG:
            reference_index = _reference_index_for(NO_REFERABLE_FLAG, 'protein')
            for residue_index in data['residue_indices']:
                residue_reference_indices[residue_index] = reference_index
            continue
        if reference == NOT_FOUND_FLAG:
            reference_index = _reference_index_for(NOT_FOUND_FLAG, 'protein')
            for residue_index in data['residue_indices']:
                residue_reference_indices[residue_index] = reference_index
            continue
        # Regular reference: the id key depends on the reference type
        reference_type = data['type']
        if reference_type == 'protein':
            reference_id = reference['uniprot']
        elif reference_type == 'ligand':
            reference_id = reference['pubchem']
        else:
            raise ValueError('Not supported type ' + reference_type)
        reference_index = _reference_index_for(reference_id, reference_type)
        # Set the topology reference number and index for each residue
        # Note that ligands do not have any residue reference numbering
        if reference_type == 'protein':
            for residue_index, residue_number in zip(data['residue_indices'], match['map']):
                # Residues with no reference number are left unassigned
                if residue_number is None:
                    continue
                residue_reference_indices[residue_index] = reference_index
                residue_reference_numbers[residue_index] = residue_number
        elif reference_type == 'ligand':
            for residue_index in data['residue_indices']:
                residue_reference_indices[int(residue_index)] = reference_index

    # If there are no references at the end then set all fields as None, in order to save space
    if not reference_ids:
        reference_ids = None
        reference_types = None
        residue_reference_indices = None
        residue_reference_numbers = None

    return {
        'references': reference_ids,
        'reference_types': reference_types,
        'residue_reference_indices': residue_reference_indices,
        'residue_reference_numbers': residue_reference_numbers,
    }