Coverage for model_workflow/tools/generate_topology.py: 91%

44 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1from model_workflow.utils.auxiliar import warn, save_json, MISSING_CHARGES 

2from model_workflow.utils.auxiliar import MISSING_BONDS, JSON_SERIALIZABLE_MISSING_BONDS 

3from model_workflow.utils.type_hints import * 

4 

def generate_topology (
    structure : 'Structure',
    charges : List[int],
    residue_map : dict,
    pbc_residues : List[int],
    cg_residues : List[int],
    output_filepath : str
) -> None:
    """Prepare the standard topology file to be uploaded to the database.

    The topology is a flat dictionary of parallel arrays (one entry per atom,
    residue or chain) which is written to disk through ``save_json``.

    Args:
        structure: Object exposing ``atoms``, ``bonds``, ``residues`` and
            ``chains``. Atoms provide ``name``, ``element`` and
            ``residue.index``; residues provide ``name``, ``number``,
            ``icode`` and ``chain.index``; chains provide ``name``.
        charges: Atom charges. When this is ``MISSING_CHARGES``, ``None`` or
            empty, the topology stores ``None`` and a warning is issued.
            NOTE(review): annotated as ``List[int]`` although charges are
            presumably floats -- confirm against callers.
        residue_map: Dictionary whose entries are merged as they are into the
            top level of the topology.
        pbc_residues: Stored unchanged under the ``pbc_residues`` key.
        cg_residues: Stored unchanged under the ``cg_residues`` key.
        output_filepath: Path handed to ``save_json`` for the output file.
    """

    # The structure will be a bunch of arrays
    # Atom data
    structure_atoms = structure.atoms
    atom_names = [ atom.name for atom in structure_atoms ]
    atom_elements = [ atom.element for atom in structure_atoms ]
    atom_residue_indices = [ atom.residue.index for atom in structure_atoms ]

    # Set the atom bonds
    # In order to make it more standard sort atom bonds by their indices
    # Also replace missing bonds exceptions by a json serializable flag
    atom_bonds = [
        JSON_SERIALIZABLE_MISSING_BONDS if atom_indices == MISSING_BONDS else sorted(atom_indices)
        for atom_indices in structure.bonds
    ]

    # Residue data
    structure_residues = structure.residues
    residue_names = [ residue.name for residue in structure_residues ]
    residue_numbers = [ residue.number for residue in structure_residues ]
    residue_chain_indices = [ residue.chain.index for residue in structure_residues ]
    # Icodes are saved as a dictionary since usually only a few residues have icodes (or none)
    # Residue indices are used as keys and, when loaded to mongo, they will become strings
    # Saving icodes as an array would be inefficient since it would result in an array filled with nulls
    residue_icodes = {
        index: residue.icode
        for index, residue in enumerate(structure_residues)
        if residue.icode
    }
    # In case there are no icodes at all set the icodes as None (i.e. null for mongo)
    if not residue_icodes:
        residue_icodes = None

    # Chain data
    chain_names = [ chain.name for chain in structure.chains ]

    # Check we have charges and, if not, set charges as None (i.e. null for mongo)
    has_charges = charges != MISSING_CHARGES and charges is not None and len(charges) > 0
    atom_charges = charges if has_charges else None
    if not has_charges:
        warn('Topology is missing atom charges')

    # Setup the final output
    # Note that the order of keys is kept since it defines the order in the output file
    topology = {
        'atom_names': atom_names,
        'atom_elements': atom_elements,
        'atom_charges': atom_charges,
        'atom_residue_indices': atom_residue_indices,
        'atom_bonds': atom_bonds,
        'residue_names': residue_names,
        'residue_numbers': residue_numbers,
        'residue_icodes': residue_icodes,
        'residue_chain_indices': residue_chain_indices,
        'chain_names': chain_names,
        # Residues map
        **residue_map,
        # Save also some residue indices lists here
        'pbc_residues': pbc_residues,
        'cg_residues': cg_residues,
    }
    save_json(topology, output_filepath)