Coverage for model_workflow/tools/generate_topology.py: 91%
44 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-23 10:54 +0000
1from model_workflow.utils.auxiliar import warn, save_json, MISSING_CHARGES
2from model_workflow.utils.auxiliar import MISSING_BONDS, JSON_SERIALIZABLE_MISSING_BONDS
3from model_workflow.utils.type_hints import *
def generate_topology (
    structure : 'Structure',
    charges : List[int],
    residue_map : dict,
    pbc_residues : List[int],
    cg_residues : List[int],
    output_filepath : str
):
    """Prepare the standard topology file to be uploaded to the database.

    The structure is flattened into parallel per-atom, per-residue and
    per-chain lists. These are merged with the residue map and the residue
    index lists, and the resulting dictionary is written with ``save_json``.

    Args:
        structure: Object providing the ``atoms``, ``bonds``, ``residues``
            and ``chains`` collections to be dumped.
        charges: Atom charges. ``None``, the ``MISSING_CHARGES`` flag and an
            empty sequence are all stored as ``None`` and raise a warning.
        residue_map: Dictionary whose entries are merged as they are into the
            topology. It is unpacked after the atom/residue/chain fields, so
            any colliding key would override them.
        pbc_residues: Value stored under the 'pbc_residues' key.
        cg_residues: Value stored under the 'cg_residues' key.
        output_filepath: Path handed to ``save_json`` for the output file.

    Returns:
        None. The topology is only written to ``output_filepath``.
    """

    # The structure will be a bunch of arrays
    # Atom data
    atoms = structure.atoms
    atom_names = [ atom.name for atom in atoms ]
    atom_elements = [ atom.element for atom in atoms ]
    atom_residue_indices = [ atom.residue.index for atom in atoms ]

    # Set the atom bonds
    # In order to make it more standard sort atom bonds by their indices
    # Also replace missing bonds exceptions by a json serializable flag
    atom_bonds = [
        JSON_SERIALIZABLE_MISSING_BONDS if atom_indices == MISSING_BONDS else sorted(atom_indices)
        for atom_indices in structure.bonds
    ]

    # Residue data
    residues = structure.residues
    residue_names = [ residue.name for residue in residues ]
    residue_numbers = [ residue.number for residue in residues ]
    residue_chain_indices = [ residue.chain.index for residue in residues ]
    # Icodes are saved as a dictionary since usually only a few residues have icodes (or none)
    # Residue indices are used as keys and, when loaded to mongo, they will become strings
    # Saving icodes as an array would be inefficient since it will result in an array filled with nulls
    residue_icodes = {
        index: residue.icode
        for index, residue in enumerate(residues)
        if residue.icode
    }
    # In case there are no icodes at all set the icodes as None (i.e. null for mongo)
    if not residue_icodes:
        residue_icodes = None

    # Chain data
    chain_names = [ chain.name for chain in structure.chains ]

    # Check we have charges and, if not, set charges as None (i.e. null for mongo)
    # The None check goes first and uses identity, as recommended for singletons
    has_charges = (
        charges is not None
        and charges != MISSING_CHARGES
        and len(charges) > 0
    )
    atom_charges = charges if has_charges else None
    if not has_charges:
        warn('Topology is missing atom charges')

    # Setup the final output
    # Note that key order matters here: later keys override earlier ones
    topology = {
        'atom_names': atom_names,
        'atom_elements': atom_elements,
        'atom_charges': atom_charges,
        'atom_residue_indices': atom_residue_indices,
        'atom_bonds': atom_bonds,
        'residue_names': residue_names,
        'residue_numbers': residue_numbers,
        'residue_icodes': residue_icodes,
        'residue_chain_indices': residue_chain_indices,
        'chain_names': chain_names,
        # Residues map
        **residue_map,
        # Save also some residue indices lists here
        'pbc_residues': pbc_residues,
        'cg_residues': cg_residues,
    }
    save_json(topology, output_filepath)