Coverage for mddb_workflow/utils/structures.py: 84%

1771 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 15:48 +0000

1# Main handler of the toolbelt 

2import os 

3import re 

4import math 

5import pytraj 

6import MDAnalysis 

7import numpy as np 

8from bisect import bisect 

9 

10from mddb_workflow.utils.file import File 

11from mddb_workflow.utils.selections import Selection 

12from mddb_workflow.utils.vmd_spells import get_vmd_selection_atom_indices, get_covalent_bonds 

13from mddb_workflow.utils.mdt_spells import sort_trajectory_atoms 

14from mddb_workflow.utils.gmx_spells import make_index, read_ndx 

15from mddb_workflow.utils.auxiliar import InputError, MISSING_BONDS 

16from mddb_workflow.utils.auxiliar import is_imported, residue_name_to_letter, otherwise, warn 

17from mddb_workflow.utils.constants import SUPPORTED_ION_ELEMENTS, SUPPORTED_ELEMENTS 

18from mddb_workflow.utils.constants import STANDARD_COUNTER_CATION_ATOM_NAMES, STANDARD_COUNTER_ANION_ATOM_NAMES 

19from mddb_workflow.utils.constants import STANDARD_SOLVENT_RESIDUE_NAMES, STANDARD_COUNTER_ION_ATOM_NAMES 

20from mddb_workflow.utils.constants import STANDARD_DUMMY_ATOM_NAMES, DUMMY_ATOM_ELEMENT, CG_ATOM_ELEMENT 

21from mddb_workflow.utils.constants import PROTEIN_RESIDUE_NAME_LETTERS, NUCLEIC_RESIDUE_NAME_LETTERS 

22from mddb_workflow.utils.constants import DNA_RESIDUE_NAME_LETTERS, RNA_RESIDUE_NAME_LETTERS 

23from mddb_workflow.utils.constants import FATTY_RESIDUE_NAMES, STEROID_RESIDUE_NAMES 

24from mddb_workflow.utils.type_hints import * 

25 

26 

27# Set all available chains according to pdb standards 

28AVAILABLE_CAPS = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') 

29AVAILABLE_LOWS = list('abcdefghijklmnopqrstuvwxyz') 

30AVAILABLE_LETTERS = AVAILABLE_CAPS + AVAILABLE_LOWS 

31 

32# Set letters to be found in alphanumerical bases 

33hexadecimal_letters = set(AVAILABLE_CAPS[:6] + AVAILABLE_LOWS[:6]) 

34alphanumerical_letters = set(AVAILABLE_CAPS[6:] + AVAILABLE_LOWS[6:]) 

35 

36# Set the expected number of bonds for each atom according to its element 

37coherent_bonds_with_hydrogen = { 

38 'H': { 'min': 1, 'max': 1 }, 

39 'O': { 'min': 1, 'max': 2 }, 

40 'N': { 'min': 1, 'max': 4 }, 

41 'C': { 'min': 2, 'max': 4 }, 

42 'S': { 'min': 1, 'max': 6 }, 

43 'P': { 'min': 2, 'max': 6 }, 

44} 

45# WARNING: Not deeply tested 

46coherent_bonds_without_hydrogen = { 

47 'O': { 'min': 0, 'max': 2 }, 

48 'N': { 'min': 1, 'max': 3 }, 

49 'C': { 'min': 1, 'max': 3 }, 

50 'S': { 'min': 1, 'max': 4 }, 

51 'P': { 'min': 2, 'max': 4 }, 

52} 

53 

54# Set the limit of residue numbers according to PDB format (4 characters, starts count at 1) 

55# This means the last number is 9999 and it is equivalent to index 9998 

56pdb_last_decimal_residue_index = 9998 

57 

58class Atom: 

59 """An atom class.""" 

60 

61 def __init__ (self, 

62 name : Optional[str] = None, 

63 element : Optional[str] = None, 

64 coords : Optional[Coords] = None, 

65 ): 

66 self.name = name 

67 self.element = element 

68 self.coords = tuple(coords) if coords else None 

69 # Set variables to store references to other related instances 

70 # These variables will be set further by the structure 

71 self._structure = None 

72 self._index = None 

73 self._residue_index = None 

74 

75 def __repr__ (self): 

76 return '<Atom ' + self.name + '>' 

77 

78 def __eq__ (self, other): 

79 if type(self) != type(other): 

80 return False 

81 return self._residue_index == other._residue_index and self.name == other.name 

82 

83 def __hash__ (self): 

84 return hash((self.index)) 

85 

86 def get_structure (self) -> Optional['Structure']: 

87 """The parent structure (read only). 

88 This value is set by the structure itself.""" 

89 return self._structure 

90 structure: 'Structure' = property(get_structure, None, None, "The parent structure (read only)") 

91 

92 def get_index (self) -> Optional[int]: 

93 """The residue index according to parent structure residues (read only). 

94 This value is set by the structure itself.""" 

95 return self._index 

96 index = property(get_index, None, None, "The residue index according to parent structure residues (read only)") 

97 

98 def get_residue_index (self) -> int: 

99 """The atom residue index according to parent structure residues. 

100 If residue index is set then make changes in all the structure to make this change coherent.""" 

101 return self._residue_index 

102 def set_residue_index (self, new_residue_index : int): 

103 # If the new residue index is the current residue index then do nothing 

104 # WARNING: It is important to stop this here or it could delete a residue which is not to be deleted 

105 if new_residue_index == self.residue_index: 

106 return 

107 # If there is not strucutre yet it means the residue is beeing set before the structure 

108 # We just save the residue index and wait for the structure to be set 

109 if not self.structure: 

110 self._residue_index = new_residue_index 

111 return 

112 # Relational indices are updated through a top-down hierarchy 

113 # Affected residues are the ones to update this atom internal residue index 

114 new_residue = self.structure.residues[new_residue_index] 

115 new_residue.add_atom(self) 

116 residue_index = property(get_residue_index, set_residue_index, None, 

117 "The atom residue index according to parent structure residues") 

118 

119 def get_residue (self) -> Optional['Residue']: 

120 """The atom residue. 

121 If residue is set then make changes in all the structure to make this change coherent.""" 

122 # If there is not strucutre yet it means the atom is beeing set before the structure 

123 # In this case it is not possible to get related residues in the structure 

124 # It also may happend that the residue is missing because the atom has been set rawly 

125 if not self.structure or self.residue_index == None: 

126 return None 

127 # Get the residue in the structure according to the residue index 

128 return self.structure.residues[self.residue_index] 

129 def set_residue (self, new_residue : 'Residue'): 

130 """Find the new residue index and set it as the atom residue index. 

131 Note that the residue must be set in the structure already.""" 

132 new_residue_index = new_residue.index 

133 if new_residue_index == None: 

134 raise ValueError('Residue ' + str(new_residue) + ' is not set in the structure') 

135 self.set_residue_index(new_residue_index) 

136 residue = property(get_residue, set_residue, None, "The atom residue") 

137 

138 def get_chain_index (self) -> Optional[int]: 

139 """Get the atom chain index according to parent structure chains.""" 

140 # The residue may be missing if the atom has been set rawly 

141 if not self.residue: 

142 return None 

143 return self.residue.chain_index 

144 chain_index = property(get_chain_index, None, None, 

145 "The atom chain index according to parent structure chains (read only)") 

146 

147 def get_chain (self) -> Optional['Chain']: 

148 """The atom chain (read only). 

149 In order to change the chain it must be changed in the atom residue.""" 

150 # If there is not strucutre yet it means the atom is beeing set before the structure 

151 # In this case it is not possible to get related residues in the structure 

152 # It also may happend that the chain is missing because the atom has been set rawly 

153 if not self.structure or self.chain_index == None: 

154 return None 

155 # Get the chain in the structure according to the chain index 

156 return self.structure.chains[self.chain_index] 

157 chain: 'Chain' = property(get_chain, None, None, "The atom chain (read only)") 

158 

159 def get_bonds (self, skip_ions : bool = False, skip_dummies : bool = False) -> Optional[ list[int] ]: 

160 """Get indices of other atoms in the structure which are covalently bonded to this atom.""" 

161 if not self.structure: 

162 raise ValueError('The atom has not a structure defined') 

163 if self.index == None: 

164 raise ValueError('The atom has not an index defined') 

165 bonds = self.structure.bonds[self.index] 

166 # If the skip ions argument is passed then remove atom indices of supported ion atoms 

167 if skip_ions: 

168 bonds = list(set(bonds) - self.structure.ion_atom_indices) 

169 if skip_dummies: 

170 bonds = list(set(bonds) - self.structure.dummy_atom_indices) 

171 return bonds 

172 

173 # Atoms indices of atoms in the structure which are covalently bonded to this atom 

174 bonds = property(get_bonds, None, None, 

175 'Atoms indices of atoms in the structure which are covalently bonded to this atom') 

176 

177 def get_bonded_atoms (self) -> list['Atom']: 

178 """Get bonded atoms.""" 

179 return [ self.structure.atoms[atom_index] for atom_index in self.bonds ] 

180 

181 def get_selection (self) -> 'Selection': 

182 """Generate a selection for this atom.""" 

183 return Selection([self.index]) 

184 

185 def copy (self) -> 'Atom': 

186 """Make a copy of the current atom.""" 

187 atom_copy = Atom(self.name, self.element, self.coords) 

188 atom_copy._structure = self._structure 

189 atom_copy._index = self._index 

190 atom_copy._residue_index = self._residue_index 

191 return atom_copy 

192 

193 def is_fatty_candidate (self) -> bool: 

194 """ 

195 Check if this atom meets specific criteria: 

196 1. It is a carbon 

197 2. It is connected only to other carbons and hydrogens 

198 3. It is connected to 1 or 2 carbons 

199 """ 

200 # Ignore non carbon atoms 

201 if self.element != 'C': 

202 return False 

203 # Get bonded atom elements 

204 bonded_atoms_elements = [ atom.element for atom in self.get_bonded_atoms() ] 

205 # Check only carbon and hydrogen atoms to be bonded 

206 if any((element != 'C' and element != 'H') for element in bonded_atoms_elements): 

207 return False 

208 # Check it is connected to 1 or 2 carbons 

209 connected_carbons_count = bonded_atoms_elements.count('C') 

210 if connected_carbons_count != 1 and connected_carbons_count != 2: 

211 return False 

212 return True 

213 

214 def is_carbohydrate_candidate (self) -> bool: 

215 """ 

216 Check if this atom meets specific criteria: 

217 1. It is a carbon 

218 2. It is connected only to other carbons, hydrogens or oxygens 

219 3. It is connected to 1 or 2 carbons 

220 4. It is connected to 1 oxygen 

221 """ 

222 # Ignore non carbon atoms 

223 if self.element != 'C': 

224 return False 

225 # Get bonded atom elements 

226 bonded_atoms_elements = [ atom.element for atom in self.get_bonded_atoms() ] 

227 # Check only carbon and hydrogen atoms to be bonded 

228 if any((element != 'C' and element != 'H' and element != 'O') for element in bonded_atoms_elements): 

229 return False 

230 # Check it is connected to 1 or 2 carbons 

231 connected_carbons_count = bonded_atoms_elements.count('C') 

232 if connected_carbons_count != 1 and connected_carbons_count != 2: 

233 return False 

234 # Check it is connected to 1 oxygen 

235 connected_oxygens_count = bonded_atoms_elements.count('O') 

236 if connected_oxygens_count != 1: 

237 return False 

238 return True 

239 

240 def is_ion (self) -> bool: 

241 """Check if it is an ion by checking if it has no bonds with other atoms.""" 

242 return len(self.bonds) == 0 

243 

244 def guess_element (self) -> str: 

245 """Guess an atom element from its name and number of bonds.""" 

246 # If the atom name is among the known dummy atoms then return a standard element for dummy atoms 

247 if self.name.upper() in STANDARD_DUMMY_ATOM_NAMES: 

248 return DUMMY_ATOM_ELEMENT 

249 # If the element is already the "coarse grained" flag then return this very same flag 

250 # WARNING: This has to be preset before guessing or the guess may fail 

251 if self.is_cg(): 

252 return CG_ATOM_ELEMENT 

253 # If the name is SOD and it is a lonely atom then it is clearly sodium, not sulfur 

254 if self.name.upper() == 'SOD' and self.residue.atom_count == 1: 

255 return 'Na' 

256 # If the name is POT and it is a lonely atom then it is clearly potassium, not phosphor 

257 if self.name.upper() == 'POT' and self.residue.atom_count == 1: 

258 return 'K' 

259 # Find a obvios element name in the atom name 

260 element = self.get_name_suggested_element() 

261 # CA may refer to calcium or alpha carbon, so the number of atoms in the residue is decisive 

262 if element == 'Ca' and self.residue.atom_count >= 2: 

263 return 'C' 

264 # NA may refer to sodium or some ligand nitrogen, so the number of atoms in the residue is decisive 

265 if element == 'Na' and self.residue.atom_count >= 2: 

266 return 'N' 

267 return element 

268 

269 def get_name_suggested_element (self) -> str: 

270 """Guess an atom element from its name only.""" 

271 # Get the atom name and its characters length 

272 name = self.name 

273 length = len(name) 

274 next_character = None 

275 for i, character in enumerate(name): 

276 # Get the next character, since element may be formed by 2 letters 

277 if i < length - 1: 

278 next_character = name[i+1] 

279 # If next character is not a string then ignore it 

280 if not next_character.isalpha(): 

281 next_character = None 

282 # Try to get all possible matches between the characters and the supported atoms 

283 # First letter is always caps 

284 character = character.upper() 

285 # First try to match both letters together 

286 if next_character: 

287 # Start with the second letter in caps 

288 next_character = next_character.upper() 

289 both = character + next_character 

290 if both in SUPPORTED_ELEMENTS: 

291 return both 

292 # Continue with the second letter in lowers 

293 next_character = next_character.lower() 

294 both = character + next_character 

295 if both in SUPPORTED_ELEMENTS: 

296 return both 

297 # Finally, try with the first character alone 

298 if character in SUPPORTED_ELEMENTS: 

299 return character 

300 raise InputError(f"Cannot guess atom element from atom name '{name}'") 

301 

302 def get_label (self) -> str: 

303 """Get a standard label.""" 

304 return f'{self.residue.label}.{self.name} (index {self.index})' 

305 # The atom standard label (read only) 

306 label = property(get_label, None, None) 

307 

308 # Ask if the current atom is in coarse grain 

309 def is_cg (self) -> bool: 

310 return self.element == CG_ATOM_ELEMENT 

311 

312class Residue: 

313 """A residue class.""" 

314 def __init__ (self, 

315 name : Optional[str] = None, 

316 number : Optional[int] = None, 

317 icode : Optional[str] = None, 

318 ): 

319 self.name = name 

320 self.number = number 

321 self.icode = icode 

322 # Set variables to store references to other related instaces 

323 # These variables will be set further by the structure 

324 self._structure = None 

325 self._index = None 

326 self._atom_indices = [] 

327 self._chain_index = None 

328 self._classification = None 

329 

330 def __repr__ (self): 

331 return f'<Residue {self.label}>' 

332 

333 def __eq__ (self, other): 

334 if type(self) != type(other): 

335 return False 

336 return ( 

337 self._chain_index == other._chain_index and 

338 #self.name == other.name and 

339 self.number == other.number and 

340 self.icode == other.icode 

341 ) 

342 

343 def __hash__ (self): 

344 # WARNING: This is susceptible to duplicated residues 

345 return hash((self._chain_index, self.number, self.icode)) 

346 # WARNING: This is not susceptible to duplicated residues 

347 #return hash(tuple(self._atom_indices)) 

348 

349 def get_structure (self) -> Optional['Structure']: 

350 """Get the parent structure (read only). 

351 This value is set by the structure itself.""" 

352 return self._structure 

353 structure: 'Structure' = property(get_structure, None, None, 

354 "The parent structure (read only)") 

355 

356 def get_index (self) -> Optional[int]: 

357 """Get the residue index according to parent structure residues (read only). 

358 This value is set by the structure itself.""" 

359 return self._index 

360 def set_index (self, index): 

361 # Update residue atoms 

362 for atom in self.atoms: 

363 atom._residue_index = index 

364 # Update residue chain 

365 chain_residue_index = self.chain._residue_indices.index(self._index) 

366 self.chain._residue_indices[chain_residue_index] = index 

367 # Finally update self index 

368 self._index = index 

369 index = property(get_index, set_index, None, 

370 "The residue index according to parent structure residues (read only)") 

371 

372 def get_atom_indices (self) -> list[int]: 

373 """Get the atom indices according to parent structure atoms for atoms in this residue. 

374 If atom indices are set then make changes in all the structure to make this change coherent.""" 

375 return self._atom_indices 

376 def set_atom_indices (self, new_atom_indices : list[int]): 

377 # If there is not strucutre yet it means the residue is beeing set before the structure 

378 # We just save atom indices and wait for the structure to be set 

379 if not self.structure: 

380 self._atom_indices = new_atom_indices 

381 return 

382 # Update the current atoms 

383 for atom in self.atoms: 

384 atom._residue_index = None 

385 # Update the new atoms 

386 for index in new_atom_indices: 

387 atom = self.structure.atoms[index] 

388 atom._residue_index = self.index 

389 # Now new indices are coherent and thus we can save them 

390 self._atom_indices = new_atom_indices 

391 atom_indices = property(get_atom_indices, set_atom_indices, None, 

392 "The atom indices according to parent structure atoms for atoms in this residue") 

393 

394 def get_atoms (self) -> list['Atom']: 

395 """Get the atoms in this residue. 

396 If atoms are set then make changes in all the structure to make this change coherent.""" 

397 # If there is not strucutre yet it means the chain is beeing set before the structure 

398 # In this case it is not possible to get related atoms in the structure 

399 if not self.structure: 

400 return [] 

401 # Get atoms in the structure according to atom indices 

402 atoms = self.structure.atoms 

403 return [ atoms[atom_index] for atom_index in self.atom_indices ] 

404 def set_atoms (self, new_atoms : list['Atom']): 

405 # Find indices for new atoms and set their indices as the new atom indices 

406 # Note that atoms must be set in the structure already 

407 new_atom_indices = [] 

408 for new_atom in new_atoms: 

409 new_atom_index = new_atom.index 

410 if new_atom_index == None: 

411 raise ValueError('Atom ' + str(new_atom) + ' is not set in the structure') 

412 new_atom_indices.append(new_atom_index) 

413 self.set_atom_indices(new_atom_indices) 

414 atoms: list['Atom'] = property(get_atoms, set_atoms, None, "The atoms in this residue") 

415 

416 # Get the number of atoms in the residue 

417 def get_atom_count (self) -> int: 

418 return len(self.atom_indices) 

419 atom_count = property(get_atom_count, None, None, "The number of atoms in the residue (read only)") 

420 

421 def add_atom (self, new_atom : 'Atom'): 

422 """Add an atom to the residue.""" 

423 # Remove the atom from its previous residue owner 

424 if new_atom.residue_index: 

425 new_atom.residue.remove_atom(new_atom) 

426 # Insert the new atom index in the list of atom indices keeping the order 

427 new_atom_index = new_atom.index 

428 sorted_atom_index = bisect(self.atom_indices, new_atom_index) 

429 self.atom_indices.insert(sorted_atom_index, new_atom_index) 

430 # Update the atom internal index 

431 new_atom._residue_index = self.index 

432 

433 def remove_atom (self, current_atom : 'Atom'): 

434 """Remove an atom from the residue.""" 

435 # Remove the current atom index from the atom indices list 

436 self.atom_indices.remove(current_atom.index) # This index MUST be in the list 

437 # If we removed the last atom then this residue must be removed from its structure 

438 if len(self.atom_indices) == 0 and self.structure: 

439 self.structure.purge_residue(self) 

440 # Update the atom internal index 

441 current_atom._residue_index = None 

442 

443 # The residue chain index according to parent structure chains 

444 # If chain index is set then make changes in all the structure to make this change coherent 

445 def get_chain_index (self) -> int: 

446 return self._chain_index 

447 def set_chain_index (self, new_chain_index : int): 

448 # If the new chain index is the current chain index do nothing 

449 # WARNING: It is important to stop this here or it could delete a chain which is not to be deleted 

450 if new_chain_index == self.chain_index: 

451 return 

452 # If there is not strucutre yet it means the chain is beeing set before the structure 

453 # We just save the chain index and wait for the structure to be set 

454 if not self.structure: 

455 self._chain_index = new_chain_index 

456 return 

457 # Relational indices are updated through a top-down hierarchy 

458 # Affected chains are the ones to update this residue internal chain index 

459 # WARNING: It is critical to find the new chain before removing/adding residues 

460 # WARNING: It may happend that we remove the last residue in the current chain and the current chain is purged 

461 # WARNING: Thus the 'new_chain_index' would be obsolete since the structure.chains list would have changed 

462 new_chain = self.structure.chains[new_chain_index] 

463 if self.chain: 

464 self.chain.remove_residue(self) 

465 new_chain.add_residue(self) 

466 chain_index = property(get_chain_index, set_chain_index, None, 

467 "The residue chain index according to parent structure chains") 

468 

469 # The residue chain 

470 # If chain is set then make changes in all the structure to make this change coherent 

471 def get_chain (self) -> Optional['Chain']: 

472 # If there is not strucutre yet then it means the residue is set before the structure 

473 # In this case it is not possible to get related chain in the structure 

474 if not self.structure or self.chain_index == None: 

475 return None 

476 # Get the chain in the structure according to the chain index 

477 return self.structure.chains[self.chain_index] 

478 def set_chain (self, new_chain : Union['Chain', str]): 

479 # In case the chain is just a string we must find/create the corresponding chain 

480 if type(new_chain) == str: 

481 letter = new_chain 

482 # Get the residue structure 

483 structure = self.structure 

484 if not structure: 

485 raise ValueError(f'Cannot find the corresponding {new_chain} chain without the structure') 

486 # Find if the letter belongs to an already existing chain 

487 new_chain = structure.get_chain_by_name(letter) 

488 # If the chain does not exist yet then create it 

489 if not new_chain: 

490 new_chain = Chain(name=letter) 

491 structure.set_new_chain(new_chain) 

492 # Find the new chain index and set it as the residue chain index 

493 # Note that the chain must be set in the structure already 

494 new_chain_index = new_chain.index 

495 if new_chain_index == None: 

496 raise ValueError(f'Chain {new_chain} is not set in the structure') 

497 self.set_chain_index(new_chain_index) 

498 chain: 'Chain' = property(get_chain, set_chain, None, "The residue chain") 

499 

500 def get_bonded_atom_indices (self) -> list[int]: 

501 """Get atom indices from atoms bonded to this residue.""" 

502 # Get all bonds among residue atoms 

503 all_bonds = [] 

504 for atom in self.atoms: 

505 all_bonds += atom.bonds 

506 # Remove self atom indices from the list 

507 external_bonds = set(all_bonds) - set(self.atom_indices) 

508 return list(external_bonds) 

509 

510 def get_bonded_atoms (self) -> list['Atom']: 

511 """Get atoms bonded to this residue.""" 

512 return [ self.structure.atoms[atom_index] for atom_index in self.get_bonded_atom_indices() ] 

513 

514 def get_bonded_residue_indices (self) -> list[int]: 

515 """Get residue indices from residues bonded to this residue.""" 

516 return list(set([ atom.residue_index for atom in self.get_bonded_atoms() ])) 

517 

518 def get_bonded_residues (self) -> list['Residue']: 

519 """Get residues bonded to this residue.""" 

520 return [ self.structure.residues[residue_index] for residue_index in self.get_bonded_residue_indices() ] 

521 

522 def is_bonded_with_residue (self, other : 'Residue') -> bool: 

523 """Given another residue, check if it is bonded with this residue.""" 

524 bonded_atom_indices = set(self.get_bonded_atom_indices()) 

525 if next((index for index in other.atom_indices if index in bonded_atom_indices), None) != None: return True 

526 return False 

527 

528 def is_missing_any_bonds (self) -> bool: 

529 return any(atom.bonds == MISSING_BONDS for atom in self.atoms) 

530 

531 def is_coherent (self) -> bool: 

532 """Make sure atoms within the residue are all bonded.""" 

533 # If bonds are missing then just say everything is 

534 if self.is_missing_any_bonds(): 

535 raise RuntimeError('Trying to check if residue with missing bonds is coherent') 

536 residue_selection = self.get_selection() 

537 residue_fragments = self.structure.find_fragments(residue_selection) 

538 first_residue_fragment = next(residue_fragments) 

539 return len(first_residue_fragment) == self.atom_count 

540 

541 def get_classification (self) -> str: 

542 """ 

543 Get the residue biochemistry classification. 

544 

545 WARNING: Note that this logic will not work in a structure without hydrogens. 

546 

547 Available classifications: 

548 - protein 

549 - dna 

550 - rna 

551 - carbohydrate 

552 - fatty 

553 - steroid 

554 - ion 

555 - solvent 

556 - acetyl 

557 - amide 

558 - other 

559 """ 

560 # Return the internal value, if any 

561 if self._classification: 

562 return self._classification 

563 # If this is a CG residue then we can not classify it 

564 if self.is_cg(): return self.get_classification_by_name() 

565 # If we dont have a value then we must classify the residue 

566 # ------------------------------------------------------------------------------------------------------- 

567 # Ions are single atom residues 

568 if self.atom_count == 1: 

569 self._classification = 'ion' 

570 return self._classification 

571 # ------------------------------------------------------------------------------------------------------- 

572 # At this point we need atoms to have elements fixed 

573 # WARNING: missing elements would result in silent failure to recognize some classifications 

574 if self.structure._fixed_atom_elements == False: 

575 self.structure.fix_atom_elements() 

576 # Solvent is water molecules 

577 # First rely on the residue name 

578 if self.name in STANDARD_SOLVENT_RESIDUE_NAMES: 

579 self._classification = 'solvent' 

580 return self._classification 

581 # It may be water with a not known name 

582 # Literally check if its a molecule with 3 atoms: 2 hydrogens and 1 oxygen 

583 if self.atom_count == 3: 

584 atom_elements = [ atom.element for atom in self.atoms ] 

585 if atom_elements.count('H') == 2 and atom_elements.count('O') == 1: 

586 self._classification = 'solvent' 

587 return self._classification 

588 # ------------------------------------------------------------------------------------------------------- 

589 # Protein definition according to vmd: 

590 # a residue with atoms named C, N, CA, and O 

591 # In our case we accept OC1 and OC2 or OT1 and OT2 instead of O for terminal residues 

592 atom_names = set([ atom.name for atom in self.atoms ]) 

593 if ( all((name in atom_names) for name in ['C', 'N', 'CA']) and ( 

594 'O' in atom_names or { 'OC1', 'OC2' } <= atom_names or { 'OT1', 'OT2' } <= atom_names 

595 )): 

596 self._classification = 'protein' 

597 return self._classification 

598 # ------------------------------------------------------------------------------------------------------- 

599 # Nucleic acids definition according to vmd: 

600 # a residue with atoms named P, O1P, O2P and either O3’, C3’, C4’, C5’, O5’ or O3*, C3*, C4*, C5*, O5* 

601 # Apparently it has been fixed so now a residue does not need to be phosphorylated to be considered nucleic 

602 # LORE: This included the condition "all((name in atom_names) for name in ['P', 'OP1', 'OP2'])" 

603 if (( all((name in atom_names) for name in ["O3'", "C3'", "C4'", "C5'", "O5'"]) or 

604 all((name in atom_names) for name in ["O3*", "C3*", "C4*", "C5*", "O5*"]) )): 

605 # At this point we know it is nucleic 

606 # We must tell the difference between DNA and RNA 

607 if "O2'" in atom_names or "O2*" in atom_names: 

608 self._classification = 'rna' 

609 else: 

610 self._classification = 'dna' 

611 return self._classification 

612 # ------------------------------------------------------------------------------------------------------- 

613 # To define carbohydrates search for rings made of 1 oxygen and 'n' carbons 

614 # WARNING: This logic may fail for some very specific molecules such as furine 

615 # LIMITATION: This logic only aims for cyclical carbohydrates. The linear form of carbohydrates is not yet consider 

616 rings = self.find_rings(6) 

617 for ring in rings: 

618 ring_elements = [ atom.element for atom in ring ] 

619 # Check the ring has 1 oxygen 

620 oxygen_count = ring_elements.count('O') 

621 if oxygen_count != 1: 

622 continue 

623 # Check the rest are only carbon 

624 carbon_count = ring_elements.count('C') 

625 if carbon_count != len(ring) - 1: 

626 continue 

627 self._classification = 'carbohydrate' 

628 return self._classification 

629 # ------------------------------------------------------------------------------------------------------- 

630 # To define steroids search for 3 6-carbon rings and 1 5-carbon ring (at least) 

631 # WARNING: According to this logic some exotical non-steroid molecules may result in false positives 

632 num_6_carbon_rings = 0 

633 num_5_carbon_rings = 0 

634 for ring in rings: 

635 ring_elements = [ atom.element for atom in ring ] 

636 if any(element != 'C' for element in ring_elements): 

637 continue 

638 ring_lenght = len(ring) 

639 if ring_lenght == 5: 

640 num_5_carbon_rings += 1 

641 if ring_lenght == 6: 

642 num_6_carbon_rings += 1 

643 if num_6_carbon_rings >= 3 and num_5_carbon_rings >= 1: 

644 self._classification = 'steroid' 

645 return self._classification 

646 # ------------------------------------------------------------------------------------------------------- 

647 # To define fatty acids search for a series of carbon atoms connected together (3 at least) 

648 # These carbon atoms must be connected only to hydrogen in addition to 1-2 carbon 

649 # These carbons must not be bonded in a cycle so check atoms not be repeated in the series 

650 bonded_fatty_atoms = [] 

651 def get_bonded_fatty_atoms_recursive (current_atom : Atom, previous_atom : Optional[Atom] = None) -> bool: 

652 # Iterate over the input atom bonded atoms 

653 for bonded_atom in current_atom.get_bonded_atoms(): 

654 # Discard the previous atom to avoid an infinite loop 

655 if bonded_atom == previous_atom: 

656 continue 

657 # If we find an atom which is already in the fatty atoms list then it means we are cycling 

658 if bonded_atom in bonded_fatty_atoms: 

659 return False 

660 # If this is not a fatty atom then discard it 

661 if not bonded_atom.is_fatty_candidate(): 

662 continue 

663 # Add the current bonded fatty atom to the list 

664 bonded_fatty_atoms.append(bonded_atom) 

665 # Find more bonded fatty atoms and add them to list as well 

666 if get_bonded_fatty_atoms_recursive(current_atom=bonded_atom, previous_atom=current_atom) == False: 

667 return False 

668 return True 

669 # Now check all atoms and try to set the series until we find one which works 

670 already_checked_atoms = [] 

671 for atom in self.atoms: 

672 # If we already checked this atom trough the recursive logic then skip it 

673 if atom in already_checked_atoms: 

674 continue 

675 # If the atom is not a fatty candidate then skip it 

676 if not atom.is_fatty_candidate(): 

677 continue 

678 # Now that we have found a suitable candidate atom we start the series 

679 bonded_fatty_atoms = [atom] 

680 if get_bonded_fatty_atoms_recursive(atom) and len(bonded_fatty_atoms) >= 3: 

681 self._classification = 'fatty' 

682 return self._classification 

683 already_checked_atoms += bonded_fatty_atoms 

684 # ------------------------------------------------------------------------------------------------------- 

685 # Check if it is an acetylation capping terminal 

686 if self.name == 'ACE': 

687 self._classification = 'acetyl' 

688 return self._classification 

689 # ------------------------------------------------------------------------------------------------------- 

690 # Check if it is an N-methyl amide capping terminal 

691 if self.name == 'NME': 

692 self._classification = 'amide' 

693 return self._classification 

694 # ------------------------------------------------------------------------------------------------------- 

695 self._classification = 'other' 

696 return self._classification 

697 

698 def get_classification_by_name (self) -> str: 

699 """ 

700 Set an alternative function to "try" to classify the residues according only to its name. 

701 This is useful for corase grain residues whose atoms may not reflect the real atoms. 

702 WARNING: This logic is very limited and will return "unknown" most of the times. 

703 WARNING: This logic relies mostly in atom names, which may be not standard. 

704 """ 

705 if self.name in PROTEIN_RESIDUE_NAME_LETTERS: 

706 return 'protein' 

707 if self.name in DNA_RESIDUE_NAME_LETTERS: 

708 return 'dna' 

709 if self.name in RNA_RESIDUE_NAME_LETTERS: 

710 return 'rna' 

711 if self.name in NUCLEIC_RESIDUE_NAME_LETTERS: 

712 return 'nucleic' 

713 if self.name in FATTY_RESIDUE_NAMES: 

714 return 'fatty' 

715 if self.name in STEROID_RESIDUE_NAMES: 

716 return 'steroid' 

717 if self.name in STANDARD_COUNTER_ION_ATOM_NAMES: 

718 return 'ion' 

719 if self.name in STANDARD_SOLVENT_RESIDUE_NAMES: 

720 return 'solvent' 

721 # If we do not know what it is 

722 return 'unknown' 

723 

724 # The residue biochemistry classification (read only) 

725 classification = property(get_classification, None, None) 

726 

727 def get_selection (self) -> 'Selection': 

728 """Generate a selection for this residue.""" 

729 return Selection(self.atom_indices) 

730 

731 def copy (self) -> 'Residue': 

732 """Make a copy of the current residue.""" 

733 residue_copy = Residue(self.name, self.number, self.icode) 

734 residue_copy._structure = self._structure 

735 residue_copy._index = self._index 

736 residue_copy._atom_indices = self._atom_indices 

737 residue_copy._chain_index = self._chain_index 

738 return residue_copy 

739 

740 def get_letter (self) -> str: 

741 """Get the residue equivalent single letter code. 

742 Note that this makes sense for aminoacids and nucleotides only. 

743 Non recognized residue names return 'X'.""" 

744 return residue_name_to_letter(self.name) 

745 

746 def get_formula (self) -> str: 

747 """Get the formula of the residue.""" 

748 elements = [ atom.element for atom in self.atoms ] 

749 unique_elements = list(set(elements)) 

750 formula = '' 

751 for unique_element in unique_elements: 

752 count = elements.count(unique_element) 

753 number = get_lower_numbers(str(count)) if count > 1 else '' 

754 formula += unique_element + number 

755 return formula 

756 

757 def find_rings (self, max_ring_size : int) -> list[ list[Atom] ]: 

758 """Find rings in the residue.""" 

759 return self.structure.find_rings(max_ring_size, selection=self.get_selection()) 

760 

761 def split (self, 

762 first_residue_atom_indices : list[int], 

763 second_residue_atom_indices : list[int], 

764 first_residue_name : Optional[str] = None, 

765 second_residue_name : Optional[str] = None, 

766 first_residue_number : Optional[int] = None, 

767 second_residue_number : Optional[int] = None, 

768 first_residue_icode : Optional[str] = None, 

769 second_residue_icode : Optional[str] = None, 

770 ) -> tuple['Residue', 'Residue']: 

771 """Split this residue in 2 residues and return them in a tuple. 

772 Keep things coherent in the structure (renumerate all residues below this one). 

773 Note that all residue atoms must be covered by the splits.""" 

774 # This function is expected to be called in a residue with an already set structure 

775 if not self.structure: 

776 raise InputError('The split function should be called when the residue has an already defined structure') 

777 # Make sure all atoms in the residue are covered between both the first and the second residue 

778 if set(self.atom_indices) != set(first_residue_atom_indices + second_residue_atom_indices): 

779 print('Residue atoms: ' + ', '.join([ str(v) for v in set(self.atom_indices) ])) 

780 print('Covered atoms: ' + ', '.join([ str(v) for v in set(first_residue_atom_indices + second_residue_atom_indices) ])) 

781 raise InputError('All atom indices must be covered between both the first and the second residue') 

782 # Reuse this first residue instead of creating a new one 

783 if first_residue_name: 

784 self.name = first_residue_name 

785 if first_residue_number: 

786 self.number = first_residue_number 

787 if first_residue_icode: 

788 self.icode = first_residue_icode 

789 # Set the new second residue 

790 _second_residue_name = second_residue_name if second_residue_name else self.name 

791 _second_residue_number = second_residue_number if second_residue_number else self.number 

792 _second_residue_icode = second_residue_icode if second_residue_icode else get_next_letter(self.icode) 

793 second_residue = Residue(_second_residue_name, _second_residue_number, _second_residue_icode) 

794 second_residue._structure = self.structure 

795 new_residue_index = self.index + 1 

796 second_residue._index = new_residue_index 

797 # Insert the second residue in the structure residues list right after this residue 

798 self.structure.residues.insert(new_residue_index, second_residue) 

799 # Set the second residue index 

800 # Update the index of all residues which have been shifted after the insertion 

801 for residue_index in range(new_residue_index + 1, len(self.structure.residues)): 

802 residue = self.structure.residues[residue_index] 

803 residue.index = residue_index 

804 # Now transfer atoms from residue 1 to residue 2 as it is specified 

805 # Note that this will automatically update every atom 

806 second_residue.atom_indices = second_residue_atom_indices 

807 # Now add the new residue to the chain 

808 self.chain.add_residue(second_residue) 

809 

810 def split_by_atom_names (self, 

811 first_residue_atom_names : list[str], 

812 second_residue_atom_names : list[str], 

813 first_residue_name : Optional[str] = None, 

814 second_residue_name : Optional[str] = None, 

815 first_residue_number : Optional[int] = None, 

816 second_residue_number : Optional[int] = None, 

817 first_residue_icode : Optional[str] = None, 

818 second_residue_icode : Optional[str] = None, 

819 ) -> tuple['Residue', 'Residue']: 

820 """Parse atom names to atom indices and then call the split function.""" 

821 # Check all atom names to exist in the residue 

822 input_atom_names = set(first_residue_atom_names + second_residue_atom_names) 

823 residue_atom_names = set([ atom.name for atom in self.atoms ]) 

824 if input_atom_names != residue_atom_names: 

825 print(self) 

826 print(self.atoms) 

827 print('Input atom names: ' + ', '.join(input_atom_names)) 

828 print('Residue atom names: ' + ', '.join(residue_atom_names)) 

829 raise InputError('All residue atoms must be covered between both the first and the second residue atom names') 

830 # Convert atom names to atom indices 

831 first_residue_atom_indices = [ self.get_atom_by_name(name).index for name in first_residue_atom_names ] 

832 second_residue_atom_indices = [ self.get_atom_by_name(name).index for name in second_residue_atom_names ] 

833 # Call the actual split logic 

834 return self.split(first_residue_atom_indices, second_residue_atom_indices, first_residue_name, 

835 second_residue_name, first_residue_number, second_residue_number, first_residue_icode, second_residue_icode) 

836 

837 def get_atom_by_name (self, atom_name : str) -> 'Atom': 

838 """Get a residue atom given its name.""" 

839 return next(( atom for atom in self.atoms if atom.name == atom_name ), None) 

840 

841 def get_label (self) -> str: 

842 """Get a standard label.""" 

843 chainname = self.chain.name if self.chain.name.strip() else '' 

844 return f'{chainname}{self.number}{self.icode}({self.name})' 

845 label = property(get_label, None, None, 

846 "The residue standard label (read only)") 

847 

848 def is_cg (self) -> bool: 

849 """Ask if the current residue is in coarse grain. 

850 Note that we assume there may be not hybrid aa/cg residues.""" 

851 return any(atom.element == CG_ATOM_ELEMENT for atom in self.atoms) 

852 

853class Chain: 

854 """A chain of residues.""" 

855 def __init__ (self, 

856 name : Optional[str] = None, 

857 classification : Optional[str] = None, 

858 ): 

859 self.name = name 

860 self._classification = classification 

861 # Set variables to store references to other related instaces 

862 # These variables will be set further by the structure 

863 self._structure = None 

864 self._index = None 

865 self._residue_indices = [] 

866 

867 def __repr__ (self): 

868 return '<Chain ' + self.name + '>' 

869 

870 def __eq__ (self, other): 

871 return self.name == other.name 

872 

873 def __hash__ (self): 

874 return hash(self.name) 

875 

876 # The parent structure (read only) 

877 # This value is set by the structure itself 

878 def get_structure (self) -> Optional['Structure']: 

879 return self._structure 

880 structure: 'Structure' = property(get_structure, None, None, "The parent structure (read only)") 

881 

882 # The residue index according to parent structure residues (read only) 

883 # This value is set by the structure itself 

884 def get_index (self) -> Optional[int]: 

885 return self._index 

886 # When the index is set all residues are updated with the next chain index 

887 def set_index (self, index : int): 

888 for residue in self.residues: 

889 residue._chain_index = index 

890 self._index = index 

891 index = property(get_index, set_index, None, "The residue index according to parent structure residues (read only)") 

892 

893 # The residue indices according to parent structure residues for residues in this chain 

894 # If residue indices are set then make changes in all the structure to make this change coherent 

895 def get_residue_indices (self) -> list[int]: 

896 return self._residue_indices 

897 def set_residue_indices (self, new_residue_indices : list[int]): 

898 # If there is not strucutre yet it means the chain is beeing set before the structure 

899 # We just save residue indices and wait for the structure to be set 

900 if not self.structure: 

901 self._residue_indices = new_residue_indices 

902 return 

903 # Update the current residues 

904 for residue in self.residues: 

905 residue._chain_index = None 

906 # Update the new residues 

907 for index in new_residue_indices: 

908 residue = self.structure.residues[index] 

909 residue._chain_index = self.index 

910 # In case the new residue indices list is empty this chain must be removed from its structure 

911 if len(new_residue_indices) == 0: 

912 self.structure.purge_chain(self) 

913 # Now new indices are coherent and thus we can save them 

914 self._residue_indices = new_residue_indices 

915 residue_indices = property(get_residue_indices, set_residue_indices, None, "The residue indices according to parent structure residues for residues in this residue") 

916 

917 def get_residues (self) -> list['Residue']: 

918 """The residues in this chain. 

919 If residues are set then make changes in all the structure to make this change coherent.""" 

920 # If there is not strucutre yet it means the chain is beeing set before the structure 

921 # In this case it is not possible to get related residues in the structure 

922 if not self.structure: 

923 return [] 

924 # Get residues in the structure according to residue indices 

925 residues = self.structure.residues 

926 return [ residues[residue_index] for residue_index in self.residue_indices ] 

927 

928 def set_residues (self, new_residues : list['Residue']): 

929 """Find indices for new residues and set their indices as the new residue indices. 

930 Note that residues must be set in the structure already.""" 

931 new_residue_indices = [] 

932 for new_residue in new_residues: 

933 new_residue_index = new_residue.index 

934 if new_residue_index == None: 

935 raise ValueError(f'Residue {new_residue} is not set in the structure') 

936 new_residue_indices.append(new_residue_index) 

937 self.set_residue_indices(new_residue_indices) 

938 residues: list['Residue'] = property(get_residues, set_residues, None, "The residues in this chain") 

939 

940 def add_residue (self, residue : 'Residue'): 

941 """Add a residue to the chain.""" 

942 # Insert the new residue index in the list of residue indices keeping the order 

943 sorted_residue_index = bisect(self.residue_indices, residue.index) 

944 self.residue_indices.insert(sorted_residue_index, residue.index) 

945 # Update the residue internal chain index 

946 residue._chain_index = self.index 

947 

948 def remove_residue (self, residue : 'Residue'): 

949 """Remove a residue from the chain. 

950 WARNING: Note that this function does not trigger the set_residue_indices.""" 

951 self.residue_indices.remove(residue.index) # This index MUST be in the list 

952 # If we removed the last residue then this chain must be removed from its structure 

953 if len(self.residue_indices) == 0 and self.structure: 

954 self.structure.purge_chain(self) 

955 # Update the residue internal chain index 

956 residue._chain_index = None 

957 

958 def get_atom_indices (self) -> list[int]: 

959 """Get atom indices for all atoms in the chain (read only). 

960 In order to change atom indices they must be changed in their corresponding residues.""" 

961 return sum([ residue.atom_indices for residue in self.residues ], []) 

962 atom_indices = property(get_atom_indices, None, None, "Atom indices for all atoms in the chain (read only)") 

963 

964 def get_atoms (self) -> list[int]: 

965 """Get the atoms in the chain (read only). 

966 In order to change atoms they must be changed in their corresponding residues.""" 

967 return sum([ residue.atoms for residue in self.residues ], []) 

968 atoms: list['Atom'] = property(get_atoms, None, None, "Atoms in the chain (read only)") 

969 

970 def get_atom_count (self) -> int: 

971 """Get the number of atoms in the chain (read only).""" 

972 return len(self.atom_indices) 

973 atom_count = property(get_atom_count, None, None, "Number of atoms in the chain (read only)") 

974 

975 def get_residue_count (self) -> int: 

976 """Get the number of residues in the chain (read only).""" 

977 return len(self._residue_indices) 

978 residue_count = property(get_residue_count, None, None, "Number of residues in the chain (read only)") 

979 

980 def get_classification (self) -> str: 

981 """Get the chain classification.""" 

982 if self._classification: 

983 return self._classification 

984 self._classification = self.structure.get_selection_classification(self.get_selection()) 

985 return self._classification 

986 

987 def set_classification (self, classification : str): 

988 """Force the chain classification.""" 

989 self._classification = classification 

990 

991 classification = property(get_classification, set_classification, None, 

992 "Classification of the chain (manual or automatic)") 

993 

994 def get_sequence (self) -> str: 

995 """Get the residues sequence in one-letter code.""" 

996 return ''.join([ residue_name_to_letter(residue.name) for residue in self.residues ]) 

997 

998 def get_selection (self) -> 'Selection': 

999 """Generate a selection for this chain.""" 

1000 return Selection(self.atom_indices) 

1001 

1002 def copy (self) -> 'Chain': 

1003 """Make a copy of the current chain.""" 

1004 chain_copy = Chain(self.name) 

1005 chain_copy._structure = self._structure 

1006 chain_copy._index = self._index 

1007 chain_copy.residue_indices = self.residue_indices 

1008 return chain_copy 

1009 

1010 def has_cg (self) -> bool: 

1011 """Ask if the current chain has at least one coarse grain atom/residue.""" 

1012 return any(atom.element == CG_ATOM_ELEMENT for atom in self.atoms) 

1013 

1014 def is_missing_any_bonds (self) -> bool: 

1015 return any(atom.bonds == MISSING_BONDS for atom in self.atoms) 

1016 

1017 def find_residue (self, number : int, icode : str = '', index = None) -> Optional['Residue']: 

1018 """Find a residue by its number and insertion code.""" 

1019 # Iterate chain residues 

1020 for residue in self.residues: 

1021 if residue.number == number and residue.icode == icode and (index is None or residue.index == index): 

1022 return residue 

1023 return None 

1024 

1025class Structure: 

1026 """A structure is a group of atoms organized in chains and residues.""" 

1027 def __init__ (self, 

1028 atoms : list['Atom'] = [], 

1029 residues : list['Residue'] = [], 

1030 chains : list['Chain'] = [], 

1031 residue_numeration_base : int = 10, 

1032 ): 

1033 self.atoms: list['Atom'] = [] 

1034 self.residues: list['Residue'] = [] 

1035 self.chains: list['Chain'] = [] 

1036 # Set references between instances 

1037 for atom in atoms: 

1038 self.set_new_atom(atom) 

1039 for residue in residues: 

1040 self.set_new_residue(residue) 

1041 for chain in chains: 

1042 self.set_new_chain(chain) 

1043 # --- Set other internal variables --- 

1044 # Set bonds between atoms 

1045 self._bonds = None 

1046 # Set fragments of bonded atoms 

1047 self._fragments = None 

1048 # --- Set other auxiliar variables --- 

1049 # Trajectory atom sorter is a function used to sort coordinates in a trajectory file 

1050 # This function is generated when sorting indices in the structure 

1051 # Also the new atom order is saved 

1052 self.trajectory_atom_sorter = None 

1053 self.new_atom_order = None 

1054 # Track when atom elements have been fixed 

1055 self._fixed_atom_elements = False 

1056 # Save indices of supported ion atoms 

1057 self._ion_atom_indices = None 

1058 self._dummy_atom_indices = None 

1059 # Set the scale of the residue numbers 

1060 # It may be decimal (10), hexadecimal(16) or alphanumeric(36) 

1061 # Note that hybrid scales (1-9999 decimal and different further) are not explicitly supported 

1062 # However, the scale is guessed on read and conserved on write, so the original numeration would be conserved 

1063 # The default values is used only when the structure is not read from a PDB 

1064 self.residue_numeration_base = residue_numeration_base 

1065 

1066 def __repr__ (self): 

1067 return f'<Structure ({self.atom_count} atoms)>' 

1068 

1069 def get_bonds (self) -> list[ list[int] ]: 

1070 """Get the bonds between atoms.""" 

1071 # Return the stored value, if exists 

1072 if self._bonds: 

1073 return self._bonds 

1074 # If not, we must calculate the bonds using vmd 

1075 self._bonds = self.get_covalent_bonds() 

1076 return self._bonds 

1077 def set_bonds (self, bonds : list[ list[int] ]): 

1078 """Force specific bonds.""" 

1079 self._bonds = bonds 

1080 # Reset fragments 

1081 self._fragments = None 

1082 bonds = property(get_bonds, set_bonds, None, "The structure bonds") 

1083 

1084 def get_fragments (self) -> list['Selection']: 

1085 """Get the groups of atoms which are covalently bonded.""" 

1086 # Return the stored value, if exists 

1087 if self._fragments != None: 

1088 return self._fragments 

1089 # Otherwise, find fragments in all structure atoms 

1090 self._fragments = list(self.find_fragments()) 

1091 return self._fragments 

1092 # Fragments of covalently bonded atoms (read only) 

1093 fragments: list['Selection'] = property(get_fragments, None, None, "The structure fragments (read only)") 

1094 

1095 def find_fragments (self, 

1096 selection : Optional['Selection'] = None, 

1097 exclude_disulfide : bool = True, 

1098 atom_bonds : Optional[list[list[int]]] = None, 

1099 ) -> Generator['Selection', None, None]: 

1100 """Find fragments in a selection of atoms. A fragment is a selection of 

1101 covalently bonded atoms. All atoms are searched if no selection is provided. 

1102 

1103 WARNING: Note that fragments generated from a specific selection may not  

1104 match the structure fragments. A selection including 2 separated regions of a  

1105 structure fragment will yield 2 fragments. 

1106 

1107 For convenience, protein disulfide bonds are not considered in this logic by default. 

1108 """ 

1109 # If there is no selection we consider all atoms 

1110 if not selection: selection = self.select_all() 

1111 # Get/Find covalent bonds between atoms in a new object avoid further corruption (deletion) of the original list 

1112 safe_bonds = atom_bonds if atom_bonds else self.bonds 

1113 atom_indexed_covalent_bonds = { atom_index: [ *safe_bonds[atom_index] ] for atom_index in selection.atom_indices } 

1114 # Exclude disulfide bonds from  

1115 if exclude_disulfide: 

1116 # First search for protein disulfide bonds 

1117 excluded = set() 

1118 # Iterate atom bonds 

1119 for atom_index, bonds in atom_indexed_covalent_bonds.items(): 

1120 # Get the actual atom 

1121 atom = self.atoms[atom_index] 

1122 # If it is not sulfur then continue 

1123 if atom.element != 'S': continue 

1124 # Iterate bonded atoms 

1125 for bonded_atom_index in bonds: 

1126 # Bonds with atoms out of the selection are not considered 

1127 if bonded_atom_index not in atom_indexed_covalent_bonds: continue 

1128 # Get the bonded atom 

1129 bonded_atom = self.atoms[bonded_atom_index] 

1130 if bonded_atom.element != 'S': continue 

1131 # We found a disulfide bond here 

1132 # Make sure this is protein 

1133 if atom.residue.classification != 'protein': continue 

1134 if bonded_atom.residue.classification != 'protein': continue 

1135 # We found a protein disulfide bond, we must exclude it 

1136 bond = tuple(sorted([atom_index, bonded_atom_index])) 

1137 excluded.add(bond) 

1138 # Now remove the corresponding bonds 

1139 for bond in excluded: 

1140 first_atom_index, second_atom_index = bond 

1141 atom_indexed_covalent_bonds[first_atom_index].remove(second_atom_index) 

1142 atom_indexed_covalent_bonds[second_atom_index].remove(first_atom_index) 

1143 # Group the connected atoms in "fragments" 

1144 while len(atom_indexed_covalent_bonds) > 0: 

1145 start_atom_index, bonds = next(iter(atom_indexed_covalent_bonds.items())) 

1146 del atom_indexed_covalent_bonds[start_atom_index] 

1147 fragment_atom_indices = [ start_atom_index ] 

1148 while len(bonds) > 0: 

1149 # Get the next bond atom and remove it from the bonds list 

1150 next_atom_index = bonds[0] 

1151 bonds.remove(next_atom_index) 

1152 next_bonds = atom_indexed_covalent_bonds.get(next_atom_index, None) 

1153 # If this atom is out of the selection then skip it 

1154 if next_bonds == None: 

1155 continue 

1156 next_new_bonds = [ bond for bond in next_bonds if bond not in fragment_atom_indices + bonds ] 

1157 bonds += next_new_bonds 

1158 fragment_atom_indices.append(next_atom_index) 

1159 del atom_indexed_covalent_bonds[next_atom_index] 

1160 yield Selection(fragment_atom_indices) 

1161 

1162 def find_whole_fragments (self, selection : 'Selection') -> Generator['Selection', None, None]: 

1163 """Given a selection of atoms, find all whole structure fragments on them.""" 

1164 for fragment in self.fragments: 

1165 if selection & fragment: 

1166 yield fragment 

1167 

1168 def name_selection (self, selection : 'Selection') -> str: 

1169 """Name an atom selection depending on the chains it contains. This is used for debug purpouses.""" 

1170 atoms = [ self.atoms[index] for index in selection.atom_indices ] 

1171 # Count atoms per chain 

1172 atom_count_per_chain = { chain: 0 for chain in self.chains } 

1173 for atom in atoms: 

1174 atom_count_per_chain[atom.chain] += 1 

1175 # Set labels accoridng to the coverage of every chain 

1176 chain_labels = [] 

1177 for chain, atom_count in atom_count_per_chain.items(): 

1178 if atom_count == 0: continue 

1179 coverage = atom_count / chain.atom_count 

1180 label = f'chain {chain.name}' 

1181 if coverage < 1: 

1182 percent = round(coverage * 1000) / 10 

1183 label += f' ({percent}%)' 

1184 chain_labels.append(label) 

1185 return ', '.join(chain_labels) 

1186 

1187 def set_new_atom (self, atom : 'Atom'): 

1188 """Set a new atom in the structure.""" 

1189 atom._structure = self 

1190 new_atom_index = self.atom_count 

1191 self.atoms.append(atom) 

1192 atom._index = new_atom_index 

1193 

1194 def set_new_residue (self, residue : 'Residue'): 

1195 """ 

1196 Set a new residue in the structure. 

1197 WARNING: Atoms must be set already before setting residues. 

1198 """ 

1199 residue._structure = self 

1200 new_residue_index = len(self.residues) 

1201 self.residues.append(residue) 

1202 residue._index = new_residue_index 

1203 # In case the residue has atom indices, set relational indices on each atom 

1204 for atom_index in residue.atom_indices: 

1205 atom = self.atoms[atom_index] 

1206 atom._residue_index = new_residue_index 

1207 

1208 def purge_residue (self, residue : 'Residue'): 

1209 """ 

1210 Purge residue from the structure and its chain. 

1211 This can be done only when the residue has no atoms left in the structure. 

1212 Renumerate all residue indices which have been offsetted as a result of the purge. 

1213 """ 

1214 # Check the residue can be purged 

1215 if residue not in self.residues: 

1216 raise ValueError(f'{residue} is not in the structure already') 

1217 if len(residue.atom_indices) > 0: 

1218 raise ValueError(f'{residue} is still having atoms and thus it cannot be purged') 

1219 # Remove the purged residue from its chain 

1220 if residue.chain: residue.chain.remove_residue(residue) 

1221 # Get the current index of the residue to be purged 

1222 purged_index = residue.index 

1223 # Residues and their atoms below this index are not to be modified 

1224 # Residues and their atoms over this index must be renumerated 

1225 for affected_residue in self.residues[purged_index+1:]: 

1226 # Chaging the index automatically changes all residues atoms '_residue_index' values 

1227 # Chaging the index automatically changes its corresponding index in residue chain '_residue_indices' 

1228 affected_residue.index -= 1 

1229 # Finally, remove the current residue from the list of residues in the structure 

1230 del self.residues[purged_index] 

1231 

1232 def set_new_chain (self, chain : 'Chain'): 

1233 """Set a new chain in the structure. 

1234 WARNING: Residues and atoms must be set already before setting chains.""" 

1235 chain._structure = self 

1236 new_chain_index = len(self.chains) 

1237 self.chains.append(chain) 

1238 chain._index = new_chain_index 

1239 # In case the chain has residue indices, set relational indices on each residue 

1240 for residue_index in chain.residue_indices: 

1241 residue = self.residues[residue_index] 

1242 residue._chain_index = new_chain_index 

1243 

1244 def purge_chain (self, chain : 'Chain'): 

1245 """Purge chain from the structure. 

1246 This can be done only when the chain has no residues left in the structure. 

1247 Renumerate all chain indices which have been offsetted as a result of the purge.""" 

1248 # Check the chain can be purged 

1249 if chain not in self.chains: 

1250 raise ValueError(f'Chain {chain.name} is not in the structure already') 

1251 if len(chain.residue_indices) > 0: 

1252 raise ValueError(f'Chain {chain.name} is still having residues and thus it cannot be purged') 

1253 # Get the current index of the chain to be purged 

1254 purged_index = chain.index 

1255 # Chains and their residues below this index are not to be modified 

1256 # Chains and their residues over this index must be renumerated 

1257 for affected_chain in self.chains[purged_index+1:]: 

1258 # Chaging the index automatically changes all chain residues '_chain_index' values 

1259 affected_chain.index -= 1 

1260 # Finally, remove the current chain from the list of chains in the structure 

1261 del self.chains[purged_index] 

1262 

1263 @classmethod 

1264 def from_pdb (cls, pdb_content : str, model : Optional[int] = None, flexible_numeration : bool = True): 

1265 """Set the structure from a pdb file. 

1266 You may filter the PDB content for a specific model. 

1267 Some weird numeration systems are not supported and, when encountered, they are ignored. 

1268 In these cases we set our own numeration system. 

1269 Set the flexible numeration argument as false to avoid this behaviour, thus crashing instead.""" 

1270 # Filter the PDB content in case a model was passed 

1271 filtered_pdb_content = filter_model(pdb_content, model) if model else pdb_content 

1272 # Split the PDB content in lines 

1273 pdb_lines = filtered_pdb_content.split('\n') 

1274 # Before we start, we must guess the numeration system 

1275 # To do so mine all residue numbers 

1276 all_residue_number_characters = set() 

1277 for line in pdb_lines: 

1278 # Parse atoms only 

1279 start = line[0:6] 

1280 is_atom = start == 'ATOM ' or start == 'HETATM' 

1281 if not is_atom: continue 

1282 # Mine all residue numbers 

1283 residue_number = line[22:26] 

1284 for character in residue_number: 

1285 all_residue_number_characters.add(character) 

1286 # Remove white spaces 

1287 all_residue_number_characters.discard(' ') 

1288 # If we find a non-numerical and non-alphabetical character then we assume it has a weird numeration system 

1289 # Since we can not support every scenario, in this cases we set the numeration totally ignoring the one in the PDB 

1290 weird_character = next((character for character in all_residue_number_characters if not (character.isalnum() or character == '-')), None) 

1291 if weird_character: 

1292 if flexible_numeration == False: raise InputError(f'Not supported numeration system including "{weird_character}" characters') 

1293 warn(f'Weird residue numeration including "{weird_character}" characters found in the PDB file. Ignoring it.') 

1294 residue_numeration_base = None 

1295 # Search among all resiude numbers any letters (non-numerical characters) 

1296 elif next((letter for letter in alphanumerical_letters if letter in all_residue_number_characters), None): 

1297 residue_numeration_base = 36 

1298 elif next((letter for letter in hexadecimal_letters if letter in all_residue_number_characters), None): 

1299 residue_numeration_base = 16 

1300 else: 

1301 residue_numeration_base = 10 

1302 # Read the pdb content line by line and set the parsed atoms, residues and chains 

1303 parsed_atoms = [] 

1304 parsed_residues = [] 

1305 parsed_chains = [] 

1306 # Save chains and residues also in dictionaries only to find them faster 

1307 name_chains = {} 

1308 label_residues = {} 

1309 # Keep track of the last issued atom and residue indices 

1310 atom_index = -1 

1311 residue_index = -1 

1312 for line in pdb_lines: 

1313 # Parse atoms only 

1314 start = line[0:6] 

1315 is_atom = start == 'ATOM ' or start == 'HETATM' 

1316 if not is_atom: 

1317 continue 

1318 # Mine all atom data 

1319 atom_name = line[11:16].strip() 

1320 residue_name = line[17:21].strip() 

1321 chain_name = line[21:22] 

1322 residue_number = line[22:26] 

1323 icode = line[26:27] 

1324 if icode == ' ': 

1325 icode = '' 

1326 x_coord = float(line[30:38]) 

1327 y_coord = float(line[38:46]) 

1328 z_coord = float(line[46:54]) 

1329 element = line[76:78].strip() 

1330 # Set the parsed atom 

1331 parsed_atom = Atom(name=atom_name, element=element, coords=(x_coord, y_coord, z_coord)) 

1332 # Add the parsed atom to the list and update the current atom index 

1333 parsed_atoms.append(parsed_atom) 

1334 # Get the parsed chain 

1335 # DANI: If we always check for an already existing chain there will never be repeated chains 

1336 # DANI: However we may create chains with unconnected atoms silently (they were separated in the PDB) 

1337 # DANI: For this reason we must only consider the last processed chain 

1338 parsed_chain = name_chains.get(chain_name, None) 

1339 if parsed_chain and parsed_chain != parsed_chains[-1]: 

1340 parsed_chain = None 

1341 # If the parsed chain was not yet instantiated then do it now 

1342 if not parsed_chain: 

1343 parsed_chain = Chain(name=chain_name) 

1344 parsed_chains.append(parsed_chain) 

1345 name_chains[chain_name] = parsed_chain 

1346 # Get the parsed residue 

1347 residue_label = (chain_name, residue_number, icode) 

1348 # DANI: If we always check for an already existing residue there will never be repeated residues 

1349 # DANI: However we may create residues with unconnected atoms silently (they were separated in the PDB) 

1350 # DANI: For this reason we must only consider the last processed residue 

1351 parsed_residue = label_residues.get(residue_label, None) 

1352 if parsed_residue and parsed_residue != parsed_residues[-1]: 

1353 parsed_residue = None 

1354 # If the parsed residue was not yet instantiated then do it now 

1355 if not parsed_residue: 

1356 # Parse the residue number if it is to be parsed 

1357 if residue_numeration_base: 

1358 parsed_residue_number = int(residue_number, residue_numeration_base) 

1359 # If we decided to ignore the numeration system then we just issue a new residue number 

1360 # Use the last residue number from the current chain as reference 

1361 else: 

1362 chain_last_residue_index = parsed_chain.residue_indices[-1] if len(parsed_chain.residue_indices) > 0 else None 

1363 chain_last_residue_number = parsed_residues[chain_last_residue_index].number if chain_last_residue_index != None else 0 

1364 parsed_residue_number = chain_last_residue_number + 1 

1365 # Instantiate the new parsed residue 

1366 parsed_residue = Residue(name=residue_name, number=parsed_residue_number, icode=icode) 

1367 parsed_residues.append(parsed_residue) 

1368 label_residues[residue_label] = parsed_residue 

1369 # Add current residue to the parsed chain 

1370 residue_index += 1 

1371 parsed_chain.residue_indices.append(residue_index) 

1372 # Add current atom to the parsed residue 

1373 atom_index += 1 

1374 parsed_residue.atom_indices.append(atom_index) 

1375 return cls(atoms=parsed_atoms, residues=parsed_residues, chains=parsed_chains, residue_numeration_base=residue_numeration_base) 

1376 

1377 @classmethod 

1378 def from_pdb_file (cls, pdb_filepath : str, model : Optional[int] = None, flexible_numeration : bool = True): 

1379 """Set the structure from a pdb file. 

1380 You may filter the input PDB file for a specific model. 

1381 Some weird numeration systems are not supported and, when encountered, they are ignored. 

1382 In these cases we set our own numeration system. 

1383 Set the flexible numeration argument as false to avoid this behaviour, thus crashing instead.""" 

1384 pdb_file = File(pdb_filepath) 

1385 if not pdb_file.exists: 

1386 raise InputError(f'File "{pdb_filepath}" not found') 

1387 if not pdb_file.format == 'pdb': 

1388 raise InputError(f'"{pdb_filepath}" is not a path for a pdb file') 

1389 # Read the pdb file 

1390 pdb_content = None 

1391 with open(pdb_filepath, 'r') as file: 

1392 pdb_content = file.read() 

1393 return cls.from_pdb(pdb_content, model, flexible_numeration) 

1394 

1395 # https://biopandas.github.io/biopandas/tutorials/Working_with_mmCIF_Structures_in_DataFrames/ 

1396 @classmethod 

1397 def from_mmcif (cls, mmcif_content : str, model : Optional[int] = None, author_notation : bool = False): 

1398 """Set the structure from mmcif. 

1399 You may filter the content for a specific model. 

1400 You may ask for the author notation instead of the standarized notation for legacy reasons. 

1401 This may have an effect in atom names, residue names, residue numbers and chain names. 

1402 Read the pdb content line by line and set the parsed atoms, residues and chains. 

1403 """ 

1404 parsed_atoms = [] 

1405 parsed_residues = [] 

1406 parsed_chains = [] 

1407 # Save chains and residues also in dictionaries only to find them faster 

1408 name_chains = {} 

1409 label_residues = {} 

1410 # Keep track of the last issued atom and residue indices 

1411 atom_index = -1 

1412 residue_index = -1 

1413 # Iterate the content line by line 

1414 lines = iter(mmcif_content.split('\n')) 

1415 # Now mine atoms data 

1416 atom_headers = { 'ATOM', 'HETATM', 'ANISOU' } 

1417 for line in lines: 

1418 # Values are separated by spaces 

1419 values = line.split() 

1420 # If this is not atom line then we are done 

1421 if len(values) == 0 or values[0] not in atom_headers: continue 

1422 # Get the atom model number if a model number was passed 

1423 # Note that then model is a number greater than 0 

1424 if model: 

1425 model_number = int(values[20]) 

1426 # If the model number odes not match then skip it 

1427 if model != model_number: continue 

1428 # Mine atom data 

1429 # Next value is just the atom number, we do not need it 

1430 # atom_number = values[1] 

1431 # Mine the atom element 

1432 element = values[2] 

1433 # Mine the atom name 

1434 atom_name = values[3].replace('"','') 

1435 # Next value is a place holder to indicate alternate conformation, according to the docs 

1436 # I don't know what this is but we do not need it 

1437 # idk = values[4] 

1438 # Mine the residue name 

1439 residue_name = values[5] 

1440 # Mine the chain name 

1441 chain_name = values[6] 

1442 # Next value is the chain number, we do not need it 

1443 # chain_number = values[7] 

1444 # Residue number is '.' for chains with only one residue 

1445 residue_number = 1 if values[8] == '.' else int(values[8]) 

1446 icode = '' if values[9] == '?' else values[9] 

1447 x_coord = float(values[10]) 

1448 y_coord = float(values[11]) 

1449 z_coord = float(values[12]) 

1450 # Next value is the occupancy, we do not need it 

1451 # occupancy = float(values[13]) 

1452 # Next value is the isotropic displacement, we do not need it 

1453 # isotropic = float(values[14]) 

1454 # Next value is the charge, we do not need it 

1455 # charge = None if values[15] == '?' else float(values[15]) 

1456 # The rest of values are alternative author values 

1457 if author_notation: 

1458 residue_number = int(values[16]) 

1459 residue_name = values[17] 

1460 chain_name = values[18] 

1461 atom_name = values[19].replace('"','') 

1462 # Set the parsed atom 

1463 parsed_atom = Atom(name=atom_name, element=element, coords=(x_coord, y_coord, z_coord)) 

1464 # Add the parsed atom to the list and update the current atom index 

1465 parsed_atoms.append(parsed_atom) 

1466 # Get the parsed chain 

1467 parsed_chain = name_chains.get(chain_name, None) 

1468 # If the parsed chain was not yet instantiated then do it now 

1469 if not parsed_chain: 

1470 parsed_chain = Chain(name=chain_name) 

1471 parsed_chains.append(parsed_chain) 

1472 name_chains[chain_name] = parsed_chain 

1473 # Get the parsed residue 

1474 residue_label = (chain_name, residue_number, icode) 

1475 parsed_residue = label_residues.get(residue_label, None) 

1476 # If the parsed residue was not yet instantiated then do it now 

1477 if not parsed_residue: 

1478 # Instantiate the new parsed residue 

1479 parsed_residue = Residue(name=residue_name, number=residue_number, icode=icode) 

1480 parsed_residues.append(parsed_residue) 

1481 label_residues[residue_label] = parsed_residue 

1482 # Add current residue to the parsed chain 

1483 residue_index += 1 

1484 parsed_chain.residue_indices.append(residue_index) 

1485 # Add current atom to the parsed residue 

1486 atom_index += 1 

1487 parsed_residue.atom_indices.append(atom_index) 

1488 return cls(atoms=parsed_atoms, residues=parsed_residues, chains=parsed_chains) 

1489 

1490 @classmethod 

1491 def from_mmcif_file (cls, mmcif_filepath : str, model : Optional[int] = None, author_notation : bool = False): 

1492 """Set the structure from a mmcif file.""" 

1493 mmcif_file = File(mmcif_filepath) 

1494 if not mmcif_file.exists: 

1495 raise InputError(f'File "{mmcif_filepath}" not found') 

1496 if not mmcif_file.format == 'cif': 

1497 raise InputError(f'"{mmcif_filepath}" is not a path for a mmcif file') 

1498 # Read the mmcif file 

1499 mmcif_content = None 

1500 with open(mmcif_filepath, 'r') as file: 

1501 mmcif_content = file.read() 

1502 return cls.from_mmcif(mmcif_content, model, author_notation) 

1503 

1504 @classmethod 

1505 def from_mdanalysis (cls, mdanalysis_universe): 

1506 """Set the structure from an MD analysis object.""" 

1507 # Set the final list of atoms to be included in the structure 

1508 parsed_atoms = [] 

1509 parsed_residues = [] 

1510 parsed_chains = [] 

1511 # Setup atoms 

1512 for atom in mdanalysis_universe.atoms: 

1513 name = atom.name 

1514 # WARNING: MDAnalysis cannot handle reading an element of an atom with no element 

1515 element = atom.element if hasattr(atom, 'element') else '?' 

1516 #coords = atom.position # DANI: Esto da error si no hay coordenadas 

1517 parsed_atom = Atom(name=name, element=element) 

1518 parsed_atoms.append(parsed_atom) 

1519 # Setup residues 

1520 for residue in mdanalysis_universe.residues: 

1521 name = residue.resname 

1522 number = residue.resnum 

1523 icode = residue.icode if hasattr(residue, 'icode') else None # DANI: No se ha provado 

1524 parsed_residue = Residue(name=name, number=number, icode=icode) 

1525 atom_indices = [ atom.index for atom in residue.atoms ] 

1526 parsed_residue.atom_indices = atom_indices 

1527 parsed_residues.append(parsed_residue) 

1528 # Setup chains 

1529 for segment in mdanalysis_universe.segments: 

1530 name = segment.segid 

1531 parsed_chain = Chain(name=name) 

1532 residue_indices = [ residue.resindex for residue in segment.residues ] 

1533 parsed_chain.residue_indices = residue_indices 

1534 parsed_chains.append(parsed_chain) 

1535 # Setup the structure 

1536 structure = cls(atoms=parsed_atoms, residues=parsed_residues, chains=parsed_chains) 

1537 # Add bonds and charges which are also available in a topology 

1538 atom_count = len(mdanalysis_universe.atoms) 

1539 atom_bonds = [ [] for i in range(atom_count) ] 

1540 for bond in mdanalysis_universe.bonds: 

1541 a,b = bond.indices 

1542 # Make sure atom indices are regular integers so they are JSON serializables 

1543 atom_bonds[a].append(int(b)) 

1544 atom_bonds[b].append(int(a)) 

1545 structure.bonds = atom_bonds 

1546 structure.charges = list(mdanalysis_universe._topology.charges.values) 

1547 return structure 

1548 

1549 @classmethod 

1550 def from_prmtop_file (cls, prmtop_filepath : str): 

1551 """Set the structure from a prmtop file.""" 

1552 # Make sure the input file exists and has the right format 

1553 prmtop_file = File(prmtop_filepath) 

1554 if not prmtop_file.exists: 

1555 raise InputError(f'File "{prmtop_filepath}" not found') 

1556 if not prmtop_file.format == 'prmtop': 

1557 raise InputError(f'"{prmtop_filepath}" is not a name for a prmtop file') 

1558 # In we do not have mdanalysis in our environment then we cannot proceed 

1559 if not is_imported('MDAnalysis'): 

1560 raise InputError('Missing dependency error: MDAnalysis') 

1561 # Parse the topology using MDanalysis 

1562 parser = MDAnalysis.topology.TOPParser.TOPParser(prmtop_filepath) 

1563 mdanalysis_topology = parser.parse() 

1564 mdanalysis_universe = MDAnalysis.Universe(mdanalysis_topology) 

1565 return cls.from_mdanalysis(mdanalysis_universe) 

1566 

1567 @classmethod 

1568 def from_tpr_file (cls, tpr_filepath : str): 

1569 """Set the structure from a tpr file.""" 

1570 # Make sure the input file exists and has the right format 

1571 tpr_file = File(tpr_filepath) 

1572 if not tpr_file.exists: 

1573 raise InputError(f'File "{tpr_filepath}" not found') 

1574 if not tpr_file.format == 'tpr': 

1575 raise InputError(f'"{tpr_filepath}" is not a name for a tpr file') 

1576 # In we do not have mdanalysis in our environment then we cannot proceed 

1577 if not is_imported('MDAnalysis'): 

1578 raise InputError('Missing dependency error: MDAnalysis') 

1579 # Parse the topology using MDanalysis 

1580 parser = MDAnalysis.topology.TPRParser.TPRParser(tpr_filepath) 

1581 mdanalysis_topology = parser.parse() 

1582 mdanalysis_universe = MDAnalysis.Universe(mdanalysis_topology) 

1583 return cls.from_mdanalysis(mdanalysis_universe) 

1584 

1585 @classmethod 

1586 def from_file (cls, mysterious_filepath : str): 

1587 """Set the structure from a file if the file format is supported.""" 

1588 mysterious_file = File(mysterious_filepath) 

1589 if mysterious_file.format == 'pdb': 

1590 return cls.from_pdb_file(mysterious_file.path) 

1591 if mysterious_file.format == 'cif': 

1592 return cls.from_mmcif_file(mysterious_file.path) 

1593 if mysterious_file.format == 'prmtop': 

1594 return cls.from_prmtop_file(mysterious_file.path) 

1595 if mysterious_file.format == 'tpr': 

1596 return cls.from_tpr_file(mysterious_file.path) 

1597 raise InputError(f'Not supported format ({mysterious_file.format}) to setup a Structure') 

1598 

1599 def get_atom_count (self) -> int: 

1600 """Get the number of atoms in the structure.""" 

1601 return len(self.atoms) 

1602 atom_count = property(get_atom_count, None, None, "The number of atoms in the structure (read only)") 

1603 

1604 def get_residue_count (self) -> int: 

1605 """Get the number of residues in the structure (read only).""" 

1606 return len(self.residues) 

1607 residue_count = property(get_residue_count, None, None, "Number of residues in the structure (read only)") 

1608 

1609 def get_chain_count (self) -> int: 

1610 """Get the number of chains in the structure.""" 

1611 return len(self.chains) 

1612 chain_count = property(get_chain_count, None, None, "Number of chains in the structure (read only)") 

1613 

1614 def fix_atom_elements (self, trust : bool = True) -> bool: 

1615 """Fix atom elements by guessing them when missing. 

1616 Set all elements with the first letter upper and the second (if any) lower. 

1617 Also check if atom elements are coherent with atom names. 

1618 

1619 Args: 

1620 trust (bool): If 'trust' is set as False then we impose elements according to what we can guess from the atom name. 

1621 

1622 Returns: 

1623 bool: 

1624 Return True if any element was modified or False if not. 

1625 """ 

1626 modified = False 

1627 added = False 

1628 # Save the wrong guesses for a final report 

1629 # This way we do not crowd the terminal with logs when a lot of atoms are affected 

1630 reports = {} 

1631 for atom in self.atoms: 

1632 # Make sure elements have the first letter cap and the second letter not cap 

1633 if atom.element: 

1634 new_element = first_cap_only(atom.element) 

1635 if atom.element != new_element: 

1636 atom.element = new_element 

1637 modified = True 

1638 # Check the element to match what we would guess from the atom name 

1639 # In case it does not just warn the user 

1640 guess = atom.guess_element() 

1641 if atom.element != guess: 

1642 report = (atom.name, atom.element, guess) 

1643 report_count = reports.get(report, 0) 

1644 reports[report] = report_count + 1 

1645 if not trust: 

1646 atom.element = guess 

1647 modified = True 

1648 # If elements are missing then guess them from atom names 

1649 else: 

1650 atom.element = atom.guess_element() 

1651 added = True 

1652 # Warn the user about anormalies 

1653 for report, count in reports.items(): 

1654 atom_name, atom_element, guess = report 

1655 warn(f"Suspicious element for atom {atom_name}: {atom_element} -> shoudn't it be {guess}? ({count} occurrences)") 

1656 # Warn the user that some elements were modified 

1657 if modified: warn('Atom elements have been modified') 

1658 if added: warn('Atom elements were missing and have been added') 

1659 # Set atom elements as fixed in order to avoid repeating this process 

1660 self._fixed_atom_elements = True 

1661 return modified or added 

1662 

1663 def set_new_coordinates (self, new_coordinates : list[Coords]): 

1664 """Set new coordinates.""" 

1665 # Make sure the list of coordinates is as long as the number of atoms 

1666 if len(new_coordinates) != self.atom_count: 

1667 raise ValueError(f'The number of coordinates ({len(new_coordinates)}) does not match the number of atoms ({self.atom_count})') 

1668 # Overwrite current coordinates with the new coordinates 

1669 for i, atom in enumerate(self.atoms): 

1670 atom.coords = tuple(new_coordinates[i]) 

1671 

1672 def get_ion_atom_indices (self) -> set: 

1673 """Get all supported ion atom indices together in a set.""" 

1674 # If we already did this then return the stored value 

1675 if self._ion_atom_indices != None: 

1676 return self._ion_atom_indices 

1677 # Find ion atom indices 

1678 indices = set() 

1679 for atom in self.atoms: 

1680 if atom.element in SUPPORTED_ION_ELEMENTS: 

1681 indices.add(atom.index) 

1682 self._ion_atom_indices = indices 

1683 return self._ion_atom_indices 

1684 ion_atom_indices = property(get_ion_atom_indices, None, None, "Atom indices for what we consider supported ions") 

1685 

1686 def get_dummy_atom_indices (self) -> set: 

1687 """Get all dummy atom indices together in a set.""" 

1688 # If we already did this then return the stored value 

1689 if self._dummy_atom_indices != None: 

1690 return self._dummy_atom_indices 

1691 # Find ion atom indices 

1692 indices = set() 

1693 for atom in self.atoms: 

1694 if atom.element == DUMMY_ATOM_ELEMENT: 

1695 indices.add(atom.index) 

1696 self._dummy_atom_indices = indices 

1697 return self._dummy_atom_indices 

1698 dummy_atom_indices = property(get_dummy_atom_indices, None, None, "Atom indices for what we consider dummy atoms") 

1699 

1700 def generate_pdb (self): 

1701 """Generate a pdb file content with the current structure.""" 

1702 content = 'REMARK workflow generated pdb file\n' 

1703 for a, atom in enumerate(self.atoms): 

1704 residue = atom.residue 

1705 index = str((a+1) % 100000).rjust(5) 

1706 name = ' ' + atom.name.ljust(3) if len(atom.name) < 4 else atom.name 

1707 residue_name = residue.name.ljust(4) if residue else 'XXX'.ljust(4) 

1708 chain = atom.chain 

1709 chain_name = chain.name if chain.name and len(chain.name) == 1 else 'X' 

1710 residue_number = str(residue.number).rjust(4) if residue else '0'.rjust(4) 

1711 # If residue number is longer than 4 characters then we must parse to hexadecimal 

1712 if len(residue_number) > 4: 

1713 residue_number = hex(residue.number)[2:].rjust(4) 

1714 icode = residue.icode if residue.icode and len(residue.icode) else ' ' 

1715 # Make sure we have atom coordinates 

1716 if atom.coords == None: 

1717 raise InputError('Trying to write a PDB file from a structure with atoms without coordinates') 

1718 x_coord, y_coord, z_coord = [ "{:.3f}".format(coord).rjust(8) for coord in atom.coords ] 

1719 occupancy = '1.00' # Just a placeholder 

1720 temp_factor = '0.00' # Just a placeholder 

1721 element = atom.element.rjust(2) 

1722 atom_line = ('ATOM ' + index + ' ' + name + ' ' + residue_name 

1723 + chain_name + residue_number + icode + ' ' + x_coord + y_coord + z_coord 

1724 + ' ' + occupancy + ' ' + temp_factor + ' ' + element).ljust(80) + '\n' 

1725 content += atom_line 

1726 return content 

1727 

1728 def generate_pdb_file (self, pdb_filepath : str): 

1729 """Generate a pdb file with current structure.""" 

1730 pdb_content = self.generate_pdb() 

1731 with open(pdb_filepath, "w") as file: 

1732 file.write(pdb_content) 

1733 

1734 def get_pytraj_topology (self): 

1735 """Get the structure equivalent pytraj topology.""" 

1736 # In we do not have pytraj in our environment then we cannot proceed 

1737 if not is_imported('pytraj'): 

1738 raise InputError('Missing dependency error: pytraj') 

1739 # Generate a pdb file from the current structure to feed pytraj 

1740 pdb_filepath = '.structure.pdb' 

1741 self.generate_pdb_file(pdb_filepath) 

1742 pytraj_topology = pytraj.load_topology(filename = pdb_filepath) 

1743 os.remove(pdb_filepath) 

1744 return pytraj_topology 

1745 

1746 SUPPORTED_SELECTION_SYNTAXES = { 'vmd', 'pytraj', 'gmx' } 

1747 def select (self, selection_string : str, syntax : str = 'vmd') -> Optional['Selection']: 

1748 """Select atoms from the structure thus generating an atom indices list. 

1749 Different tools may be used to make the selection: 

1750 - vmd (default) 

1751 - pytraj""" 

1752 if syntax == 'vmd': 

1753 # Generate a pdb for vmd to read it 

1754 auxiliar_pdb_filepath = '.structure.pdb' 

1755 self.generate_pdb_file(auxiliar_pdb_filepath) 

1756 # Use vmd to find atom indices 

1757 atom_indices = get_vmd_selection_atom_indices(auxiliar_pdb_filepath, selection_string) 

1758 os.remove(auxiliar_pdb_filepath) 

1759 if len(atom_indices) == 0: 

1760 return Selection() 

1761 return Selection(atom_indices) 

1762 if syntax == 'pytraj': 

1763 # In we do not have pytraj in our environment then we cannot proceed 

1764 if not is_imported('pytraj'): 

1765 raise InputError('Missing dependency error: pytraj') 

1766 pytraj_topology = self.get_pytraj_topology() 

1767 pytraj_selection = pytraj_topology[selection_string] 

1768 atom_indices = [ atom.index for atom in pytraj_selection.atoms ] 

1769 if len(atom_indices) == 0: 

1770 return Selection() 

1771 return Selection(atom_indices) 

1772 if syntax == 'gmx': 

1773 # Generate a pdb strucutre to feed gmx 

1774 auxiliar_pdb_filepath = '.structure.pdb' 

1775 self.generate_pdb_file(auxiliar_pdb_filepath) 

1776 auxiliar_pdb_file = File(auxiliar_pdb_filepath) 

1777 # Create the index file with the current atom selection 

1778 index_filepath = f'.structure.ndx' 

1779 index_file = File(index_filepath) 

1780 selection_name, selection_exists = make_index(auxiliar_pdb_file, index_file, selection_string) 

1781 if not selection_exists: 

1782 available_selections = ', '.join(list(index_groups.keys())) 

1783 raise InputError(f'Something was wrong with the selection. Available gromacs selections: {available_selections}') 

1784 # Read the index file to capture the selection of atoms 

1785 index_groups = read_ndx(index_file) 

1786 indices = index_groups.get(selection_name, None) 

1787 if indices == None: raise RuntimeError('Atom group must exist at this point') 

1788 auxiliar_pdb_file.remove() 

1789 index_file.remove() 

1790 # Return the parsed selection 

1791 return Selection(indices) 

1792 

1793 options = ', '.join(self.SUPPORTED_SELECTION_SYNTAXES) 

1794 raise InputError(f'Syntax "{syntax}" is not supported. Choose one of the following: {options}') 

1795 

1796 def select_atom_indices (self, atom_indices : list[int]) -> 'Selection': 

1797 """Set a function to make selections using atom indices.""" 

1798 # Check atom indices to be in the structure 

1799 atom_count = self.atom_count 

1800 for atom_index in atom_indices: 

1801 if atom_index >= atom_count: 

1802 raise InputError(f'Atom index {atom_index} is out of range ({atom_count})') 

1803 return Selection(atom_indices) 

1804 

1805 def select_residue_indices (self, residue_indices : list[int]) -> 'Selection': 

1806 """Set a function to make selections using residue indices.""" 

1807 # WARNING: The following line gets stucked sometimes, idk why 

1808 #atom_indices = sum([ self.residues[index].atom_indices for index in residue_indices ], []) 

1809 atom_indices = [] 

1810 for i, index in enumerate(residue_indices): 

1811 atom_indices += self.residues[index].atom_indices 

1812 return Selection(atom_indices) 

1813 

1814 def select_all (self) -> 'Selection': 

1815 """Get a selection with all atoms.""" 

1816 return Selection(list(range(self.atom_count))) 

1817 

1818 def select_by_classification (self, classification : str) -> 'Selection': 

1819 """Select atoms according to the classification of its residue.""" 

1820 atom_indices = [] 

1821 for residue in self.residues: 

1822 if residue.classification == classification: 

1823 atom_indices += residue.atom_indices 

1824 return Selection(atom_indices) 

1825 

1826 def select_water (self) -> 'Selection': 

1827 """Select water atoms. 

1828 WARNING: This logic is a bit guessy and it may fail for non-standard residue named structures 

1829 """ 

1830 return self.select_by_classification('solvent') 

1831 

1832 def select_ions (self) -> 'Selection': 

1833 """Select ions.""" 

1834 return self.select_by_classification('ion') 

1835 

1836 def select_counter_ions (self, charge : Optional[str] = None) -> 'Selection': 

1837 """Select counter ion atoms. 

1838 WARNING: This logic is a bit guessy and it may fail for non-standard atom named structures 

1839 """ 

1840 counter_ion_indices = [] 

1841 # Set the accepted names accoridng to the charge 

1842 if charge == None: 

1843 accepted_names = STANDARD_COUNTER_ION_ATOM_NAMES 

1844 elif charge == '+': 

1845 accepted_names = STANDARD_COUNTER_CATION_ATOM_NAMES 

1846 elif charge == '-': 

1847 accepted_names = STANDARD_COUNTER_ANION_ATOM_NAMES 

1848 else: 

1849 raise ValueError('Not supported charge') 

1850 # Iterate atoms 

1851 for atom in self.atoms: 

1852 # If the residue has not one single atom then it is not an ion 

1853 if len(atom.residue.atoms) != 1: continue 

1854 # Get a simplified version of the atom name 

1855 # Set all letters upper and remove non-letter characters (e.g. '+' and '-' symbols) 

1856 simple_atom_name = ''.join(filter(str.isalpha, atom.name.upper())) 

1857 if simple_atom_name in accepted_names: 

1858 counter_ion_indices.append(atom.index) 

1859 return Selection(counter_ion_indices) 

1860 

1861 def select_water_and_counter_ions (self) -> 'Selection': 

1862 """Select both water and counter ions.""" 

1863 return self.select_water() + self.select_counter_ions() 

1864 

1865 def select_heavy_atoms (self) -> 'Selection': 

1866 """Select heavy atoms.""" 

1867 atom_indices = [] 

1868 for atom in self.atoms: 

1869 # If the atom is not an hydrogen then add it to the list 

1870 if atom.element != 'H': 

1871 atom_indices.append(atom.index) 

1872 return Selection(atom_indices) 

1873 

1874 def select_protein (self) -> 'Selection': 

1875 """Select protein atoms. 

1876 WARNING: Note that there is a small difference between VMD protein and our protein. 

1877 This function is improved to consider terminal residues as proteins. 

1878 VMD considers protein any residue including N, C, CA and O while terminals may have OC1 and OC2 instead of O. 

1879 """ 

1880 return self.select_by_classification('protein') 

1881 

1882 def select_nucleic (self) -> 'Selection': 

1883 """Select nucleic atoms.""" 

1884 return self.select_by_classification('dna') + self.select_by_classification('rna') 

1885 

1886 def select_lipids (self) -> 'Selection': 

1887 """Select lipids.""" 

1888 return self.select_by_classification('fatty') + self.select_by_classification('steroid') 

1889 

1890 def select_carbohydrates (self) -> 'Selection': 

1891 """Select carbohydrates.""" 

1892 return self.select_by_classification('carbohydrate') 

1893 

1894 def select_pbc_guess (self) -> 'Selection': 

1895 """Return a selection of the typical PBC atoms: solvent, counter ions and lipids. 

1896 WARNING: This is just a guess.""" 

1897 return self.select_water() + self.select_counter_ions() + self.select_lipids() 

1898 

1899 def select_cg (self) -> 'Selection': 

1900 """Select coarse grain atoms.""" 

1901 return Selection([ atom.index for atom in self.atoms if atom.element == CG_ATOM_ELEMENT ]) 

1902 

1903 def select_cartoon (self, include_terminals : bool = False) -> 'Selection': 

1904 """Select cartoon representable regions for VMD. 

1905 

1906 Rules are: 

1907 1. Residues must be protein (i.e. must contain C, CA, N and O atoms) or nucleic (P, OP1, OP2, O3', C3', C4', C5', O5') 

1908 2. There must be at least 3 covalently bonded residues 

1909  

1910 It does not matter their chain, numeration or even index order as long as they are bonded. 

1911 * Note that we can represent cartoon while we display one residue alone, but it must be connected anyway. 

1912 Also, we have the option to include terminals in the cartoon selection although they are not representable. 

1913 This is helpful for the screenshot: terminals are better hidden than represented as ligands.""" 

1914 # Set fragments which are candidate to be cartoon representable 

1915 fragments = [] 

1916 # Get protein fragments according to VMD 

1917 protein_selection = self.select_protein() - self.select_cg() if include_terminals else self.select('protein', syntax='vmd') 

1918 if protein_selection: 

1919 fragments += list(self.find_fragments(protein_selection)) 

1920 # Get nucleic fragments according to VMD 

1921 nucleic_selection = self.select_nucleic() - self.select_cg() 

1922 if nucleic_selection: 

1923 fragments += list(self.find_fragments(nucleic_selection)) 

1924 # Set the final selection including all valid fragments 

1925 cartoon_selection = Selection() 

1926 # Iterate over every fragment 

1927 for fragment in fragments: 

1928 # Make sure the fragment has at least 3 residues before adding it to the cartoon selection 

1929 fragment_residue_indices = self.get_selection_residue_indices(fragment) 

1930 if len(fragment_residue_indices) >= 3: 

1931 cartoon_selection += fragment 

1932 return cartoon_selection 

1933 

1934 def invert_selection (self, selection : 'Selection') -> 'Selection': 

1935 """Invert a selection.""" 

1936 atom_indices = list(range(self.atom_count)) 

1937 for atom_index in selection.atom_indices: 

1938 atom_indices[atom_index] = None 

1939 return Selection([ atom_index for atom_index in atom_indices if atom_index != None ]) 

1940 

1941 def get_selection_residue_indices (self, selection : 'Selection') -> list[int]: 

1942 """Given a selection, get a list of residue indices for residues implicated. 

1943 Note that if a single atom from the residue is in the selection then the residue index is returned.""" 

1944 return list(set([ self.atoms[atom_index].residue_index for atom_index in selection.atom_indices ])) 

1945 

1946 def get_selection_residues (self, selection : 'Selection') -> list['Residue']: 

1947 """Given a selection, get a list of residues implicated. 

1948 Note that if a single atom from the residue is in the selection then the residue is returned.""" 

1949 residue_indices = self.get_selection_residue_indices(selection) 

1950 return [ self.residues[index] for index in residue_indices ] 

1951 

1952 def get_selection_chain_indices (self, selection : 'Selection') -> list[int]: 

1953 """Given a selection, get a list of chain indices for chains implicated. 

1954 Note that if a single atom from the chain is in the selection then the chain index is returned.""" 

1955 return list(set([ self.atoms[atom_index].chain_index for atom_index in selection.atom_indices ])) 

1956 

1957 def get_selection_chains (self, selection : 'Selection') -> list['Chain']: 

1958 """Given a selection, get a list of chains implicated. 

1959 Note that if a single atom from the chain is in the selection then the chain is returned.""" 

1960 chain_indices = self.get_selection_chain_indices(selection) 

1961 return [ self.chains[index] for index in chain_indices ] 

1962 

1963 def get_selection_classification (self, selection : 'Selection') -> str: 

1964 """Get type of the chain.""" 

1965 # Get selection residues 

1966 selection_residue_indices = self.get_selection_residue_indices(selection) 

1967 

1968 # Inicializar contadores para cada tipo de residuo 

1969 residue_counts = {} 

1970 

1971 # Count the residues of each type 

1972 for residue_index in selection_residue_indices: 

1973 residue = self.residues[residue_index] 

1974 res_class = residue.classification 

1975 if res_class in residue_counts: 

1976 residue_counts[res_class] += 1 

1977 else: 

1978 # If the classification is not recognized, count it as "other" 

1979 residue_counts[res_class] = 1 

1980 

1981 # Count the total number of residues in the selection 

1982 total_residues = sum(residue_counts.values()) 

1983 if total_residues == 0: raise ValueError('Should have residues at this point') 

1984 

1985 # Calculate the proportion of each type of residue 

1986 proportions = { k: v / total_residues for k, v in residue_counts.items() } 

1987 

1988 # If one type of residue dominates, return it 

1989 primary_type = max(proportions, key=proportions.get) 

1990 # We establish a threshold of 80% to consider a chain as a single type 

1991 if proportions[primary_type] >= 0.8: 

1992 return primary_type 

1993 

1994 # Special cases 

1995 relevant_threshold = 0.3 

1996 if proportions.get("dna", 0) > relevant_threshold and proportions.get("rna", 0) > relevant_threshold: 

1997 return "nucleic" 

1998 if proportions.get("carbohydrate", 0) > relevant_threshold and proportions.get("protein", 0) > relevant_threshold: 

1999 return "glycoprotein" 

2000 if proportions.get("fatty", 0) > relevant_threshold and proportions.get("steroid", 0) > relevant_threshold: 

2001 return "lipid" 

2002 

2003 # Any other combinations of different main proportions 

2004 main_proportions = { k: v for k, v in proportions.items() if v > relevant_threshold } 

2005 # Nothing is above the threshold 

2006 if len(main_proportions) == 0: 

2007 return "mix" 

2008 # There is only one thing above threshold 

2009 elif len(main_proportions) == 1: 

2010 return f"{primary_type}/mix" 

2011 # There are two things above threshold 

2012 elif len(main_proportions) == 2: 

2013 other_type = next(key for key in main_proportions.keys() if key != primary_type) 

2014 return f"{primary_type}/{other_type}" 

2015 # There are three things above the threshold (more is not possible) 

2016 else: 

2017 return "mix" 

2018 

2019 def filter (self, selection : Union['Selection', str], selection_syntax : str = 'vmd') -> 'Structure': 

2020 """Create a new structure from the current using a selection to filter atoms.""" 

2021 if not selection: raise RuntimeError('No selection was passed') 

2022 # In case the selection is not an actual Selection, but a string, parse the string into a Selection 

2023 if type(selection) == str: 

2024 selection = self.select(selection, selection_syntax) 

2025 new_atoms = [] 

2026 new_residues = [] 

2027 new_chains = [] 

2028 # Get the selected atoms 

2029 # Generate dictionaries with new indexes as keys and previous indexes as values for atoms, residues and chains 

2030 # This is done with this structure for the residues and chains further to find the new indices fast 

2031 new_atom_indices = {} 

2032 # Collect also original indices to related atom residues and chains 

2033 original_atom_residue_indices = [] 

2034 for new_index, original_index in enumerate(selection.atom_indices): 

2035 # Make a copy of the selected atom in order to not modify the original one 

2036 original_atom = self.atoms[original_index] 

2037 new_atom = Atom( 

2038 name=original_atom.name, 

2039 element=original_atom.element, 

2040 coords=original_atom.coords, 

2041 ) 

2042 new_atoms.append(new_atom) 

2043 # Save old and new indices in order to reconfigure all indices later 

2044 new_atom_indices[original_index] = new_index 

2045 original_atom_residue_indices.append(original_atom.residue_index) 

2046 # Find the selected residues 

2047 selected_residue_indices = list(set(original_atom_residue_indices)) 

2048 # Repeat the original/new indices backup we did before 

2049 new_residue_indices = {} 

2050 original_residue_atom_indices = [] 

2051 original_residue_chain_indices = [] 

2052 for new_index, original_index in enumerate(selected_residue_indices): 

2053 # Make a copy of the selected residue in order to not modify the original one 

2054 original_residue = self.residues[original_index] 

2055 new_residue = Residue( 

2056 name=original_residue.name, 

2057 number=original_residue.number, 

2058 icode=original_residue.icode, 

2059 ) 

2060 new_residues.append(new_residue) 

2061 # Save old and new indices in order to reconfigure all indices later 

2062 new_residue_indices[original_index] = new_index 

2063 original_residue_atom_indices.append(original_residue.atom_indices) 

2064 original_residue_chain_indices.append(original_residue.chain_index) 

2065 # Find the selected chains 

2066 selected_chain_indices = list(set(original_residue_chain_indices)) 

2067 # Repeat the original/new indices backup we did before 

2068 new_chain_indices = {} 

2069 original_chain_residue_indices = [] 

2070 for new_index, original_index in enumerate(selected_chain_indices): 

2071 # Make a copy of the selected chain in order to not modify the original one 

2072 original_chain = self.chains[original_index] 

2073 new_chain = Chain( 

2074 name=original_chain.name, 

2075 ) 

2076 new_chains.append(new_chain) 

2077 # Save old and new indices in order to reconfigure all indices later 

2078 new_chain_indices[original_index] = new_index 

2079 original_chain_residue_indices.append(original_chain.residue_indices) 

2080 # Finally, reset indices in all instances 

2081 for a, atom in enumerate(new_atoms): 

2082 atom.residue_index = new_residue_indices[ original_atom_residue_indices[a] ] 

2083 for r, residue in enumerate(new_residues): 

2084 atom_indices = [] 

2085 for original_index in original_residue_atom_indices[r]: 

2086 new_index = new_atom_indices.get(original_index, None) 

2087 if new_index != None: 

2088 atom_indices.append(new_index) 

2089 residue.atom_indices = atom_indices 

2090 residue.chain_index = new_chain_indices[ original_residue_chain_indices[r] ] 

2091 for c, chain in enumerate(new_chains): 

2092 residue_indices = [] 

2093 for original_index in original_chain_residue_indices[c]: 

2094 new_index = new_residue_indices.get(original_index, None) 

2095 if new_index != None: 

2096 residue_indices.append(new_index) 

2097 chain.residue_indices = residue_indices 

2098 return Structure(atoms=new_atoms, residues=new_residues, chains=new_chains) 

2099 

2100 def chainer (self, 

2101 selection : Optional['Selection'] = None, 

2102 letter : Optional[str] = None, 

2103 whole_fragments : bool = False): 

2104 """Set chains on demand. 

2105 If no selection is passed then the whole structure will be affected. 

2106 If no chain is passed then a "chain by fragment" logic will be applied.""" 

2107 # If there is no selection we consider all atoms 

2108 if selection == None: 

2109 selection = self.select_all() 

2110 # If the selection is empty then there is nothing to do here 

2111 if len(selection) == 0: return 

2112 # If a letter is specified then the logic is way simpler 

2113 if letter and not whole_fragments: 

2114 self.set_selection_chain_name(selection, letter) 

2115 return 

2116 # If a letter is not specified we run the "fragments" logic 

2117 fragment_getter = self.find_whole_fragments if whole_fragments else self.find_fragments 

2118 for fragment in fragment_getter(selection): 

2119 chain_name = letter if letter else self.get_available_chain_name() 

2120 self.set_selection_chain_name(fragment, chain_name) 

2121 

2122 def auto_chainer (self, verbose : bool = False): 

2123 """Smart function to set chains automatically. 

2124 Original chains will be overwritten.""" 

2125 if verbose: print('Running auto-chainer') 

2126 # Set all chains to X 

2127 self.chainer(letter='X') 

2128 # Set solvent and ions as a unique chain 

2129 ion_selection = self.select_ions() 

2130 if verbose: print(f' Ion atoms: {len(ion_selection)}') 

2131 solvent_selection = self.select_water() 

2132 if verbose: print(f' Solvent atoms: {len(solvent_selection)}') 

2133 ion_and_indices_selection = ion_selection + solvent_selection 

2134 self.chainer(selection=ion_and_indices_selection, letter='S') 

2135 # Set fatty acids and steroids as a unique chain 

2136 # RUBEN: con whole_fragments algunos carbohidratos se sobreescriben como glucolípidos 

2137 # DANI: Se podrían descartarían residuos que no pertenezcan a la membrana por proximidad 

2138 membrane_selection = self.select_lipids() 

2139 if verbose: print(f' Membrane atoms: {len(membrane_selection)}') 

2140 self.chainer(selection=membrane_selection, letter='M', whole_fragments=True) 

2141 # Set carbohydrates as a unique chain as well, just in case we have glycans 

2142 # Note that in case glycan atoms are mixed with protein atoms glycan chains will be overwritten 

2143 # However this is not a problem. It is indeed the best solution if we don't want ro resort atoms 

2144 carbohydrate_selection = self.select_carbohydrates() 

2145 if verbose: print(f' Carbohydrate atoms: {len(carbohydrate_selection)}') 

2146 self.chainer(selection=carbohydrate_selection, letter='H') 

2147 # Add a chain per fragment for both proteins and nucleic acids 

2148 protein_selection = self.select_protein() 

2149 if verbose: print(f' Protein atoms: {len(protein_selection)}') 

2150 nucleic_selection = self.select_nucleic() 

2151 if verbose: print(f' Nucleic acid atoms: {len(nucleic_selection)}') 

2152 protein_and_nucleic_selection = protein_selection + nucleic_selection 

2153 self.chainer(selection=protein_and_nucleic_selection) 

2154 # At this point we should have covered most of the molecules in the structure 

2155 # However, in case there are more molecules, we have already set them all as a single chain ('X') 

2156 # Here we do not apply the chain per fragment logic since it may be dangerous 

2157 # Note that we may have a lot of independent residues (and thus a los of small fragments) 

2158 # This would make us run out of letters in the alphabet and thus there would be no more chains 

2159 # As the last step, fix repeated chains 

2160 # RUBEN: sacar esta parte ya que se hace luego en el structure_corrector? 

2161 self.check_repeated_chains(fix_chains=True) 

2162 

2163 def raw_protein_chainer (self): 

2164 """This is an alternative system to find protein chains (anything else is chained as 'X'). 

2165 This system does not depend on VMD. 

2166 It totally overrides previous chains since it is expected to be used only when chains are missing.""" 

2167 current_chain = self.get_available_chain_name() 

2168 previous_alpha_carbon = None 

2169 for residue in self.residues: 

2170 alpha_carbon = next((atom for atom in residue.atoms if atom.name == 'CA'), None) 

2171 if not alpha_carbon: 

2172 residue.set_chain('X') 

2173 continue 

2174 # Connected aminoacids have their alpha carbons at a distance of around 3.8 Ångstroms 

2175 residues_are_connected = previous_alpha_carbon and calculate_distance(previous_alpha_carbon, alpha_carbon) < 4 

2176 if not residues_are_connected: 

2177 current_chain = self.get_available_chain_name() 

2178 residue.set_chain(current_chain) 

2179 previous_alpha_carbon = alpha_carbon 

2180 

2181 def set_selection_chain_name (self, selection : 'Selection', letter : str): 

2182 """ 

2183 Given an atom selection, set the chain for all these atoms. 

2184 Note that the chain is changed in every whole residue, no  

2185 matter if only one atom was selected. 

2186 """ 

2187 # Find if the letter belongs to an already existing chain 

2188 chain = self.get_chain_by_name(letter) 

2189 if not chain: 

2190 chain = Chain(name=letter) 

2191 self.set_new_chain(chain) 

2192 # Get the selection residue indices 

2193 selection_residue_indices = self.get_selection_residue_indices(selection) 

2194 # Set the chain index of every residue to the new chain 

2195 # WARNING:  

2196 # This may have to be done atom by atom but we forgot why so we change  

2197 # it so see what was the problem. Doing it atom by atom is not efficient 

2198 # and was causing a bug where residues with same number could change their name 

2199 # to that of the first residue found with that number. 

2200 for residue_index in selection_residue_indices: 

2201 # WARNING: Chain index has to be read every iteration since it may change. Do not save it 

2202 residue = self.residues[residue_index] 

2203 residue.set_chain_index(chain.index) 

2204 

2205 def check_available_chains(self): 

2206 """Check if there are more chains than available letters.""" 

2207 n_chains = len(self.chains) 

2208 n_available_letters = len(AVAILABLE_LETTERS) 

2209 if n_chains <= n_available_letters: return 

2210 raise InputError(f'There are {n_chains} chains in the structure so far. If this is not expected then there may be a problem.\n' + 

2211 f' If this is expected then unfortunatelly there is a limit of {n_available_letters} available chain letters.\n' + 

2212 ' Please manually set the chains from scratch or merge together some chains to reduce the number.\n' + 

2213 ' Customize a PDB file using the command "mwf chainer". Then use the customized PDB as input structure (-stru).') 

2214 

2215 def get_available_chain_name (self) -> str: 

2216 """Get an available chain name. 

2217 Find alphabetically the first letter which is not yet used as a chain name. 

2218 If all letters in the alphabet are used already then raise an error.""" 

2219 self.check_available_chains() 

2220 current_chain_names = [ chain.name for chain in self.chains ] 

2221 next_available_chain_name = next((name for name in AVAILABLE_LETTERS if name not in current_chain_names), None) 

2222 return next_available_chain_name 

2223 

2224 def get_next_available_chain_name (self, anterior : str) -> str: 

2225 """ 

2226 Get the next available chain name. 

2227 

2228 Args: 

2229 anterior (str): The last chain name used, which is expected to be a single letter 

2230  

2231 Raises: 

2232 ValueError: If the anterior is not a letter or if there are more chains than available 

2233 """ 

2234 self.check_available_chains() 

2235 current_chain_names = set([ chain.name for chain in self.chains ]) 

2236 # If anterior is cap then try caps first, if anterior is lower then try lowers first 

2237 if anterior.isupper(): 

2238 first_group, second_group = AVAILABLE_CAPS, AVAILABLE_LOWS 

2239 elif anterior.islower(): 

2240 first_group, second_group = AVAILABLE_LOWS, AVAILABLE_CAPS 

2241 else: raise ValueError(f'Is "{anterior}" even a letter?') 

2242 # Reorder letters in the first group, so the anterior is the last letter 

2243 anterior_position = first_group.index(anterior) 

2244 next_position = anterior_position + 1 

2245 reordered_group = first_group[next_position:] + first_group[0:next_position] 

2246 next_letter = next((letter for letter in reordered_group if letter not in current_chain_names), None) 

2247 if next_letter: return next_letter 

2248 # If there is not available letters is the first group then return the first available in the second 

2249 next_letter = next((letter for letter in second_group if letter not in current_chain_names), None) 

2250 return next_letter 

2251 

2252 def get_chain_by_name (self, name : str) -> Optional['Chain']: 

2253 """Get a chain by its name.""" 

2254 return next((c for c in self.chains if c.name == name), None) 

2255 

2256 def find_residue (self, chain_name : str, number : int, icode : str = '' ) -> Optional['Residue']: 

2257 """Find a residue by its chain, number and insertion code.""" 

2258 # Get the corresponding chain 

2259 target_chain = self.get_chain_by_name(chain_name) 

2260 if not target_chain: return None 

2261 # Find the residue in the target chain 

2262 target_chain.find_residue(number, icode) 

2263 

2264 def display_summary (self): 

2265 """Get a summary of the structure.""" 

2266 print(f'Atoms: {self.atom_count}') 

2267 print(f'Residues: {len(self.residues)}') 

2268 print(f'Chains: {len(self.chains)}') 

2269 for chain in self.chains: 

2270 print(f'Chain {chain.name} ({len(chain.residue_indices)} residues)') 

2271 print(' -> ' + chain.get_sequence()) 

2272 

2273 def check_repeated_chains (self, fix_chains : bool = False, display_summary : bool = False) -> bool: 

2274 """ 

2275 There may be chains which are equal in the structure (i.e. same chain name). 

2276 This means we have a duplicated/splitted chain. 

2277 Repeated chains are usual and they are usually supported but with some problems. 

2278 Also, repeated chains usually come with repeated residues, which means more problems (see explanation below). 

2279 

2280 In the context of this structure class we may have 2 different problems with a different solution each: 

2281 1. There is more than one chain with the same letter (repeated chain) -> rename the duplicated chains 

2282 2. There is a chain with atom indices which are not consecutive (splitted chain) -> create new chains 

2283 

2284 Rename repeated chains or create new chains if the fix_chains argument is True. 

2285 

2286 WARNING: These fixes are possible only if there are less chains than the number of letters in the alphabet. 

2287 Although there is no limitation in this code for chain names, setting long chain names is not compatible with pdb format. 

2288 

2289 Check splitted chains (a chains with non consecutive residues) and try to fix them if requested. 

2290 Check repeated chains (two chains with the same name) and return True if there were any repeats. 

2291 """ 

2292 # Order chains according to their names 

2293 # Save also those chains which have a previous duplicate 

2294 name_chains = {} 

2295 repeated_chains = [] 

2296 for chain in self.chains: 

2297 chain_name = chain.name 

2298 current_name_chains = name_chains.get(chain_name, None) 

2299 if not current_name_chains: 

2300 name_chains[chain_name] = [chain] 

2301 else: 

2302 name_chains[chain_name].append(chain) 

2303 repeated_chains.append(chain) 

2304 # Display the summary of repeated chains if requested 

2305 if display_summary: 

2306 if len(repeated_chains) > 0: 

2307 warn('There are repeated chains:') 

2308 for chain_name, chains in name_chains.items(): 

2309 chains_count = len(chains) 

2310 if chains_count > 1: 

2311 print(f'- Chain {chain_name} has {chains_count} repeats') 

2312 # Rename repeated chains if requested 

2313 if len(repeated_chains) > 0 and fix_chains: 

2314 self.check_available_chains() 

2315 current_letters = list(name_chains.keys()) 

2316 for repeated_chain in repeated_chains: 

2317 last_chain_letter = repeated_chain.name 

2318 while last_chain_letter in current_letters: 

2319 last_chain_letter = get_next_letter(last_chain_letter) 

2320 repeated_chain.name = last_chain_letter 

2321 current_letters.append(last_chain_letter) 

2322 # Check if there are splitted chains 

2323 # RUBEN: sacar esta parte ya que esta self.check_splitted_chains 

2324 for chain in self.chains: 

2325 residue_indices = sorted(chain.residue_indices) 

2326 # Check if residue indices are consecutive 

2327 if residue_indices[-1] - residue_indices[0] + 1 == len(residue_indices): 

2328 continue 

2329 warn(f'Splitted chain {chain.name}') 

2330 # If indices are not consecutive then we must find ranges of consecutive residues and create new chains for them 

2331 previous_residue_index = residue_indices[0] 

2332 consecutive_residues = [previous_residue_index] 

2333 overall_consecutive_residues = [] 

2334 for residue_index in residue_indices[1:]: 

2335 # If next residue is consecutive 

2336 if residue_index == previous_residue_index + 1: 

2337 consecutive_residues.append(residue_index) 

2338 # If next residue is NOT consecutive 

2339 else: 

2340 overall_consecutive_residues.append(consecutive_residues) 

2341 consecutive_residues = [residue_index] 

2342 # Update the previous index 

2343 previous_residue_index = residue_index 

2344 # Add the last split 

2345 overall_consecutive_residues.append(consecutive_residues) 

2346 # Now create new chains and reasign residues 

2347 # Skip the first set of consecutive residues since they will stay in the original chain 

2348 for residues_indices in overall_consecutive_residues[1:]: 

2349 chain_name = self.get_available_chain_name() 

2350 residues_selection = self.select_residue_indices(residues_indices) 

2351 self.set_selection_chain_name(residues_selection, chain_name) 

2352 

2353 # Fix repeated chains if requested 

2354 return len(repeated_chains) > 0 

2355 

2356 def check_splitted_chains (self, fix_chains : bool = False, display_summary : bool = False) -> bool: 

2357 """Check if non-consecutive atoms belong to the same chain. 

2358 If so, separate pieces of non-consecuite atoms in different chains. 

2359 Note that the new chains will be duplicated, so you will need to run check_repeated_chains after. 

2360 

2361 Args: 

2362 fix_chains (bool): If True then the splitted chains will be fixed. 

2363 display_summary (bool): If True then a summary of the splitted chains will be displayed. 

2364 

2365 Returns: 

2366 bool: 

2367 True if we encountered splitted chains and false otherwise. 

2368 """ 

2369 splitted_fragments: list[tuple[str, list[int]]] = [] 

2370 # Keep track of already checked chains 

2371 checked_chains = set() 

2372 last_chain = None 

2373 last_fragment_start = None 

2374 for atom_index, atom in enumerate(self.atoms): 

2375 # Get the chain name 

2376 chain_name = atom.chain.name 

2377 # Skip the atom if it belong to the previous chain 

2378 if chain_name == last_chain: continue 

2379 # If we were in a splitted fragment then end it here 

2380 if last_fragment_start != None: 

2381 new_fragment_indices = list(range(last_fragment_start, atom_index)) 

2382 new_fragment = (last_chain, new_fragment_indices) 

2383 splitted_fragments.append(new_fragment) 

2384 last_fragment_start = None 

2385 # Check if the chain was already found 

2386 if chain_name in checked_chains: 

2387 # Start a new fragment 

2388 last_fragment_start = atom_index 

2389 # Update last chain 

2390 last_chain = chain_name 

2391 # Add the new chain to the set of already checked chains 

2392 checked_chains.add(chain_name) 

2393 

2394 # Make a summary of the splitted chains if requested 

2395 if display_summary and len(splitted_fragments) > 0: 

2396 warn(f'Found {len(splitted_fragments)} splitted fragments') 

2397 affected_chains = sorted(list(set([ fragment[0] for fragment in splitted_fragments ]))) 

2398 print(f' We are having splits in chains {", ".join(affected_chains)}') 

2399 

2400 # Fix chains if requested 

2401 if fix_chains: 

2402 for chain_name, fragment_atom_indices in splitted_fragments: 

2403 # Create a new chain 

2404 new_chain = Chain(name=chain_name) 

2405 self.set_new_chain(new_chain) 

2406 # Ge the selection residue indices for the fragment 

2407 fragment_selection = self.select_atom_indices(fragment_atom_indices) 

2408 fragment_residue_indices = self.get_selection_residue_indices(fragment_selection) 

2409 # Move atoms in the fragment to the new chain 

2410 for residue_index in fragment_residue_indices: 

2411 residue = self.residues[residue_index] 

2412 residue.set_chain_index(new_chain.index) 

2413 return len(splitted_fragments) > 0 

2414 

2415 def sort_residues (self): 

2416 """Coherently sort residues according to the indices of the atoms they hold.""" 

2417 # Set a function to sort atoms and residues by index 

2418 def by_first_atom_index (residue): 

2419 return min(residue.atom_indices) 

2420 # Sort residues according to their first atom index 

2421 sorted_residues = sorted(self.residues, key = by_first_atom_index) 

2422 # Iterate sorted residues letting them know their new index 

2423 for r, residue in enumerate(sorted_residues): 

2424 residue.index = r 

2425 # Finally update the structure's residues list 

2426 self.residues = sorted_residues 

2427 

2428 def check_merged_residues (self, fix_residues : bool = False, display_summary : bool = False) -> bool: 

2429 """ 

2430 There may be residues which contain unconnected (unbonded) atoms. They are not allowed. 

2431 They may come from a wrong parsing and be indeed duplicated residues. 

2432 

2433 Search for merged residues. 

2434 Create new residues for every group of connected atoms if the fix_residues argument is True. 

2435 Note that the new residues will be repeated, so you will need to run check_repeated_residues after. 

2436 Return True if there were any merged residues. 

2437 """ 

2438 # Get the list of merged residues we encounter 

2439 merged_residues = [] 

2440 # Iterate residues 

2441 for residue in self.residues: 

2442 residue_selection = residue.get_selection() 

2443 residue_fragments = list(self.find_fragments(residue_selection)) 

2444 if len(residue_fragments) <= 1: continue 

2445 # If current residue has more than 1 fragment then it is a merged residue 

2446 merged_residues.append(residue) 

2447 if not fix_residues: continue 

2448 # If the residue is to be fixed then let the first fragment as the current residue 

2449 # Then create a new residue for every other fragment 

2450 for extra_fragment in residue_fragments[1:]: 

2451 # Set a new residue identical to the current one 

2452 new_residue = Residue(residue.name, residue.number, residue.icode) 

2453 self.set_new_residue(new_residue) 

2454 # Add it to the same chain 

2455 residue.chain.add_residue(new_residue) 

2456 # Add atoms to it 

2457 for atom_index in extra_fragment.atom_indices: 

2458 atom = self.atoms[atom_index] 

2459 new_residue.add_atom(atom) 

2460 # Now that we have new resiudes, sort all residues to keep them coherent 

2461 self.sort_residues() 

2462 # Count how many merged residues we encountered 

2463 merged_residues_count = len(merged_residues) 

2464 # Log some details if the summary is requested 

2465 if display_summary and merged_residues_count > 0: 

2466 print(f'Found {merged_residues_count} merged residues') 

2467 print(f' e.g. {merged_residues[0]}') 

2468 # Return if we found merged residues 

2469 return merged_residues_count > 0 

2470 

2471 def check_repeated_residues (self, fix_residues : bool = False, display_summary : bool = False) -> bool: 

2472 """ 

2473 There may be residues which are equal in the structure (i.e. same chain, number and icode). 

2474 In case 2 residues in the structure are equal we must check distance between their atoms. 

2475 If atoms are far it means they are different residues with the same notation (duplicated residues). 

2476 If atoms are close it means they are indeed the same residue (splitted residue). 

2477 

2478 Splitted residues are found in some pdbs and they are supported by some tools. 

2479 These tools consider all atoms with the same 'record' as the same residue. 

2480 However, there are other tools which would consider the splitted residue as two different residues. 

2481 This causes inconsistency along different tools besides a long list of problems. 

2482 The only possible is fix is changing the order of atoms in the topology. 

2483 Note that this is a breaking change for associated trajectories, which must change the order of coordinates. 

2484 However here we provide tools to fix associates trajectories as well. 

2485 

2486 Duplicated residues are usual and they are usually supported but with some problems. 

2487 For example, pytraj analysis outputs use to sort results by residues and each residue is tagged. 

2488 If there are duplicated residues with the same tag it may be not possible to know which result belongs to each residue. 

2489 Another example are NGL selections once in the web client. 

2490 If you select residue ':A and 1' and there are multiple residues 1 in chain A all of them will be displayed. 

2491 

2492 Check residues to search for duplicated and splitted residues. 

2493 Renumerate repeated residues if the fix_residues argument is True. 

2494 Return True if there were any repeats. 

2495 """ 

2496 # Track if residues have to be changed or not 

2497 modified = False 

2498 # Group all residues in the structure according to their chain, number and icode 

2499 grouped_residues = {} 

2500 # Check repeated residues which are one after the other 

2501 # Note that these residues MUST have a different name 

2502 # Otherwise they would have not been considered different residues 

2503 # For these rare cases we use icodes to solve the problem 

2504 non_icoded_residues = [] 

2505 last_residue = None 

2506 for residue in self.residues: 

2507 # Check residue to be equal than the previous residue 

2508 if residue == last_residue: 

2509 non_icoded_residues.append(residue) 

2510 last_residue = residue 

2511 continue 

2512 last_residue = residue 

2513 # Add residue to the groups of repeated residues 

2514 current_residue_repeats = grouped_residues.get(residue, None) 

2515 if not current_residue_repeats: 

2516 grouped_residues[residue] = [ residue ] 

2517 else: 

2518 current_residue_repeats.append(residue) 

2519 # In case we have non icoded residues 

2520 if len(non_icoded_residues) > 0: 

2521 if display_summary: 

2522 print(f'There are non-icoded residues ({len(non_icoded_residues)})') 

2523 # Set new icodes for non icoded residues 

2524 if fix_residues: 

2525 print(' Non icoded residues will recieve an icode') 

2526 for residue in non_icoded_residues: 

2527 repeated_residues_group = grouped_residues[residue] 

2528 current_icodes = [ residue.icode for residue in repeated_residues_group if residue.icode ] 

2529 next_icode = next((cap for cap in AVAILABLE_CAPS if cap not in current_icodes), None) 

2530 if not next_icode: 

2531 raise ValueError('There are no more icodes available') 

2532 # print('Setting icode ' + next_icode + ' to residue ' + str(residue)) 

2533 residue.icode = next_icode 

2534 modified = True 

2535 # Grouped residues with more than 1 result are considered as repeated 

2536 repeated_residues = [ residues for residues in grouped_residues.values() if len(residues) > 1 ] 

2537 if len(repeated_residues) == 0: 

2538 return modified 

2539 # In case we have repeated residues... 

2540 if display_summary: 

2541 warn(f'There are {len(repeated_residues)} different groups of repeated residues') 

2542 print(f' e.g. {repeated_residues[0][0]}') 

2543 if len(repeated_residues) == 9999 or len(repeated_residues) == 10000: 

2544 print(' Probably you have more residues than the PDB numeration limit (1-9999)') 

2545 # Now for each repeated residue, find out which are splitted and which are duplicated 

2546 covalent_bonds = self.bonds 

2547 overall_splitted_residues = [] 

2548 overall_duplicated_residues = [] 

2549 for residues in repeated_residues: 

2550 # Iterate over repeated residues and check if residues are covalently bonded 

2551 # If any pair of residues are bonded add them both to the splitted residues list 

2552 # At the end, all non-splitted residues will be considered duplicated residues 

2553 # WARNING: Using a set here is not possible since repeated residues have the same hash 

2554 # WARNING: Also comparing residues themselves is not advisable, so we use indices at this point 

2555 splitted_residue_indices = set() 

2556 for residue, other_residues in otherwise(residues): 

2557 if residue.index in splitted_residue_indices: 

2558 continue 

2559 # Get atom indices for all atoms connected to the current residue 

2560 residue_bonds = sum([ covalent_bonds[index] for index in residue.atom_indices ], []) 

2561 for other_residue in other_residues: 

2562 # Get all atom indices for each other residue and collate with the current residue bonds 

2563 if any( index in residue_bonds for index in other_residue.atom_indices ): 

2564 splitted_residue_indices.add(residue.index) 

2565 splitted_residue_indices.add(other_residue.index) 

2566 # Finally obtain the splitted residues from its indices 

2567 splitted_residues = [ self.residues[index] for index in splitted_residue_indices ] 

2568 # Repeated residues which are not splitted are thus duplicated 

2569 duplicated_residues = [ residue for residue in residues if residue.index not in splitted_residue_indices ] 

2570 # Update the overall lists 

2571 if len(splitted_residues) > 0: 

2572 overall_splitted_residues.append(splitted_residues) 

2573 if len(duplicated_residues) > 0: 

2574 overall_duplicated_residues.append(duplicated_residues) 

2575 # In case we have splitted residues 

2576 if len(overall_splitted_residues) > 0: 

2577 if display_summary: 

2578 print(f' There are splitted residues ({len(overall_splitted_residues)})') 

2579 # Fix splitted residues 

2580 if fix_residues: 

2581 print(' Splitted residues will be merged') 

2582 # Set a function to sort atoms and residues by index 

2583 by_index = lambda v: v._index 

2584 # Merge splitted residues 

2585 # WARNING: Merging residues without sorting atoms is possible, but this would be lost after exporting to pdb 

2586 for splitted_residues in overall_splitted_residues: 

2587 # The first residue (i.e. the residue with the lower index) will be the one which remains 

2588 # It will absorb other residue atom indices 

2589 splitted_residues.sort(key=by_index) # Residues are not sorted by default, this is mandatory 

2590 first_residue = splitted_residues[0] 

2591 other_residues = splitted_residues[1:] 

2592 for residue in other_residues: 

2593 for atom in residue.atoms: 

2594 atom.residue = first_residue 

2595 print(' Atoms will be sorted to be together by residues') 

2596 print(' NEVER FORGET: This will break any associated trajectory if coordinates are not sorted as well') 

2597 # Sort atoms to group residue atoms together 

2598 # Note that each atom index must be updated 

2599 new_atom_indices = sum([ residue.atom_indices for residue in self.residues ], []) 

2600 for i, new_atom_index in enumerate(new_atom_indices): 

2601 atom = self.atoms[new_atom_index] 

2602 atom._index = i 

2603 self.atoms.sort(key=by_index) 

2604 # Also residue 'atom_indices' must be updated 

2605 for residue in self.residues: 

2606 residue._atom_indices = [ new_atom_indices.index(atom_index) for atom_index in residue._atom_indices ] 

2607 # Bonds must be reset since atom indices have changes 

2608 self._bonds = None 

2609 # Prepare the trajectory atom sorter which must be returned 

2610 # Include atom indices already so the user has to provide only the structure and trajectory filepaths 

2611 def trajectory_atom_sorter ( 

2612 input_structure_file : 'File', 

2613 input_trajectory_file : 'File', 

2614 output_trajectory_file : 'File' 

2615 ): 

2616 sort_trajectory_atoms( 

2617 input_structure_file, 

2618 input_trajectory_file, 

2619 output_trajectory_file, 

2620 new_atom_indices 

2621 ) 

2622 self.trajectory_atom_sorter = trajectory_atom_sorter 

2623 self.new_atom_order = new_atom_indices 

2624 modified = True 

2625 # In case we have duplicated residues 

2626 duplicated_residues_count = len(overall_duplicated_residues) 

2627 if duplicated_residues_count > 0: 

2628 if display_summary: 

2629 warn(f'There are {duplicated_residues_count} different groups of duplicated residues') 

2630 print(f' e.g. {overall_duplicated_residues[0][0]}') 

2631 # Renumerate duplicated residues if requested 

2632 if fix_residues: 

2633 # First of all, from each group of repeated residues, discard the first residue 

2634 # The first residue will remain as it is and the rest will be renumerated 

2635 # Join all residues to be renumerated in a single list 

2636 residue_to_renumerate = [] 

2637 for residues in overall_duplicated_residues: 

2638 residue_to_renumerate += residues[1:] 

2639 # Now group residues per chain since the renumeration is done by chains 

2640 chain_residues = {} 

2641 for residue in residue_to_renumerate: 

2642 chain = residue.chain 

2643 current_chain_residues = chain_residues.get(chain, None) 

2644 if current_chain_residues: current_chain_residues.append(residue) 

2645 else: chain_residues[chain] = [ residue ] 

2646 # Iterate residue chain groups 

2647 for chain, residues in chain_residues.items(): 

2648 # Find the last residue number in the current chain 

2649 maximum_chain_number = max([ residue.number for residue in chain.residues ]) 

2650 # Sort residues by index 

2651 residues.sort(key=lambda x: x.index, reverse=True) 

2652 for residue in residues: 

2653 # Set the number of the residue as the next available number 

2654 residue.number = maximum_chain_number + 1 

2655 # Update the maximum number 

2656 maximum_chain_number = residue.number 

2657 modified = True 

2658 return modified 

2659 

2660 # DANI: No recuerdo los problemas que daba tener átomos repetidos 

2661 def check_repeated_atoms (self, fix_atoms : bool = False, display_summary : bool = False) -> bool: 

2662 """Check atoms to search for repeated atoms. 

2663 Atoms with identical chain, residue and name are considered repeated atoms. 

2664 

2665 Args: 

2666 fix_atoms (bool): If True, rename repeated atoms. 

2667 display_summary (bool): If True, display a summary of repeated atoms. 

2668  

2669 Returns: 

2670 bool: True if there were any repeated atoms, False otherwise. 

2671 """ 

2672 # Set two trackers for display 

2673 repeated_atoms_count = 0 

2674 example = None 

2675 for residue in self.residues: 

2676 # Iterate over the residue atoms counting how many repeated names we have 

2677 current_names = [] 

2678 for atom in residue.atoms: 

2679 # We check if the atom name already exists. If not, go to the next atom 

2680 name = atom.name 

2681 if name not in current_names: 

2682 current_names.append(name) 

2683 continue 

2684 # When atom is repeated 

2685 repeated_atoms_count += 1 

2686 # If the fix was not requested we stop here 

2687 if not fix_atoms: 

2688 continue 

2689 # We set the name of the atom as the element letter + the count of this element 

2690 # If element is missing take the first character of the atom name 

2691 initial = atom.element 

2692 if not initial or initial == ' ': 

2693 initial = name[0] 

2694 number = 1 

2695 new_name = initial + str(number) 

2696 while new_name in current_names: 

2697 number += 1 

2698 if number > 999: 

2699 raise ValueError('There are more than 999 atoms with the same name in the residue') 

2700 new_name = initial + str(number) 

2701 # Save an example for the logs if there is none yet 

2702 if not example: 

2703 example = f'{atom.name} renamed as {new_name} in residue {residue}' 

2704 atom.name = new_name 

2705 current_names.append(new_name) 

2706 # Display the summary of repeated atoms if requested 

2707 if display_summary: 

2708 if repeated_atoms_count > 0: 

2709 warn(f'There are repeated atoms ({repeated_atoms_count})') 

2710 print(f' e.g. {example}') 

2711 return repeated_atoms_count > 0 

2712 

2713 def is_missing_any_bonds (self) -> bool: 

2714 return any(bond == MISSING_BONDS for bond in self.bonds) 

2715 

2716 def check_incoherent_bonds (self) -> bool: 

2717 """ Check bonds to be incoherent i.e. check atoms not to have more or less  

2718 bonds than expected according to their element.  

2719 Return True if any incoherent bond is found. """ 

2720 # Find out if there are hydrogens in the structure 

2721 # It may happen that we encounter an structure without hydrogens 

2722 has_hydrogen = next(( True for atom in self.atoms if atom.element == 'H' ), False) 

2723 coherent_bonds = coherent_bonds_with_hydrogen if has_hydrogen else coherent_bonds_without_hydrogen 

2724 # Check coherent bonds atom by atom 

2725 for atom in self.atoms: 

2726 # Do not check this atom bonds if this may be an ion 

2727 # Most authors "force" dummy bonds in these atoms to make them stable 

2728 if atom.element in SUPPORTED_ION_ELEMENTS: continue 

2729 # Ignore dummy atoms as well 

2730 if atom.element == DUMMY_ATOM_ELEMENT: continue 

2731 # Ignore coarse grain atoms as well 

2732 if atom.element == CG_ATOM_ELEMENT: continue 

2733 # Get actual number of bonds in the current atom both with and without ion bonds 

2734 # LORE: This was a problem since some ions are force-bonded but bonds are actually not real 

2735 # LORE: When an ion is forced we may end up with hyrogens with 2 bonds or carbons with 5 bonds 

2736 # LORE: When an ions is really bonded we can not discard it or we may end up with orphan carbons (e.g. ligands) 

2737 min_nbonds = len(atom.get_bonds(skip_ions = True, skip_dummies = True)) 

2738 max_nbonds = len(atom.get_bonds(skip_ions = False, skip_dummies = True)) 

2739 # Get the accepted range of number of bonds for the current atom according to its element 

2740 element = atom.element 

2741 element_coherent_bonds = coherent_bonds.get(element, None) 

2742 # If there are no specficiations for the current atom element then skip it 

2743 if not element_coherent_bonds: 

2744 continue 

2745 # Get the range of accepted number of bonds 

2746 min_allowed_bonds = element_coherent_bonds['min'] 

2747 max_allowed_bonds = element_coherent_bonds['max'] 

2748 # Check the actual number of bonds is insdie the accepted range 

2749 if max_nbonds < min_allowed_bonds or min_nbonds > max_allowed_bonds: 

2750 if min_nbonds == max_nbonds: 

2751 print(f' An atom with element {element} has {min_nbonds} bonds') 

2752 else: 

2753 print(f' An atom with element {element} has between {min_nbonds} bonds (withou ions) and {max_nbonds} bonds (with ions)') 

2754 if min_allowed_bonds == max_allowed_bonds: 

2755 plural_sufix = '' if min_allowed_bonds == 1 else 's' 

2756 print(f' It should have {min_allowed_bonds} bond{plural_sufix}') 

2757 else: 

2758 print(f' It should have between {min_allowed_bonds} and {max_allowed_bonds} bonds') 

2759 print(f' -> Atom {atom.label}') 

2760 bond_label = ', '.join([ self.atoms[atom].label for atom in atom.get_bonds(skip_ions = False) ]) 

2761 print(f' -> Bonds {bond_label}') 

2762 return True 

2763 return False 

2764 

2765 def get_covalent_bonds (self, selection : Optional['Selection'] = None) -> list[ list[int] ]: 

2766 """Get all atomic covalent (strong) bonds. 

2767 Bonds are defined as a list of atom indices for each atom in the structure. 

2768 Rely on VMD logic to do so.""" 

2769 # It is important to fix elements before trying to fix bonds, since elements have an impact on bonds 

2770 # VMD logic to find bonds relies in the atom element to set the covalent bond distance cutoff 

2771 self.fix_atom_elements() 

2772 # Generate a pdb strucutre to feed vmd 

2773 auxiliar_pdb_filepath = '.structure.pdb' 

2774 self.generate_pdb_file(auxiliar_pdb_filepath) 

2775 # Get covalent bonds between both residue atoms 

2776 covalent_bonds = get_covalent_bonds(auxiliar_pdb_filepath, selection) 

2777 # For every atom in CG, replace its bonds with a class which will raise and error when read 

2778 # Thus we make sure using these wrong bonds anywhere further will result in failure 

2779 for atom_index in self.select_cg().atom_indices: 

2780 covalent_bonds[atom_index] = MISSING_BONDS 

2781 # Remove the auxiliar pdb file 

2782 os.remove(auxiliar_pdb_filepath) 

2783 return covalent_bonds 

2784 

2785 def copy_bonds (self) -> list[list[int]]: 

2786 """Make a copy of the bonds list.""" 

2787 new_bonds = [] 

2788 for atom_bonds in self.bonds: 

2789 # Missing bonds coming from CG atoms are forwarded 

2790 if atom_bonds == MISSING_BONDS: 

2791 new_bonds.append(MISSING_BONDS) 

2792 continue 

2793 # Copy also the inner lists to avoid further mutation of the original structure 

2794 new_bonds.append([ atom_index for atom_index in atom_bonds ]) 

2795 return new_bonds 

2796 

2797 def copy (self) -> 'Structure': 

2798 """Make a copy of the current structure.""" 

2799 atom_copies = [ atom.copy() for atom in self.atoms ] 

2800 residue_copies = [ residue.copy() for residue in self.residues ] 

2801 chain_copies = [ chain.copy() for chain in self.chains ] 

2802 structure_copy = Structure(atom_copies, residue_copies, chain_copies) 

2803 structure_copy.bonds = self.copy_bonds() 

2804 return structure_copy 

2805 

2806 # DANI: No lo he testeado en profundidad 

2807 def merge (self, other : 'Structure') -> 'Structure': 

2808 """Merge current structure with another structure.""" 

2809 # Copy self atoms, residues and chains 

2810 self_atom_copies = [ atom.copy() for atom in self.atoms ] 

2811 self_residue_copies = [ residue.copy() for residue in self.residues ] 

2812 self_chain_copies = [ chain.copy() for chain in self.chains ] 

2813 # Copy other atoms, residues and chains 

2814 other_atom_copies = [ atom.copy() for atom in other.atoms ] 

2815 other_residue_copies = [ residue.copy() for residue in other.residues ] 

2816 other_chain_copies = [ chain.copy() for chain in other.chains ] 

2817 # Adapt indices in other atoms, residues and chains 

2818 atom_index_offset = len(self_atom_copies) 

2819 residue_index_offset = len(self_residue_copies) 

2820 chain_index_offset = len(self_chain_copies) 

2821 for atom in other_atom_copies: 

2822 atom._index += atom_index_offset 

2823 atom._residue_index += residue_index_offset 

2824 for residue in other_residue_copies: 

2825 residue._index += residue_index_offset 

2826 residue._atom_indices = [ i + atom_index_offset for i in residue._atom_indices ] 

2827 residue._chain_index += chain_index_offset 

2828 for chain in other_chain_copies: 

2829 chain._index += chain_index_offset 

2830 chain._residue_indices = [ i + residue_index_offset for i in chain._residue_indices ] 

2831 # Merge self with other atoms, residues and chains and build the new structure 

2832 merged_atoms = self_atom_copies + other_atom_copies 

2833 merged_residues = self_residue_copies + other_residue_copies 

2834 merged_chains = self_chain_copies + other_chain_copies 

2835 return Structure(merged_atoms, merged_residues, merged_chains) 

2836 

2837 def find_rings (self, max_ring_size : int, selection : Optional[Selection] = None) -> list[ list[Atom] ]: 

2838 """Find rings with a maximum specific size or less in the structure and yield them as they are found.""" 

2839 found_rings = [] 

2840 selected_atom_indices = selection.atom_indices if selection else None 

2841 # Find rings recursively 

2842 def find_rings_recursive (atom_path : list[Atom]) -> Generator[ tuple[Atom], None, None ]: 

2843 # Get the current atom to continue the followup: the last atom 

2844 current_atom = atom_path[-1] # Note that the list MUST always have at least 1 atom 

2845 # Get bonded atoms to continue the path 

2846 followup_atoms = current_atom.get_bonded_atoms() 

2847 # Hydrogens are discarded since they have only 1 bond and thus they can only lead to a dead end 

2848 followup_atoms = [ atom for atom in followup_atoms if atom.element != 'H' ] 

2849 # In case there is a selection, check followup atoms not to be in the selection 

2850 if selected_atom_indices: 

2851 followup_atoms = [ atom for atom in followup_atoms if atom.index in selected_atom_indices ] 

2852 # Remove the previous atom to avoid going back 

2853 previous_atom = atom_path[-2] if len(atom_path) > 1 else None 

2854 if previous_atom: 

2855 # HARDCODE: This is a problem which should be studied and fixed 

2856 # DANI: I temporatily bypass the problem to make it on time to a meeting 

2857 if previous_atom not in followup_atoms: return 

2858 followup_atoms.remove(previous_atom) 

2859 # Iterate over the following bonded atoms 

2860 for followup_atom in followup_atoms: 

2861 # Check if the followup atom is in the path 

2862 path_index = next(( index for index, atom in enumerate(atom_path) if atom == followup_atom ), None) 

2863 # If so, we have found a ring 

2864 if path_index != None: 

2865 ring = atom_path[path_index:] 

2866 ring.sort(key=lambda x: x.index) 

2867 yield tuple(ring) 

2868 continue 

2869 # If the ring size is reached, we can not continue 

2870 if len(atom_path) == max_ring_size: continue 

2871 # Otherwise, keep searching 

2872 new_path = atom_path + [followup_atom] 

2873 for ring in find_rings_recursive(atom_path=new_path): 

2874 yield ring 

2875 # Find a starting atom and run the recursive logic 

2876 candidate_atom_indices = selected_atom_indices if selected_atom_indices else range(self.atom_count) 

2877 for candidate_atom_index in candidate_atom_indices: 

2878 candidate_atom = self.atoms[candidate_atom_index] 

2879 # It must be heavy atom 

2880 if len(candidate_atom.bonds) < 2: continue 

2881 # It must not be a dead end already 

2882 bonded_atoms = candidate_atom.get_bonded_atoms() 

2883 bonded_atoms = [ atom for atom in bonded_atoms if len(atom.bonds) >= 2 ] 

2884 # In case there is a selection, check followup atoms not to be in the selection 

2885 if selected_atom_indices: 

2886 bonded_atoms = [ atom for atom in bonded_atoms if atom.index in selected_atom_indices ] 

2887 for ring in find_rings_recursive(atom_path=[candidate_atom]): 

2888 found_rings.append(ring) 

2889 # Get unique rings 

2890 unique_rings = set(found_rings) 

2891 return list(unique_rings) 

2892 

2893 def get_selection_outer_bonds (self, selection : Selection) -> list[int]: 

2894 """Given an atom selection, get all bonds between these atoms and any other atom in the structure. 

2895 Note that inner bonds between atoms in the selection are discarded.""" 

2896 # Get bonds from all atoms in the selection 

2897 bonds = set() 

2898 for atom_index in selection.atom_indices: 

2899 atom_bonds = set(self.bonds[atom_index]) 

2900 bonds = bonds.union(atom_bonds) 

2901 # Discard selection atoms from the bonds list to discard inner bonds 

2902 bonds -= set(selection.atom_indices) 

2903 return list(bonds) 

2904 

2905 # Set the type of PTM according to the classification of the bonded residue 

2906 ptm_options = { 

2907 'protein': ValueError('A PTM residue must never be protein'), 

2908 'dna': 'DNA linkage', # DANI: Esto es posible aunque muy raro y no se como se llama 

2909 'rna': 'RNA linkage', # DANI: Esto es posible aunque muy raro y no se como se llama 

2910 'carbohydrate': 'Glycosilation', 

2911 'fatty': 'Lipidation', 

2912 'steroid': 'Steroid linkage', # DANI: Esto es posible aunque muy raro y no se como se llama 

2913 'ion': Warning('Ion is covalently bonded to protein'), # DANI: esto no es "correcto" pero si habitual 

2914 'solvent': Warning('Solvent is covalently bonded to protein'), # DANI: probablemente sea un error 

2915 'acetyl': 'Acetylation', # Typical N-capping terminals 

2916 'amide': 'Amidation', # Typical C-capping terminals 

2917 'other': Warning('Unknow type of PTM'), # Something not supported 

2918 } 

2919 

2920 def find_ptms (self) -> Generator[dict, None, None]: 

2921 """Find Post Translational Modifications (PTM) in the structure.""" 

2922 # Find bonds between protein and non-protein atoms 

2923 # First get all protein atoms 

2924 protein_selection = self.select_protein() 

2925 if not protein_selection: 

2926 return 

2927 protein_atom_indices = set(protein_selection.atom_indices) # This is used later 

2928 protein_outer_bonds = set(self.get_selection_outer_bonds(protein_selection)) 

2929 non_protein_selection = self.invert_selection(protein_selection) 

2930 if not non_protein_selection: 

2931 return 

2932 non_protein_atom_indices = set(non_protein_selection.atom_indices) 

2933 non_protein_atom_indices_bonded_to_protein = protein_outer_bonds.intersection(non_protein_atom_indices) 

2934 # Get each residue bonded to the protein and based on its 'classification' set the name of the PTM 

2935 for atom_index in non_protein_atom_indices_bonded_to_protein: 

2936 atom = self.atoms[atom_index] 

2937 residue = atom.residue 

2938 residue_classification = residue.get_classification() 

2939 ptm_classification = self.ptm_options[residue_classification] 

2940 # If we found something impossible then raise the error 

2941 if type(ptm_classification) == ValueError: 

2942 print(f'Problematic residue: {residue}') 

2943 raise ptm_classification 

2944 # If we do not know what it is then do tag it as a PTM but print a warning 

2945 if type(ptm_classification) == Warning: 

2946 warn(f'{ptm_classification} -> {residue}') 

2947 continue 

2948 # At this point we have found a valid PTM 

2949 # Find the protein residue linked to this PTM 

2950 atom_bonds = self.bonds[atom_index] 

2951 protein_bond = next((bond for bond in atom_bonds if bond in protein_atom_indices), None) 

2952 if protein_bond == None: 

2953 raise ValueError('There must be at least one protein bond to the current atom') 

2954 protein_residue_index = self.atoms[protein_bond].residue_index 

2955 # Set the PTM 

2956 yield { 'name': ptm_classification, 'residue_index': protein_residue_index } 

2957 

2958 def has_cg (self) -> bool: 

2959 """Ask if the structure has at least one coarse grain atom/residue.""" 

2960 return any(atom.element == CG_ATOM_ELEMENT for atom in self.atoms) 

2961 

2962 

2963# ========================= 

2964# Related functions 

2965# ========================= 

2966 

2967def calculate_distance (atom_1 : Atom, atom_2 : Atom) -> float: 

2968 """Calculate the distance between two atoms.""" 

2969 squared_distances_sum = 0 

2970 for i in { 'x': 0, 'y': 1, 'z': 2 }.values(): 

2971 squared_distances_sum += (atom_1.coords[i] - atom_2.coords[i])**2 

2972 return math.sqrt(squared_distances_sum) 

2973 

2974def calculate_angle (atom_1 : Atom, atom_2 : Atom, atom_3 : Atom) -> float: 

2975 """Calculate the angle between 3 atoms.""" 

2976 # From: https://stackoverflow.com/questions/35176451/python-code-to-calculate-angle-between-three-point-using-their-3d-coordinates 

2977 # Get coordinates in numpy arrays, which allows to easily calculate the difference 

2978 a = np.array(atom_1.coords) 

2979 b = np.array(atom_2.coords) 

2980 c = np.array(atom_3.coords) 

2981 # Set the two vectors which make the angle 

2982 ba = a - b 

2983 bc = c - b 

2984 # Calculate the angle between these 2 vectors 

2985 cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc)) 

2986 angle = np.arccos(cosine_angle) 

2987 return np.degrees(angle) 

2988 

2989def calculate_torsion (atom_1 : Atom, atom_2 : Atom, atom_3 : Atom, atom_4 : Atom) -> float: 

2990 """Calculate the torsion between 4 atoms.""" 

2991 # From: https://stackoverflow.com/questions/20305272/dihedral-torsion-angle-from-four-points-in-cartesian-coordinates-in-python 

2992 p0 = np.array(atom_1.coords) 

2993 p1 = np.array(atom_2.coords) 

2994 p2 = np.array(atom_3.coords) 

2995 p3 = np.array(atom_4.coords) 

2996 # Get some vectors 

2997 b0 = -1.0*(p1 - p0) 

2998 b1 = p2 - p1 

2999 b2 = p3 - p2 

3000 # normalize b1 so that it does not influence magnitude of vector 

3001 # rejections that come next 

3002 b1 /= np.linalg.norm(b1) 

3003 # vector rejections 

3004 # v = projection of b0 onto plane perpendicular to b1 

3005 # = b0 minus component that aligns with b1 

3006 # w = projection of b2 onto plane perpendicular to b1 

3007 # = b2 minus component that aligns with b1 

3008 v = b0 - np.dot(b0, b1)*b1 

3009 w = b2 - np.dot(b2, b1)*b1 

3010 # angle between v and w in a plane is the torsion angle 

3011 # v and w may not be normalized but that's fine since tan is y/x 

3012 x = np.dot(v, w) 

3013 y = np.dot(np.cross(b1, v), w) 

3014 return float(np.degrees(np.arctan2(y, x))) 

3015 

3016# ========================= 

3017# Auxiliar functions  

3018# ========================= 

3019 

3020def get_next_letter (letter : str) -> str: 

3021 """Set a function to get the next letter from an input letter in alphabetic order.""" 

3022 if not letter: 

3023 return 'A' 

3024 if letter == 'z' or letter == 'Z': 

3025 raise InputError("Limit of chain letters has been reached") 

3026 next_letter = chr(ord(letter) + 1) 

3027 return next_letter 

3028 

3029def first_cap_only (name : str) -> str: 

3030 """Given a string with 1 or 2 characters, return a new string with  

3031 the first letter cap and the second letter not cap (if any)""" 

3032 if len(name) == 1: 

3033 return name.upper() 

3034 first_character = name[0].upper() 

3035 second_character = name[1].lower() 

3036 return first_character + second_character 

3037 

3038lower_numbers = { 

3039 '0': '₀', 

3040 '1': '₁', 

3041 '2': '₂', 

3042 '3': '₃', 

3043 '4': '₄', 

3044 '5': '₅', 

3045 '6': '₆', 

3046 '7': '₇', 

3047 '8': '₈', 

3048 '9': '₉', 

3049} 

3050def get_lower_numbers (numbers_text : str) -> str: 

3051 """Convert numbers to lower text characters (chr 8020-8029).""" 

3052 return ''.join([ lower_numbers[c] for c in numbers_text ]) 

3053 

3054def filter_model (pdb_content : str, model : int) -> str: 

3055 """Set a function to filter lines in PDB content for a specific model.""" 

3056 # Make sure the PDB content has multiple models 

3057 # If not, then return the whole PDB content ignoring the flag 

3058 generic_model_header_regex = r'\nMODEL\s*[0-9]*\s*\n' 

3059 if not re.search(generic_model_header_regex, pdb_content): 

3060 print(f'PDB content has no models at all so it will be loaded as is (ignored model "{model}").') 

3061 return pdb_content 

3062 # If a model was passed then it means we must filter the PDB content 

3063 filtered_pdb_content = '' 

3064 # Search PDB lines until we find our model's header 

3065 model_header_regex = rf'^MODEL\s*{model}' 

3066 pdb_lines = iter(pdb_content.split('\n')) 

3067 for line in pdb_lines: 

3068 if not re.match(model_header_regex, line): continue 

3069 # If we just found the header 

3070 filtered_pdb_content = line 

3071 break 

3072 # If we did not find the header then stop here 

3073 if not filtered_pdb_content: raise RuntimeError(f'Could not find model "{model}" header') 

3074 # Add every line to the filtered content until we find the tail 

3075 model_footer_regex = r'^ENDMDL' 

3076 for line in pdb_lines: 

3077 filtered_pdb_content += '\n' + line 

3078 if re.match(model_footer_regex, line): return filtered_pdb_content 

3079 # If we did not find the footer then stop here 

3080 raise RuntimeError(f'Could not find model "{model}" footer')