Skip to content

Commit

Permalink
Use canonical rdkit import
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Feb 2, 2025
1 parent 67af5dc commit 4a04f4b
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 45 deletions.
20 changes: 7 additions & 13 deletions doc/tutorial/interface/rdkit.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,14 @@ For a proper structural formula, we need to compute proper 2D coordinates first.

import biotite.interface.rdkit as rdkit_interface
import biotite.structure.info as struc
import rdkit.Chem.AllChem as Chem
from rdkit.Chem.Draw import MolToImage
from rdkit.Chem.rdDepictor import Compute2DCoords
from rdkit.Chem.rdmolops import RemoveHs

penicillin = struc.residue("PNN")
mol = rdkit_interface.to_mol(penicillin)
# We do not want to include explicit hydrogen atoms in the structural formula
mol = RemoveHs(mol)
Compute2DCoords(mol)
mol = Chem.RemoveHs(mol)
Chem.Compute2DCoords(mol)
image = MolToImage(mol, size=(600, 400))
display(image)

Expand All @@ -49,18 +48,13 @@ One way to to obtain them as :class:`.AtomArray` is passing a *SMILES* string to

.. jupyter-execute::

from rdkit.Chem import MolFromSmiles
from rdkit.Chem.rdDistGeom import EmbedMolecule
from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule
from rdkit.Chem.rdmolops import AddHs

ERTAPENEM_SMILES = "C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)[C@@H](C)O"

mol = MolFromSmiles(ERTAPENEM_SMILES)
mol = Chem.MolFromSmiles(ERTAPENEM_SMILES)
# RDKit uses implicit hydrogen atoms by default, but Biotite requires explicit ones
mol = AddHs(mol)
mol = Chem.AddHs(mol)
# Create a 3D conformer
conformer_id = EmbedMolecule(mol)
UFFOptimizeMolecule(mol)
conformer_id = Chem.EmbedMolecule(mol)
Chem.UFFOptimizeMolecule(mol)
ertapenem = rdkit_interface.from_mol(mol, conformer_id)
print(ertapenem)
59 changes: 27 additions & 32 deletions src/biotite/interface/rdkit/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@
import warnings
from collections import defaultdict
import numpy as np
from rdkit.Chem.rdchem import Atom, Conformer, EditableMol, KekulizeException, Mol
from rdkit.Chem.rdchem import BondType as RDKitBondType
from rdkit.Chem.rdmolops import AddHs, Kekulize, SanitizeFlags, SanitizeMol
import rdkit.Chem.AllChem as Chem
from biotite.interface.version import requires_version
from biotite.interface.warning import LossyConversionWarning
from biotite.structure.atoms import AtomArray, AtomArrayStack
Expand All @@ -24,26 +22,26 @@
BondType.TRIPLE: BondType.AROMATIC_TRIPLE,
}
_BIOTITE_TO_RDKIT_BOND_TYPE = {
BondType.ANY: RDKitBondType.UNSPECIFIED,
BondType.SINGLE: RDKitBondType.SINGLE,
BondType.DOUBLE: RDKitBondType.DOUBLE,
BondType.TRIPLE: RDKitBondType.TRIPLE,
BondType.QUADRUPLE: RDKitBondType.QUADRUPLE,
BondType.AROMATIC_SINGLE: RDKitBondType.AROMATIC,
BondType.AROMATIC_DOUBLE: RDKitBondType.AROMATIC,
BondType.AROMATIC_TRIPLE: RDKitBondType.AROMATIC,
BondType.AROMATIC: RDKitBondType.AROMATIC,
BondType.ANY: Chem.BondType.UNSPECIFIED,
BondType.SINGLE: Chem.BondType.SINGLE,
BondType.DOUBLE: Chem.BondType.DOUBLE,
BondType.TRIPLE: Chem.BondType.TRIPLE,
BondType.QUADRUPLE: Chem.BondType.QUADRUPLE,
BondType.AROMATIC_SINGLE: Chem.BondType.AROMATIC,
BondType.AROMATIC_DOUBLE: Chem.BondType.AROMATIC,
BondType.AROMATIC_TRIPLE: Chem.BondType.AROMATIC,
BondType.AROMATIC: Chem.BondType.AROMATIC,
# Dative bonds may lead to a KekulizeException and may potentially be deprecated
# in the future (https://github.com/rdkit/rdkit/discussions/6995)
BondType.COORDINATION: RDKitBondType.SINGLE,
BondType.COORDINATION: Chem.BondType.SINGLE,
}
_RDKIT_TO_BIOTITE_BOND_TYPE = {
RDKitBondType.UNSPECIFIED: BondType.ANY,
RDKitBondType.SINGLE: BondType.SINGLE,
RDKitBondType.DOUBLE: BondType.DOUBLE,
RDKitBondType.TRIPLE: BondType.TRIPLE,
RDKitBondType.QUADRUPLE: BondType.QUADRUPLE,
RDKitBondType.DATIVE: BondType.COORDINATION,
Chem.BondType.UNSPECIFIED: BondType.ANY,
Chem.BondType.SINGLE: BondType.SINGLE,
Chem.BondType.DOUBLE: BondType.DOUBLE,
Chem.BondType.TRIPLE: BondType.TRIPLE,
Chem.BondType.QUADRUPLE: BondType.QUADRUPLE,
Chem.BondType.DATIVE: BondType.COORDINATION,
}


Expand Down Expand Up @@ -128,11 +126,11 @@ def to_mol(
else:
atoms = atoms[..., ~hydrogen_mask]

mol = EditableMol(Mol())
mol = Chem.EditableMol(Chem.Mol())

has_charge_annot = "charge" in atoms.get_annotation_categories()
for i in range(atoms.array_length()):
rdkit_atom = Atom(atoms.element[i].capitalize())
rdkit_atom = Chem.Atom(atoms.element[i].capitalize())
if has_charge_annot:
rdkit_atom.SetFormalCharge(atoms.charge[i].item())
if explicit_hydrogen:
Expand Down Expand Up @@ -162,11 +160,8 @@ def to_mol(
# Handle AtomArray and AtomArrayStack consistently
coord = coord[None, :, :]
for model_coord in coord:
conformer = Conformer(mol.GetNumAtoms())
# RDKit silently expects the data to be in C-contiguous order
# Otherwise the coordinates would be completely misassigned
# (https://github.com/rdkit/rdkit/issues/8221)
conformer.SetPositions(np.ascontiguousarray(model_coord, dtype=np.float64))
conformer = Chem.Conformer(mol.GetNumAtoms())
conformer.SetPositions(model_coord.astype(np.float64))
conformer.Set3D(True)
mol.AddConformer(conformer)

Expand Down Expand Up @@ -239,8 +234,8 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
if add_hydrogen is None:
add_hydrogen = not _has_explicit_hydrogen(mol)
if add_hydrogen:
SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS)
mol = AddHs(mol)
Chem.SanitizeMol(mol, Chem.SanitizeFlags.SANITIZE_ADJUSTHS)
mol = Chem.AddHs(mol)

rdkit_atoms = mol.GetAtoms()
if rdkit_atoms is None:
Expand Down Expand Up @@ -274,15 +269,15 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):

rdkit_bonds = list(mol.GetBonds())
is_aromatic = np.array(
[bond.GetBondType() == RDKitBondType.AROMATIC for bond in rdkit_bonds]
[bond.GetBondType() == Chem.BondType.AROMATIC for bond in rdkit_bonds]
)
if np.any(is_aromatic):
# Determine the kekulized order of aromatic bonds
# Copy as 'Kekulize()' modifies the molecule in-place
mol = Mol(mol)
mol = Chem.Mol(mol)
try:
Kekulize(mol)
except KekulizeException:
Chem.Kekulize(mol)
except Chem.KekulizeException:
warnings.warn(
"Kekulization failed, "
"using 'BondType.AROMATIC' instead for aromatic bonds instead",
Expand Down

0 comments on commit 4a04f4b

Please sign in to comment.