Skip to content

Commit

Permalink
Add explicit_hydrogen parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Jan 24, 2025
1 parent a778010 commit ae9f239
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 11 deletions.
24 changes: 19 additions & 5 deletions src/biotite/interface/rdkit/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,11 @@

@requires_version("rdkit", ">=2020")
def to_mol(
atoms, kekulize=False, use_dative_bonds=False, include_annotations=("atom_name",)
atoms,
kekulize=False,
use_dative_bonds=False,
include_annotations=("atom_name",),
explicit_hydrogen=True,
):
"""
Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a
Expand All @@ -75,6 +79,11 @@ def to_mol(
Names of annotation arrays in `atoms` that are added as atom-level property with
the same name to the returned :class:`rdkit.Chem.rdchem.Mol`.
These properties can be accessed with :meth:`rdkit.Chem.rdchem.Mol.GetProp()`.
explicit_hydrogen : bool, optional
If set to true, the conversion process expects that all hydrogen atoms are
explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`.
If set to false, the conversion process treats all hydrogen atoms as implicit
and all hydrogen atoms in the :class:`AtomArray` are removed.
Returns
-------
Expand Down Expand Up @@ -110,24 +119,29 @@ def to_mol(
HB3
HXT
"""
if not explicit_hydrogen:
atoms = atoms[..., atoms.element != "H"]
mol = EditableMol(Mol())

has_charge_annot = "charge" in atoms.get_annotation_categories()
for i in range(atoms.array_length()):
rdkit_atom = Atom(atoms.element[i].capitalize())
if has_charge_annot:
rdkit_atom.SetFormalCharge(atoms.charge[i].item())
if explicit_hydrogen:
rdkit_atom.SetNoImplicit(True)
for annot_name in include_annotations:
rdkit_atom.SetProp(annot_name, atoms.get_annotation(annot_name)[i].item())
mol.AddAtom(rdkit_atom)

if atoms.bonds is None:
raise BadStructureError("An AtomArray with associated BondList is required")
bonds = atoms.bonds.as_array()
if kekulize:
bonds = bonds.copy()
bonds = atoms.bonds.copy()
bonds.remove_aromaticity()
for atom_i, atom_j, bond_type in atoms.bonds.as_array():
else:
bonds = atoms.bonds
for atom_i, atom_j, bond_type in bonds.as_array():
if not use_dative_bonds and bond_type == BondType.COORDINATION:
bond_type = BondType.SINGLE
mol.AddBond(
Expand Down Expand Up @@ -261,7 +275,7 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
except KekulizeException:
warnings.warn(
"Kekulization failed, "
"using 'BondType.ANY' instead for aromatic bonds instead",
"using 'BondType.AROMATIC' instead for aromatic bonds instead",
LossyConversionWarning,
)
rdkit_bonds = list(mol.GetBonds())
Expand Down
10 changes: 4 additions & 6 deletions tests/interface/test_rdkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@ def _load_smiles():
return file.read().splitlines()


@pytest.mark.filterwarnings(
"ignore:"
"The coordinates are missing for some atoms. "
"The fallback coordinates will be used instead"
)
@pytest.mark.filterwarnings("ignore:Missing coordinates.*")
@pytest.mark.filterwarnings("ignore:.*coordinates are missing.*")
@pytest.mark.filterwarnings("ignore::biotite.interface.LossyConversionWarning")
@pytest.mark.parametrize(
"res_name", np.random.default_rng(0).choice(info.all_residues(), size=200).tolist()
)
Expand All @@ -46,7 +44,7 @@ def test_conversion_from_biotite(res_name):
# Some compounds in the CCD have missing coordinates
assert np.allclose(test_atoms.coord, ref_atoms.coord, equal_nan=True)

# There should be now undefined bonds
# There should be no undefined bonds
assert (test_atoms.bonds.as_array()[:, 2] != struc.BondType.ANY).all()
# Kekulization returns one of multiple resonance structures, so the returned one
# might not be the same as the input
Expand Down

0 comments on commit ae9f239

Please sign in to comment.