Add explicit_hydrogen parameter

biotite-dev · Jan 24, 2025 · ae9f239 · ae9f239
1 parent a778010
commit ae9f239
Show file tree

Hide file tree

Showing 2 changed files with 23 additions and 11 deletions.
diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
@@ -49,7 +49,11 @@
 
 @requires_version("rdkit", ">=2020")
 def to_mol(
-    atoms, kekulize=False, use_dative_bonds=False, include_annotations=("atom_name",)
+    atoms,
+    kekulize=False,
+    use_dative_bonds=False,
+    include_annotations=("atom_name",),
+    explicit_hydrogen=True,
 ):
     """
     Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a
@@ -75,6 +79,11 @@ def to_mol(
         Names of annotation arrays in `atoms` that are added as atom-level property with
         the same name to the returned :class:`rdkit.Chem.rdchem.Mol`.
         These properties can be accessed with :meth:`rdkit.Chem.rdchem.Mol.GetProp()`.
+    explicit_hydrogen : bool, optional
+        If set to true, the conversion process expects that all hydrogen atoms are
+        explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`.
+        If set to false, the conversion process treats all hydrogen atoms as implicit
+        and all hydrogen atoms in the :class:`AtomArray` are removed.
 
     Returns
     -------
@@ -110,24 +119,29 @@ def to_mol(
     HB3
     HXT
     """
+    if not explicit_hydrogen:
+        atoms = atoms[..., atoms.element != "H"]
     mol = EditableMol(Mol())
 
     has_charge_annot = "charge" in atoms.get_annotation_categories()
     for i in range(atoms.array_length()):
         rdkit_atom = Atom(atoms.element[i].capitalize())
         if has_charge_annot:
             rdkit_atom.SetFormalCharge(atoms.charge[i].item())
+        if explicit_hydrogen:
+            rdkit_atom.SetNoImplicit(True)
         for annot_name in include_annotations:
             rdkit_atom.SetProp(annot_name, atoms.get_annotation(annot_name)[i].item())
         mol.AddAtom(rdkit_atom)
 
     if atoms.bonds is None:
         raise BadStructureError("An AtomArray with associated BondList is required")
-    bonds = atoms.bonds.as_array()
     if kekulize:
-        bonds = bonds.copy()
+        bonds = atoms.bonds.copy()
         bonds.remove_aromaticity()
-    for atom_i, atom_j, bond_type in atoms.bonds.as_array():
+    else:
+        bonds = atoms.bonds
+    for atom_i, atom_j, bond_type in bonds.as_array():
         if not use_dative_bonds and bond_type == BondType.COORDINATION:
             bond_type = BondType.SINGLE
         mol.AddBond(
@@ -261,7 +275,7 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
         except KekulizeException:
             warnings.warn(
                 "Kekulization failed, "
-                "using 'BondType.ANY' instead for aromatic bonds instead",
+                "using 'BondType.AROMATIC' instead for aromatic bonds instead",
                 LossyConversionWarning,
             )
         rdkit_bonds = list(mol.GetBonds())

diff --git a/tests/interface/test_rdkit.py b/tests/interface/test_rdkit.py
@@ -20,11 +20,9 @@ def _load_smiles():
         return file.read().splitlines()
 
 
-@pytest.mark.filterwarnings(
-    "ignore:"
-    "The coordinates are missing for some atoms. "
-    "The fallback coordinates will be used instead"
-)
+@pytest.mark.filterwarnings("ignore:Missing coordinates.*")
+@pytest.mark.filterwarnings("ignore:.*coordinates are missing.*")
+@pytest.mark.filterwarnings("ignore::biotite.interface.LossyConversionWarning")
 @pytest.mark.parametrize(
     "res_name", np.random.default_rng(0).choice(info.all_residues(), size=200).tolist()
 )
@@ -46,7 +44,7 @@ def test_conversion_from_biotite(res_name):
     # Some compounds in the CCD have missing coordinates
     assert np.allclose(test_atoms.coord, ref_atoms.coord, equal_nan=True)
 
-    # There should be now undefined bonds
+    # There should be no undefined bonds
     assert (test_atoms.bonds.as_array()[:, 2] != struc.BondType.ANY).all()
     # Kekulization returns one of multiple resonance structures, so the returned one
     # might not be the same as the input