diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index 3f9ad3170..2b6701c73 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -49,7 +49,11 @@ @requires_version("rdkit", ">=2020") def to_mol( - atoms, kekulize=False, use_dative_bonds=False, include_annotations=("atom_name",) + atoms, + kekulize=False, + use_dative_bonds=False, + include_annotations=("atom_name",), + explicit_hydrogen=True, ): """ Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a @@ -75,6 +79,11 @@ def to_mol( Names of annotation arrays in `atoms` that are added as atom-level property with the same name to the returned :class:`rdkit.Chem.rdchem.Mol`. These properties can be accessed with :meth:`rdkit.Chem.rdchem.Mol.GetProp()`. + explicit_hydrogen : bool, optional + If set to true, the conversion process expects that all hydrogen atoms are + explicit, i.e. each hydrogen atom must be part of the :class:`AtomArray`. + If set to false, the conversion process treats all hydrogen atoms as implicit + and all hydrogen atoms in the :class:`AtomArray` are removed. 
Returns ------- @@ -110,6 +119,8 @@ def to_mol( HB3 HXT """ + if not explicit_hydrogen: + atoms = atoms[..., atoms.element != "H"] mol = EditableMol(Mol()) has_charge_annot = "charge" in atoms.get_annotation_categories() @@ -117,17 +128,20 @@ def to_mol( rdkit_atom = Atom(atoms.element[i].capitalize()) if has_charge_annot: rdkit_atom.SetFormalCharge(atoms.charge[i].item()) + if explicit_hydrogen: + rdkit_atom.SetNoImplicit(True) for annot_name in include_annotations: rdkit_atom.SetProp(annot_name, atoms.get_annotation(annot_name)[i].item()) mol.AddAtom(rdkit_atom) if atoms.bonds is None: raise BadStructureError("An AtomArray with associated BondList is required") - bonds = atoms.bonds.as_array() if kekulize: - bonds = bonds.copy() + bonds = atoms.bonds.copy() bonds.remove_aromaticity() - for atom_i, atom_j, bond_type in atoms.bonds.as_array(): + else: + bonds = atoms.bonds + for atom_i, atom_j, bond_type in bonds.as_array(): if not use_dative_bonds and bond_type == BondType.COORDINATION: bond_type = BondType.SINGLE mol.AddBond( @@ -261,7 +275,7 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): except KekulizeException: warnings.warn( "Kekulization failed, " - "using 'BondType.ANY' instead for aromatic bonds instead", + "using 'BondType.AROMATIC' instead for aromatic bonds instead", LossyConversionWarning, ) rdkit_bonds = list(mol.GetBonds()) diff --git a/tests/interface/test_rdkit.py b/tests/interface/test_rdkit.py index 24e56bf77..c35e9da54 100644 --- a/tests/interface/test_rdkit.py +++ b/tests/interface/test_rdkit.py @@ -20,11 +20,9 @@ def _load_smiles(): return file.read().splitlines() -@pytest.mark.filterwarnings( - "ignore:" - "The coordinates are missing for some atoms. 
" - "The fallback coordinates will be used instead" -) +@pytest.mark.filterwarnings("ignore:Missing coordinates.*") +@pytest.mark.filterwarnings("ignore:.*coordinates are missing.*") +@pytest.mark.filterwarnings("ignore::biotite.interface.LossyConversionWarning") @pytest.mark.parametrize( "res_name", np.random.default_rng(0).choice(info.all_residues(), size=200).tolist() ) @@ -46,7 +44,7 @@ def test_conversion_from_biotite(res_name): # Some compounds in the CCD have missing coordinates assert np.allclose(test_atoms.coord, ref_atoms.coord, equal_nan=True) - # There should be now undefined bonds + # There should be no undefined bonds assert (test_atoms.bonds.as_array()[:, 2] != struc.BondType.ANY).all() # Kekulization returns one of multiple resonance structures, so the returned one # might not be the same as the input