diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml
index d3b80f4a2..8ef52bc86 100644
--- a/.github/workflows/test_and_deploy.yml
+++ b/.github/workflows/test_and_deploy.yml
@@ -23,6 +23,7 @@ env:
--durations=50
--ignore={project}//tests//sequence//align//test_statistics.py
--ignore={project}//tests//application
+ --ignore={project}//tests//interface
--ignore={project}//tests//database
--ignore={project}//tests//test_doctest.py
--ignore={project}//tests//test_modname.py
@@ -223,6 +224,7 @@ jobs:
tests//test_modname.py
tests//database
tests//application
+ tests//interface
test-muscle5:
diff --git a/doc/conf.py b/doc/conf.py
index eee7fc572..86d5d438b 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -67,6 +67,7 @@
"sphinx.ext.doctest",
"sphinx.ext.mathjax",
"sphinx.ext.linkcode",
+ "sphinx.ext.intersphinx",
"sphinxcontrib.bibtex",
"sphinx_gallery.gen_gallery",
"sphinx_design",
@@ -111,6 +112,10 @@
notfound_urls_prefix = "/latest/"
+intersphinx_mapping = {"rdkit": ("https://www.rdkit.org/docs/", None)}
+intersphinx_timeout = 60
+
+
#### HTML ####
html_theme = "pydata_sphinx_theme"
diff --git a/doc/switcher.py b/doc/switcher.py
index 5bd6202fc..a7b7e7c36 100644
--- a/doc/switcher.py
+++ b/doc/switcher.py
@@ -5,42 +5,13 @@
__author__ = "Patrick Kunzmann"
__all__ = ["create_switcher_json"]
-import functools
import json
-import re
-from dataclasses import dataclass
import requests
+from packaging.version import Version
import biotite
RELEASE_REQUEST = "https://api.github.com/repos/biotite-dev/biotite/releases"
BIOTITE_URL = "https://www.biotite-python.org"
-SEMVER_TAG_REGEX = r"^v?(\d+)\.(\d+)\.(\d+)"
-
-
-@functools.total_ordering
-@dataclass(frozen=True)
-class Version:
- major: ...
- minor: ...
- patch: ...
-
- @staticmethod
- def from_tag(tag):
- match = re.match(SEMVER_TAG_REGEX, tag)
- if match is None:
- raise ValueError(f"Invalid tag: {tag}")
- major, minor, patch = map(int, match.groups())
- return Version(major, minor, patch)
-
- def __str__(self):
- return f"{self.major}.{self.minor}.{self.patch}"
-
- def __ge__(self, other):
- return (self.major, self.minor, self.patch) >= (
- other.major,
- other.minor,
- other.patch,
- )
def _get_previous_versions(min_tag, n_versions, current_version):
@@ -48,17 +19,17 @@ def _get_previous_versions(min_tag, n_versions, current_version):
# -> request one more version than necessary
response = requests.get(RELEASE_REQUEST, params={"per_page": n_versions + 1})
release_data = json.loads(response.text)
- versions = [Version.from_tag(release["tag_name"]) for release in release_data]
+ versions = [Version(release["tag_name"]) for release in release_data]
applicable_versions = [
version
for version in versions
- if version >= Version.from_tag(min_tag) and version < current_version
+ if version >= Version(min_tag) and version < current_version
]
return applicable_versions[:n_versions]
def _get_current_version():
- return Version(*biotite.__version_tuple__[:3])
+ return Version(biotite.__version__)
def create_switcher_json(file_path, min_tag, n_versions):
@@ -81,7 +52,7 @@ def create_switcher_json(file_path, min_tag, n_versions):
versions.append(current_version)
versions.sort()
for version in versions:
- if version.patch != 0:
+ if version.micro != 0:
# Documentation is not uploaded for patch versions
continue
version_config.append(
diff --git a/doc/tutorial/index.rst b/doc/tutorial/index.rst
index 8dc88925b..c65afd2b7 100644
--- a/doc/tutorial/index.rst
+++ b/doc/tutorial/index.rst
@@ -103,4 +103,5 @@ provided by the mentioned subpackages.
database/index
sequence/index
structure/index
- application/index
\ No newline at end of file
+ application/index
+ interface/index
\ No newline at end of file
diff --git a/doc/tutorial/interface/index.rst b/doc/tutorial/interface/index.rst
new file mode 100644
index 000000000..b011bb9e7
--- /dev/null
+++ b/doc/tutorial/interface/index.rst
@@ -0,0 +1,38 @@
+:sd_hide_title: true
+
+.. include:: /tutorial/preamble.rst
+
+##########################
+``interface`` subpackage
+##########################
+
+Connecting the ecosystem - The ``interface`` subpackage
+=======================================================
+
+.. currentmodule:: biotite.interface
+
+In the last section we learned that :mod:`biotite.application` encapsulates entire
+external application runs with subsequent calls of ``start()`` and ``join()``.
+In contrast :mod:`biotite.interface` provides flexible interfaces to other Python
+packages in the bioinformatics ecosystem.
+Its purpose is to convert between native Biotite objects, such as :class:`.AtomArray`
+and :class:`.Sequence`, and the corresponding objects in the respective interfaced
+package.
+Each interface is located in a separate subpackage with the same name as the
+interfaced package.
+For example, the interface to ``rdkit`` is placed in the subpackage
+:mod:`biotite.interface.rdkit`.
+
+.. note::
+
+ Like in :mod:`biotite.application`, the interfaced Python packages are not
+ dependencies of the ``biotite`` package.
+ Hence, they need to be installed separately.
+
+The following chapters will give you an overview of the different interfaced packages.
+
+.. toctree::
+ :maxdepth: 1
+ :hidden:
+
+ rdkit
\ No newline at end of file
diff --git a/doc/tutorial/interface/rdkit.rst b/doc/tutorial/interface/rdkit.rst
new file mode 100644
index 000000000..ff9954a52
--- /dev/null
+++ b/doc/tutorial/interface/rdkit.rst
@@ -0,0 +1,66 @@
+.. include:: /tutorial/preamble.rst
+
+Interface to RDKit
+==================
+
+.. currentmodule:: biotite.interface.rdkit
+
+`RDKit <https://www.rdkit.org/>`_ is a popular cheminformatics package
+and thus can be used to supplement *Biotite* with a variety of functionalities focused
+on small molecules, such as conversion from/to textual representations
+(e.g. *SMILES* and *InChI*) and visualization as structural formulas.
+Basically, the :mod:`biotite.interface.rdkit` subpackage provides only two functions:
+:func:`to_mol()` to obtain a :class:`rdkit.Chem.rdchem.Mol` from an :class:`.AtomArray`
+and :func:`from_mol()` for the reverse direction.
+The rest happens within the realm of *RDKit*.
+This tutorial only gives a brief glimpse of how the interface can be used.
+For comprehensive documentation refer to the
+`RDKit documentation <https://www.rdkit.org/docs/>`_.
+
+First example: Depiction as structural formula
+----------------------------------------------
+*RDKit* allows rendering structural formulas using
+`pillow <https://pillow.readthedocs.io/>`_.
+For a proper structural formula, we need to compute proper 2D coordinates first.
+
+.. jupyter-execute::
+
+ import biotite.interface.rdkit as rdkit_interface
+ import biotite.structure.info as struc
+ from rdkit.Chem.Draw import MolToImage
+ from rdkit.Chem.rdDepictor import Compute2DCoords
+ from rdkit.Chem.rdmolops import RemoveHs
+
+ penicillin = struc.residue("PNN")
+ mol = rdkit_interface.to_mol(penicillin)
+ # We do not want to include explicit hydrogen atoms in the structural formula
+ mol = RemoveHs(mol)
+ Compute2DCoords(mol)
+ image = MolToImage(mol, size=(600, 400))
+ display(image)
+
+Second example: Creating a molecule from SMILES
+-----------------------------------------------
+Although the *Chemical Component Dictionary* accessible from
+:mod:`biotite.structure.info` already provides all compounds found in the PDB,
+there are a myriad of compounds out there that are not part of it.
+One way to obtain them as :class:`.AtomArray` is passing a *SMILES* string to
+*RDKit* to obtain the topology of the molecule and then computing the coordinates.
+
+.. jupyter-execute::
+
+ from rdkit.Chem import MolFromSmiles
+ from rdkit.Chem.rdDistGeom import EmbedMolecule
+ from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule
+ from rdkit.Chem.rdmolops import AddHs
+
+ ERTAPENEM_SMILES = "C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)[C@@H](C)O"
+
+ mol = MolFromSmiles(ERTAPENEM_SMILES)
+ # RDKit uses implicit hydrogen atoms by default, but Biotite requires explicit ones
+ mol = AddHs(mol)
+ # Create a 3D conformer
+ conformer_id = EmbedMolecule(mol)
+ UFFOptimizeMolecule(mol)
+ ertapenem = rdkit_interface.from_mol(mol, conformer_id)
+ print(ertapenem)
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index f384f9b95..ffbcd85fa 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,6 +22,7 @@ dependencies:
- msgpack-python >=0.5.6
- networkx >=2.0
- numpy >=2.0
+ - packaging >=24.0
- requests >=2.12
# Testing
- pytest >=7.0
@@ -53,3 +54,6 @@ dependencies:
- pydot >=1.4
- scikit-learn >=0.18
- scipy >=1.8.0
+ - pip:
+ # Conda provides no recent version of RDKit (required for biotite.interface)
+ - rdkit
diff --git a/pyproject.toml b/pyproject.toml
index 2c6ae641a..1a26d05cc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,7 @@ dependencies = [
"requests >= 2.12",
"msgpack >= 0.5.6",
"networkx >= 2.0",
- "requests >= 2.12",
+ "packaging >= 24.0",
]
dynamic = ["version"]
diff --git a/src/biotite/interface/__init__.py b/src/biotite/interface/__init__.py
new file mode 100644
index 000000000..71545544c
--- /dev/null
+++ b/src/biotite/interface/__init__.py
@@ -0,0 +1,19 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+"""
+This subpackage provides interfaces to other Python packages in the bioinformatics
+ecosystem.
+Its purpose is to convert between native Biotite objects, such as :class:`.AtomArray`
+and :class:`.Sequence`, and the corresponding objects in the respective interfaced
+package.
+In contrast to :mod:`biotite.application`, where an entire application run is handled
+under the hood, :mod:`biotite.interface` only covers the object conversion, allowing
+for more flexibility.
+"""
+
+__name__ = "biotite.interface"
+__author__ = "Patrick Kunzmann"
+
+from .warning import *
diff --git a/src/biotite/interface/rdkit/__init__.py b/src/biotite/interface/rdkit/__init__.py
new file mode 100644
index 000000000..33d7eb121
--- /dev/null
+++ b/src/biotite/interface/rdkit/__init__.py
@@ -0,0 +1,15 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+"""
+This subpackage provides an interface to the `RDKit <https://www.rdkit.org/>`_
+cheminformatics package.
+It allows conversion between :class:`.AtomArray` and :class:`rdkit.Chem.rdchem.Mol`
+objects.
+"""
+
+__name__ = "biotite.interface.rdkit"
+__author__ = "Patrick Kunzmann"
+
+from .mol import *
diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
new file mode 100644
index 000000000..3f9ad3170
--- /dev/null
+++ b/src/biotite/interface/rdkit/mol.py
@@ -0,0 +1,297 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+__name__ = "biotite.interface.rdkit"
+__author__ = "Patrick Kunzmann"
+__all__ = ["to_mol", "from_mol"]
+
+import warnings
+from collections import defaultdict
+import numpy as np
+from rdkit.Chem.rdchem import Atom, Conformer, EditableMol, KekulizeException, Mol
+from rdkit.Chem.rdchem import BondType as RDKitBondType
+from rdkit.Chem.rdmolops import AddHs, Kekulize, SanitizeFlags, SanitizeMol
+from biotite.interface.version import requires_version
+from biotite.interface.warning import LossyConversionWarning
+from biotite.structure.atoms import AtomArray, AtomArrayStack
+from biotite.structure.bonds import BondList, BondType
+from biotite.structure.error import BadStructureError
+
+_KEKULIZED_TO_AROMATIC_BOND_TYPE = {
+ BondType.SINGLE: BondType.AROMATIC_SINGLE,
+ BondType.DOUBLE: BondType.AROMATIC_DOUBLE,
+ BondType.TRIPLE: BondType.AROMATIC_TRIPLE,
+}
+_BIOTITE_TO_RDKIT_BOND_TYPE = {
+ BondType.ANY: RDKitBondType.UNSPECIFIED,
+ BondType.SINGLE: RDKitBondType.SINGLE,
+ BondType.DOUBLE: RDKitBondType.DOUBLE,
+ BondType.TRIPLE: RDKitBondType.TRIPLE,
+ BondType.QUADRUPLE: RDKitBondType.QUADRUPLE,
+ BondType.AROMATIC_SINGLE: RDKitBondType.AROMATIC,
+ BondType.AROMATIC_DOUBLE: RDKitBondType.AROMATIC,
+ BondType.AROMATIC_TRIPLE: RDKitBondType.AROMATIC,
+ BondType.AROMATIC: RDKitBondType.AROMATIC,
+ # Dative bonds may lead to a KekulizeException and may potentially be deprecated
+ # in the future (https://github.com/rdkit/rdkit/discussions/6995)
+ BondType.COORDINATION: RDKitBondType.SINGLE,
+}
+_RDKIT_TO_BIOTITE_BOND_TYPE = {
+ RDKitBondType.UNSPECIFIED: BondType.ANY,
+ RDKitBondType.SINGLE: BondType.SINGLE,
+ RDKitBondType.DOUBLE: BondType.DOUBLE,
+ RDKitBondType.TRIPLE: BondType.TRIPLE,
+ RDKitBondType.QUADRUPLE: BondType.QUADRUPLE,
+ RDKitBondType.DATIVE: BondType.COORDINATION,
+}
+
+
+@requires_version("rdkit", ">=2020")
+def to_mol(
+    atoms, kekulize=False, use_dative_bonds=False, include_annotations=("atom_name",)
+):
+    """
+    Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a
+    :class:`rdkit.Chem.rdchem.Mol`.
+
+    Parameters
+    ----------
+    atoms : AtomArray or AtomArrayStack
+        The molecule to be converted.
+    kekulize : bool, optional
+        If set to true, aromatic bonds are represented by single, double and triple
+        bonds.
+        By default, aromatic bond types are converted to
+        :attr:`rdkit.Chem.rdchem.BondType.AROMATIC`.
+    use_dative_bonds : bool, optional
+        If set to true, :attr:`BondType.COORDINATION` bonds are translated to
+        :attr:`rdkit.Chem.rdchem.BondType.DATIVE` bonds instead of
+        :attr:`rdkit.Chem.rdchem.BondType.SINGLE` bonds.
+        This may have the undesired side effect that a
+        :class:`rdkit.Chem.rdchem.KekulizeException` is raised for some molecules, when
+        the returned :class:`rdkit.Chem.rdchem.Mol` is kekulized.
+    include_annotations : list of str, optional
+        Names of annotation arrays in `atoms` that are added as atom-level property with
+        the same name to the returned :class:`rdkit.Chem.rdchem.Mol`.
+        These properties can be accessed with :meth:`rdkit.Chem.rdchem.Atom.GetProp()`.
+
+    Returns
+    -------
+    mol : rdkit.Chem.rdchem.Mol
+        The *RDKit* molecule.
+        If the input `atoms` is an :class:`AtomArrayStack`, all models are included
+        as conformers with conformer IDs starting from ``0``.
+
+    Examples
+    --------
+    >>> from rdkit.Chem import MolToSmiles
+    >>> alanine_atom_array = residue("ALA")
+    >>> mol = to_mol(alanine_atom_array)
+    >>> print(MolToSmiles(mol))
+    [H]OC(=O)C([H])(N([H])[H])C([H])([H])[H]
+
+    By default, ``'atom_name'`` is stored as property of each atom.
+
+    >>> for atom in mol.GetAtoms():
+    ...     print(atom.GetProp("atom_name"))
+    N
+    CA
+    C
+    O
+    CB
+    OXT
+    H
+    H2
+    HA
+    HB1
+    HB2
+    HB3
+    HXT
+    """
+    mol = EditableMol(Mol())
+    has_charge_annot = "charge" in atoms.get_annotation_categories()
+    for i in range(atoms.array_length()):
+        rdkit_atom = Atom(atoms.element[i].capitalize())
+        if has_charge_annot:
+            rdkit_atom.SetFormalCharge(atoms.charge[i].item())
+        for annot_name in include_annotations:
+            rdkit_atom.SetProp(annot_name, atoms.get_annotation(annot_name)[i].item())
+        mol.AddAtom(rdkit_atom)
+
+    if atoms.bonds is None:
+        raise BadStructureError("An AtomArray with associated BondList is required")
+    bonds = atoms.bonds
+    if kekulize:
+        # Work on a copy to leave the BondList of the input atoms untouched
+        bonds = bonds.copy()
+        bonds.remove_aromaticity()
+    for atom_i, atom_j, bond_type in bonds.as_array():
+        # The lookup table maps coordination bonds to single bonds
+        if use_dative_bonds and bond_type == BondType.COORDINATION:
+            rdkit_bond_type = RDKitBondType.DATIVE
+        else:
+            rdkit_bond_type = _BIOTITE_TO_RDKIT_BOND_TYPE[bond_type]
+        mol.AddBond(atom_i.item(), atom_j.item(), rdkit_bond_type)
+
+    # Create a proper 'frozen' Mol object
+    mol = mol.GetMol()
+    coord = atoms.coord
+    if coord.ndim == 2:
+        # Handle AtomArray and AtomArrayStack consistently
+        coord = coord[None, :, :]
+    for model_coord in coord:
+        conformer = Conformer(mol.GetNumAtoms())
+        conformer.SetPositions(model_coord.astype(np.float64))
+        conformer.Set3D(True)
+        mol.AddConformer(conformer)
+
+    return mol
+
+
+@requires_version("rdkit", ">=2020")
+def from_mol(mol, conformer_id=None, add_hydrogen=None):
+    """
+    Convert a :class:`rdkit.Chem.rdchem.Mol` into an :class:`.AtomArray` or
+    :class:`.AtomArrayStack`.
+
+    Parameters
+    ----------
+    mol : rdkit.Chem.rdchem.Mol
+        The molecule to be converted.
+    conformer_id : int, optional
+        The conformer to be converted.
+        By default, an :class:`AtomArrayStack` with all 3D conformers is returned.
+    add_hydrogen : bool, optional
+        If set to true, explicit hydrogen atoms are always added.
+        If set to false, explicit hydrogen atoms are never added.
+        By default, explicit hydrogen atoms are only added, if hydrogen atoms are not
+        already present.
+
+    Returns
+    -------
+    atoms : AtomArray or AtomArrayStack
+        The converted atoms.
+        An :class:`AtomArrayStack` is only returned, if the `conformer_id` parameter
+        is not set.
+
+    Notes
+    -----
+    All atom-level properties of `mol`
+    (obtainable with :meth:`rdkit.Chem.rdchem.Atom.GetProp()`) are added as string-type
+    annotation array with the same name.
+    ``element`` and ``charge`` are not inferred from properties but from the
+    dedicated attributes in the :class:`rdkit.Chem.rdchem.Mol` object.
+
+    Examples
+    --------
+
+    >>> from rdkit.Chem import MolFromSmiles
+    >>> from rdkit.Chem.rdDistGeom import EmbedMolecule
+    >>> from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule
+    >>> from rdkit.Chem.rdmolops import AddHs
+    >>> mol = MolFromSmiles("C[C@@H](C(=O)O)N")
+    >>> mol = AddHs(mol)
+    >>> # Create a 3D conformer
+    >>> conformer_id = EmbedMolecule(mol)
+    >>> UFFOptimizeMolecule(mol)
+    0
+    >>> alanine_atom_array = from_mol(mol, conformer_id)
+    >>> print(alanine_atom_array)
+    0 C -1.067 1.111 -0.079
+    0 C -0.366 -0.241 -0.217
+    0 C 1.128 -0.082 -0.117
+    0 O 1.654 0.353 0.943
+    0 O 1.932 -0.413 -1.203
+    0 N -0.865 -1.173 0.796
+    0 H -0.715 1.807 -0.871
+    0 H -2.165 0.980 -0.191
+    0 H -0.862 1.562 0.916
+    0 H -0.613 -0.650 -1.221
+    0 H 2.938 -0.311 -1.154
+    0 H -0.590 -0.837 1.749
+    0 H -0.408 -2.103 0.649
+    """
+    if add_hydrogen is None:
+        add_hydrogen = not _has_explicit_hydrogen(mol)
+    if add_hydrogen:
+        SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS)
+        mol = AddHs(mol)
+
+    rdkit_atoms = mol.GetAtoms()
+    if rdkit_atoms is None:
+        raise BadStructureError("Could not obtain atoms from Mol")
+
+    if conformer_id is None:
+        conformers = [conf for conf in mol.GetConformers() if conf.Is3D()]
+        atoms = AtomArrayStack(len(conformers), len(rdkit_atoms))
+        for i, conformer in enumerate(conformers):
+            atoms.coord[i] = np.array(conformer.GetPositions())
+    else:
+        conformer = mol.GetConformer(conformer_id)
+        atoms = AtomArray(len(rdkit_atoms))
+        atoms.coord = np.array(conformer.GetPositions())
+
+    extra_annotations = defaultdict(
+        # Use 'object' dtype first, as the maximum string length is unknown
+        lambda: np.full(atoms.array_length(), "", dtype=object)
+    )
+    atoms.add_annotation("charge", int)
+    for rdkit_atom in rdkit_atoms:
+        annot_names = rdkit_atom.GetPropNames()
+        for annot_name in annot_names:
+            extra_annotations[annot_name][rdkit_atom.GetIdx()] = rdkit_atom.GetProp(
+                annot_name
+            )
+        atoms.element[rdkit_atom.GetIdx()] = rdkit_atom.GetSymbol().upper()
+        atoms.charge[rdkit_atom.GetIdx()] = rdkit_atom.GetFormalCharge()
+    for annot_name, array in extra_annotations.items():
+        atoms.set_annotation(annot_name, array.astype(str))
+
+    rdkit_bonds = list(mol.GetBonds())
+    is_aromatic = np.array(
+        [bond.GetBondType() == RDKitBondType.AROMATIC for bond in rdkit_bonds]
+    )
+    if np.any(is_aromatic):
+        # Determine the kekulized order of aromatic bonds
+        # Copy as 'Kekulize()' modifies the molecule in-place
+        mol = Mol(mol)
+        try:
+            Kekulize(mol)
+        except KekulizeException:
+            warnings.warn(
+                "Kekulization failed, "
+                "using generic 'BondType.AROMATIC' for aromatic bonds instead",
+                LossyConversionWarning,
+            )
+        rdkit_bonds = list(mol.GetBonds())
+    bond_array = np.full((len(rdkit_bonds), 3), BondType.ANY, dtype=np.uint32)
+    for i, bond in enumerate(rdkit_bonds):
+        bond_type = _RDKIT_TO_BIOTITE_BOND_TYPE.get(bond.GetBondType())
+        if bond_type is None:
+            warnings.warn(
+                f"Bond type '{bond.GetBondType().name}' cannot be mapped to Biotite, "
+                "using 'BondType.ANY' instead",
+                LossyConversionWarning,
+            )
+            bond_type = BondType.ANY
+        if is_aromatic[i]:
+            try:
+                bond_type = _KEKULIZED_TO_AROMATIC_BOND_TYPE[bond_type]
+            except KeyError:
+                bond_type = BondType.AROMATIC
+                warnings.warn(
+                    "Kekulization returned invalid bond type, "
+                    "using generic 'BondType.AROMATIC' instead",
+                    LossyConversionWarning,
+                )
+        bond_array[i, 0] = bond.GetBeginAtomIdx()
+        bond_array[i, 1] = bond.GetEndAtomIdx()
+        bond_array[i, 2] = bond_type
+    atoms.bonds = BondList(atoms.array_length(), bond_array)
+
+    return atoms
+
+
+def _has_explicit_hydrogen(mol):
+    return mol.GetNumHeavyAtoms() < mol.GetNumAtoms()
diff --git a/src/biotite/interface/version.py b/src/biotite/interface/version.py
new file mode 100644
index 000000000..92da54c78
--- /dev/null
+++ b/src/biotite/interface/version.py
@@ -0,0 +1,71 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+__name__ = "biotite.interface"
+__author__ = "Patrick Kunzmann"
+__all__ = ["VersionError", "requires_version"]
+
+
+import functools
+import importlib.metadata
+from packaging.specifiers import SpecifierSet
+from packaging.version import Version
+
+# Stores the variant of interface functions
+# compatible with the respective installed package version
+_functions_for_version = {}
+
+
+class VersionError(Exception):
+    """
+    Raised when the installed version of an interfaced package is not supported.
+
+    This is the case if none of the implemented variants of the called interface
+    function is compatible with that version.
+    """
+
+
+def requires_version(package, version_specifier):
+    """
+    Declare a function variant that is compatible with a specific version range of the
+    interfaced package.
+
+    Parameters
+    ----------
+    package : str
+        The name of the interfaced package.
+    version_specifier : str or list of str
+        The :pep:`440` version specifier(s) for the interfaced package that are
+        compatible with the function.
+        Multiple constraints can be either given as a list of strings or as a single
+        comma-separated string.
+    """
+
+    def decorator(function):
+        @functools.wraps(function)
+        def wrapper(*args, **kwargs):
+            variant = _functions_for_version.get((package, function.__name__))
+            if variant is None:
+                raise VersionError(
+                    f"No variant of '{function.__name__}()' "
+                    f"found for installed '{package}=={package_version}'"
+                )
+            return variant(*args, **kwargs)
+
+        if isinstance(version_specifier, str):
+            specifier = SpecifierSet(version_specifier)
+        else:
+            specifier = SpecifierSet(",".join(version_specifier))
+        try:
+            package_version = Version(importlib.metadata.version(package))
+        except importlib.metadata.PackageNotFoundError:
+            raise ImportError(
+                f"'{function.__name__}()' requires the '{package}' package"
+            ) from None
+        if package_version in specifier:
+            _functions_for_version[(package, function.__name__)] = function
+
+        return wrapper
+
+    return decorator
diff --git a/src/biotite/interface/warning.py b/src/biotite/interface/warning.py
new file mode 100644
index 000000000..e60b0e49e
--- /dev/null
+++ b/src/biotite/interface/warning.py
@@ -0,0 +1,19 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+__name__ = "biotite.interface"
+__author__ = "Patrick Kunzmann"
+__all__ = ["LossyConversionWarning"]
+
+
+class LossyConversionWarning(UserWarning):
+    """
+    Warning issued when some information is lost during a conversion.
+
+    Most conversion functions are inherently lossy to some extent, so this
+    warning does not signal information loss in general.
+
+    It is only issued, when the loss of information happens only for some
+    edge case.
+    """
diff --git a/tests/interface/__init__.py b/tests/interface/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/interface/data/README.rst b/tests/interface/data/README.rst
new file mode 100644
index 000000000..c1b8e8079
--- /dev/null
+++ b/tests/interface/data/README.rst
@@ -0,0 +1,5 @@
+Datasets for testing interfaces
+===============================
+
+- ``smiles.txt``: Randomly selected SMILES strings from the
+ ``pdbx_chem_comp_descriptor.descriptor`` of the CCD.
\ No newline at end of file
diff --git a/tests/interface/data/smiles.txt b/tests/interface/data/smiles.txt
new file mode 100644
index 000000000..19b76e583
--- /dev/null
+++ b/tests/interface/data/smiles.txt
@@ -0,0 +1,199 @@
+OC2C(NC(CNC(c1ccccc1)CO)C2O)C
+O=C(C(N)Cc1ccccc1)C
+OC(=O)c1cccc(c1)N2C(=O)c3ccccc3N=C2C=Cc4ccc(cc4)C#C
+Cc1ccc(OC[CH](O)CNC(C)(C)C)c2cc([nH]c12)C#N
+O=C1C=C(C=C(N1O)Cc2ccccc2)C
+C(=O)(NC(Cc1ccccc1)C(NC(CC(C)C)B(O)O)=O)C[C@H]=[C@H]c2cn(CCOCCOCCOCC(O)=O)nn2
+C[S](=O)(=O)C1CCN(CC1)c2cccc(CN3C=C(C=CC3=O)c4onc(n4)c5ccc(OC(F)(F)F)cc5)c2
+CC(C)(C)NO
+c2(Oc1c(c(Cl)nn1C)Cl)ccc(cc2)S(NC(=O)Nc3sc(cn3)Br)(=O)=O
+c2c1CN(CCc1cc(c2OC)OC)C=O
+CCC1(C(NC(=O)NC1=O)=O)c2ccccc2
+Cc1onc(c2ccccc2)c1C(=O)Nc3sc(cn3)[N+]([O-])=O
+Clc1cccc2c1C(=O)N=C1N2c2ccc(cc2C21CCCCC2)C1CCNCC1
+Oc1c2Cc3cc(Cc4ccc(cc4)[S]([O-])(=O)=O)cc(Cc5cc(Cc6ccc(cc6)[S]([O-])(=O)=O)cc(Cc7cc(Cc8ccc(cc8)[S]([O-])(=O)=O)cc(Cc9cc(Cc%10ccc(cc%10)[S]([O-])(=O)=O)cc(Cc%11cc(Cc%12ccc(cc%12)[S]([O-])(=O)=O)cc(Cc%13cc(Cc%14ccc(cc%14)[S]([O-])(=O)=O)cc(Cc%15cc(Cc%16ccc(cc%16)[S]([O-])(=O)=O)cc(Cc1cc(Cc%17ccc(cc%17)[S]([O-])(=O)=O)c2)c%15O)c%13O)c%11O)c9O)c7O)c5O)c3O
+C1(=O)C=CC2(C(C1(C)C)CCC5(C)C2CC4(C3=C(C(OC3)=O)C(C(=C4O5)C)=O)C)C
+Fc1cccc(F)c1S(=O)(=O)N2CCN(CC2)S(=O)(=O)c3ccc4OCCOc4c3
+CN(C)C(=O)OC[CH](N)C(O)=O
+Cc1ccc(S[CH]2O[CH](CO)[CH](O)[CH]([CH]2O)n3cc(nn3)c4cc(F)c(F)c(F)c4)cc1
+O=C1C=C5Oc6cc(O)ccc6N=C5C=C1CCCCC(=O)NS(=O)(=O)OCC4OC(n3cnc2c(ncnc23)N)C(O)C4O
+O=C(O)NC1(CCCCC1)C
+O=P(CCCCCCCCC)(C(C)C)C(C)C
+COc1cc2c(cc1)cncc2NC(=O)C1CCOc2ccc(Cl)cc21
+O=C(O)Cc2ccc(C(=O)c1ccc(cc1)C)n2C
+C2=C1CCCC(C)C1(CC(\C(=C)C)C2)C
+CN(C)c1ccc(cc1)[S](=O)(=O)N[N+]([O-])=O
+COC(=O)NCc1cc([nH]n1)c2sc(nc2N3CCC(O)CC3)c4ccccc4
+CCC[CH](NC(=O)[CH]1[CH]2CCC[CH]2CN1C=O)[CH](O)C(=O)NC3CC3
+c4(F)c(O)c(cc(Nc1nc2c(cn1)N(CC#C)C(=O)C(C)N2CC3CC3)c4)F
+c3(Nc1c(OC)cc(cc1)N2CCN(C)CC2)ncc(c(n3)NCc4cc(NC(/C=C)=O)ccc4)Cl
+O=C1c3c(OC(=C1)c2ccc(OC)cc2)cccc3
+c1(O)c(OC)cc(cc1)\C=C
+COC(=O)C[CH](N)C(O)=O
+CC=CC=CC(=O)N[CH](CC(=O)N[CH](C(C)C)C(=O)[CH]1[CH](C)C(=O)NC1=O)c2ccccc2
+CCc1n2c(nn1)cc(c3c2cc(cc3)OC)C
+O=C2c1n(cnc1N(C(=O)N2CCN4C(=O)N(c3ncn(c3C4=O)C)C)C)C
+OC[N+](CCCCCC)(C)C
+Nc1ncnc2cc(cc(F)c12)c3cccc(c3)[S](=O)(=O)NC(=O)[CH]4CCCN4
+CCC(=O)N1C[CH](CC[CH]1C)Nc2ncnc3[nH]ccc23
+O=C(NC(C(=O)O)C(C)C)C(NC(=O)CCCC(C(=O)O)N)CCS
+O=C(N1C(C(=O)O)CC(C)CC1)C(NS(=O)(=O)c3cc2ccccc2cc3)CC(=O)NCC4CCCN(C(=[N@H])N)C4
+O=C(NC(=O)c1ccccc1)C3N(C(=O)C(CC2CCCC2)CN(O)C=O)CCC3
+Cn1ccc2c1cccc2c3[nH]c4nccc(n5cc(N[CH]6CCOC[CH]6N)cn5)c4n3
+O=S(=O)(c1sc(c(c1)C)CCOC)NC(=O)Nc2nc(NC(=O)NC)cc(Br)c2
+Brc1ccc(cc1)C(CC(=O)O)NC(=O)c1cc2cc[NH]c2nc1
+O=C(C(n1nnc(c1)C(NC(=O)c2ccccc2)(C)C(C)C)C/C=C/C)C
+Fc1ccc(Cn2cnc3ccccc23)cc1
+C1(/C(CCC1)C(=O)O)=C/C(COP(O)(O)=O)N
+[O-][N+](=O)c2ccc(NCc1ncccc1)c(C(=O)N)c2
+[O-][n+]2c1ccccc1[n+]([O-])c(c2CBr)CBr
+NS(=O)(=O)c1ccc(cc1)C(=O)N1CC2CN(CC2(C)C1)C(=O)OCc1ccc(OC(F)(F)F)cc1
+[N@H]=Cc1nc(ccn1)CN
+O=C1N(CCC1)C(C(=O)NC(Cc2ccc(O)cc2)C(O)CNC4C(=O)NC(C(=O)NCCCOc3ccc(cc3)C4)C(C)CC)C(C)C
+CNC1(CO)CCOCC1
+CC(=O)NCC(=O)NCC#CBr
+c1cc(NCC)c(cc1S(C)(=O)=O)F
+Fc1cc2[NH]cc(c3ccc4nnn(CCN5CCNCC5)c4c3)c2cc1
+O=C(O)C(Cc1ccccc1)CC(O)C(N)Cc2ccccc2
+CC1CN(CC(C)O1)c1cc([N+]([O-])=O)c(NCc2ccc(CN3CC(O)C(O)C(O)C3CO)cc2)cc1
+O=P(O)(OCC1OCC(O)C1O)OP(=O)(O)OCC4OC(n3c(Br)nc2c(ncnc23)N)C(O)C4O
+Fc1cc(cc(F)c1CNC(=O)C1=NN(c2cnccc2)C(C)=CC1=O)c1ccccc1
+OC(=O)[CH]1CCCN1C(=O)C(CS)CS
+CCc1nc2ccc(Cl)cn2c1C(=O)NCc3ccc(cc3)N4CCC(CC4)c5ccc(OC(F)(F)F)cc5
+C[CH]1CO[CH](CN1C[CH]2C[CH]2CCNC(=O)c3[nH]c4ccccc4c3)c5ccc(N)nc5
+Brc2c1c(nnn1CCCO)c(Br)c(Br)c2Br
+CCCOc1ccc2[nH]ccc2c1
+N[CH](Cc1c[nH]c2ccccc12)C(N)=O
+OCc1cn(nn1)c2ccc(Cl)c(c2Cl)C(F)(F)F
+CC(C)=CCCC(C)=CCO[P](O)(O)=O
+Clc1nc(nc(n1)Nc5ccc(Nc4cc(c(N)c3C(=O)c2ccccc2C(=O)c34)S(=O)(=O)O)cc5S(=O)(=O)O)Nc6ccccc6S(=O)(=O)O
+O=C1c2ncn(c2N=C(N)N1)C3OC(C(O)C3O)COP(=O)(O)O
+c1cc2c(cc1)C(N(C)C2O)=O
+O=S(=O)(NC(=O)c1ccc(nc1N1CC(C)CC1(C)C)n1ccc(OCC(C)(C)C(F)(F)F)n1)c1cn(C)nc1C
+c2c(c1cscc1)csc2COc3cccc(c3)CN(C\C=C\C#CC(C)(C)C)CC
+COC(=O)C(O)=CC(=O)C1(CCN(CC1)Cc2cccc(OC)c2)Cc3ccc(Cl)cc3
+O=C(NCCCC(C(=O)O)NC(=O)C)CP(=O)(O)O
+N[CH](CCCC=C)C(O)=O
+Cc1ccccc1Oc2cc(N3C(=O)NC(=CC3=O)C(F)(F)F)c4ccccc4c2C#N
+FC1(OC(C(NC(=O)C)C(O)C1F)C(O)C(O)CO)C(=O)O
+O=S(=O)(N2CCCC(C(=O)\C=C(/O)C(=O)O)(Cc1ccc(Cl)cc1)C2)Cc3ccccc3
+O=C(O)C1C(C(=O)O)C1C(N)C(=O)O
+FC1=CNC(=O)NC1O
+O=C(O)C(=[N@H])CO
+FC(F)(F)c1cc2nc(c(nc2cc1)C(=O)O)NCc3ccc(OC)c(F)c3
+O=C1NC(=O)N(C=C1)C2OC(C(O)C2O)CN3CCCC3
+C[CH]1[CH]2CC[C]3(C)[CH](CC=C4[CH]5CC(C)(C)CC[C]5(CC[C]34C)C(O)=O)[C]2(C)CCC1=O
+N[CH](CCCCN=C(CF)C(O)=O)C(O)=O
+[Se-]C#N
+O=C(\C=C)CCC(C)C1CCC2C1(CCCC2=[C@H][C@H]=C3CC(C(\C(C3)O)=C)O)C
+O=C(CC(c1ccccc1)c2ccccc2)Nc3sc4ccccc4n3
+O=C(NO)C(O)C(C(=O)NC(C(=O)O)c1ccccc1)CC(C)C
+CN(Cc1ccc2OCOc2c1)C3=Nc4n(ncc4C(=O)N3)C(C)(C)C
+O=C1N=C(N=C1C3CCNC(=O)C2=NC(Br)=CC23)N
+Fc3cc4C(O)=C(C2=Nc1c(cccc1)S(=O)(=O)N2)C(=O)N(c4cc3)CCC5CC5
+[Ho+3]
+O=C(NC(C)(C)C)Nc1cc2nccnc2cc1
+O=C(O)C(O)C1CCCN(C1)c1ncnc2[NH]c3cc(F)ccc3c12
+CCN(CC)[S](=O)(=O)c1ccc(O)c(NC(=O)c2[nH]c(C)c(C(C)=O)c2CC)c1
+FC(F)(F)c1ccc2c(c1)NC(CN2)=O
+Nc1ncnc2n(cnc12)c3oc(CO[P](O)(=O)Oc4cc(oc4CO)n5cnc6c(N)ncnc56)c(O)c3
+CC[CH](N)c1ccccc1O
+O=C(O)C3N2C(=O)C1(NCCC1)C=CC2CC3
+O=C(O)C(NC)C(O)C
+CC(=O)N1CC2CC1CN2C(=O)CC1(CCOc2ccc(Cl)cc21)C(=O)Nc1cncc2ccccc21
+N#CC1(CC1)CS(=O)(=O)N1Cc2ccc(Cl)cc2C2(CCN(C2=O)c2cncc3ccccc32)C1
+[Ni]C#[O+]
+c1ccnc(n1)O
+CNc1nc(Cl)nc2n(Cc3cc(Cl)ccc3N[S](C)(=O)=O)cnc12
+O=C(C(=O)C(C)(C)CC)N3C(C(=O)OC(CCc1ccccc1)C2CCCCC2)CCCC3
+FC(F)(F)c1cc(ccc1)N2C=CC(=C(C2=O)c4cc3cnc(nc3cc4)N)C
+COc1ccc(cc1F)N
+O=C1N(C(=O)CC(=O)N1)CCc2ccccc2
+O=S(=O)(O)c3cc(/N=N/c2c(nn(c1c(cc(cc1)S(=O)(=O)O)C)c2O)C)c(C(=O)O)cc3
+CC(C)C[CH]1C[CH](C1)c2nnc([CH]3CN(C[CH]3C(=O)Nc4ccc(C)cc4C)C(C)=O)n2C5CC5
+N[CH](CCC(=O)N[CH](CS[C]1(Cc2ccccc2)NC(=O)[C](S)(CO)NC1=O)C(=O)NCC(O)=O)C(O)=O
+[Ni]O[Fe](C=O)(C=O)C#N
+COC(=O)C(NC(=O)C)CS
+c1cc(c(cc1)[N+](=O)[O-])C(=O)c2c(nc(s2)NC4C3CC(CC3)C4)N
+O=C(O)C(OC(=O)C(NC(=O)CCCC(C(=O)O)N)CS)CSC
+Fc1ccc(cc1C(=O)NCc2cc[nH]n2)[CH]3CCNC[CH]3COc4ccc5[nH]ncc5c4
+CN(C)Cc1nc2cccc3C(=O)NCCn1c23
+n1c(C)c(O)c(\C=N\CCCCC(N)C(=O)O)c(c1)COP(=O)(O)O
+FC(F)(F)C(C)C1=NN(c2ccccc2C)C(=O)c2cc(F)c(cc21)N1N=C(CO)N(CC)C1=O
+N(C=Cc1ccccc1)c2[nH]c3ccccc3n2
+O=C(OCc1ccccc1)C4NCC2(c3c(NC2=O)cccc3)C4
+C[N+](C)(C)C
+O=C(O)CCc2ccccc2OCCCOc1c(nc(nc1CC)N)N
+O=C1NC(=Nc2ccccc12)c3ccc(cc3)N4CCCCC4
+O=P(O)(O)OCC[N+](C)(C)C
+Clc1cnc(Nc2ccccc2)nc1Nc3cccc(NC(=O)C=C)c3
+O=C(O)C(N)COC(O)(O)C
+OC1COCC1(C)N1CCN(CC1)c1cc2cc(NC(=O)C3CC43CCOCC4)ncc2cc1Cl
+O=C(Nc1c(C)cc(C)cc1C)CO
+CC(=O)c1nc(NC(=O)C2(N)CCC2)sc1c3cncc(N)c3
+CN1N=CC(Nc2ccc(CN(C)C)cc2)=C(Cl)C1=O
+c1(ccc(cc1)C2=C(C3C(CC2O3)S(Oc4ccc(cc4)Br)(=O)=O)c5ccc(cc5)O)O
+O=C(NC(c1ccccc1)C)CCC3=Nc2c(cccc2)C(=O)N3
+CCN1CCC(C1)COc1ccc(cc1)C1c2ccc(O)cc2CC2(CC2)N1C(=O)c1ccccc1
+O=S(=O)(O)N2c1cc(c(cc1N(C3=NC(=O)NC(=O)C23)CC(O)C(O)C(O)COP(=O)(O)OP(=O)(O)OCC6OC(n5cnc4c(ncnc45)N)C(O)C6O)C)C
+O=C(CN1CCC(CC1)CN1Cc2ccccc2C1=O)c1ccc(F)cc1
+FC(F)(F)Oc1ccc(cc1)/C=C/c1cc(ccc1N1CCCC1)C(N)=O
+O=C(N)C(CCC)CCC
+C1/C(CCC2C1CCCC2)=C(/c3ccc(cc3)O)c4ccc(O)cc4
+O2C(SC1OC(C(O)C(O)C1O)CO)C(O)C(O)C(O)C2CO
+O=C1NC(=O)N(C=C1C)C2OC(C(O)C2OCCNC(N)N)COP(=O)(O)O
+COc1cccc(OC)c1C(O)=O
+COC(=O)[CH]1C[CH](O)[CH](O)[CH](CO)O1
+Fc1c(ccnc1C(F)(F)F)C(=O)N2CCC(CC2)N3CC(CC#N)(C3)n4cc(cn4)c5ncnc6[nH]ccc56
+Cc1cnn(c2ccc(OCc3ccccc3)c(c2)C(O)=O)c1c4ccc(Cl)c(F)c4
+Nc1ncnc2n(cnc12)[CH]3O[CH](COCC#Cc4nc5c(N)ncnc5n4[CH]6O[CH](CO)[CH](O)[CH]6O[P](O)(O)=O)[CH](O)[CH]3O
+C1CCCNC1C4(CN(C(c2c(c(c(cc2)F)F)Nc3ccc(I)cc3F)=O)C4)O
+O=C(O)CCc2onc(c1ccccc1)c2
+OC(=O)c1cc(F)c2nc(sc2c1)N3[CH]4CC[CH]3C[CH](C4)OCc5c(onc5c6ccccc6OC(F)(F)F)C7CC7
+O=C1NC(=O)N(C=C1C)C2OC(C(O)C2OCC(=O)NC)COP(=O)(O)O
+C[CH](N[S](=O)(=O)CCCOCN1C=CC(=O)NC1=O)c2cccc(OC3CCCC3)c2
+Cc1ncc(s1)CNc2cc(cnc2)F
+O=C1C=NC2=C(N1)C(=O)N=C(N)N2
+C1(C=CS(C1)(=O)=O)N(C(C)=O)c2ccc(F)cc2
+COc1cc(ccc1Nc2nc3[nH]cc(C#N)c3c(NC4CCCCC4)n2)N5CCCC5=O
+O=C(O)C1CCCCN1Cc1ccccc1
+O=C(OC)NCC(c1c(F)ccc(c1)CCCCCc2ccc(cc2)NC(=O)C(N3C(=O)C(N)CCC3)C4CCCCC4)Cc5cc6ccccc6nc5
+OC[CH]1O[CH](CNC(=O)c2ccc(F)c3ccccc23)[CH](O)[CH](OCC4=Cc5c(F)c(F)ccc5OC4=O)[CH]1O
+N[CH](CCCCNC(=O)OCc1ccccc1N=[N+]=[N-])C(O)=O
+c3(nc(NC1CCN(CC)CC1)c2cc(OC)c(cc2n3)OC)N(C)C4CCCCC4
+NC1CCc2cc(cnc2NC1=O)/C=C/C(=O)N(C)Cc1oc2ccccc2c1C
+O=C(c1ccc(N)cc1)NC(CC(C)C)CC(O)C(NC(=O)COc2c(cccc2C)C)Cc3ccccc3
+C1C(CCC(C(C)(O)C)C1)NC(=O)c4ccc2n(cc(n2)c3ccccc3)c4
+CN(C)C(=O)N[CH]1CC[CH](CC1)CCN2CCN(CC2)c3cccc(Cl)c3Cl
+Cc1ccccc1Oc2cc(N3C(=O)NC(=CC3=O)C(F)(F)c4ccccc4)c(F)cc2C#N
+O=C1NCC(O)CCC1NC(=O)C(OC)C(O)C(O)C(O)/C=C/C(C)(C)C
+COc1cc2CCN(C=O)[CH](CCc3c[nH]c4ccccc34)c2cc1OC
+O=S(=O)(N(Cc1occc1)CC(O)Cn4c2ccccc2c3ccccc34)C
+COc1cc(ccc1Nc4ncc3NC(=O)Nc2cc(C)ccc2c3n4)N5CC(C)NC(C)C5
+c1nc(O)ccc1
+FC1C(O)C(O)C(OC1O)C
+O=S(=O)(N)c1ccc(cc1)Nc2nc3ccc(cn3c2)C(=O)c4c(Cl)cccc4Cl
+NC(C(O)=O)Cc1cc(C(=O)O)nn1c2ccc(cc2)CCCF
+CCOc1c(scc1)C(N2CCN(CC2)c3cc(c(cc3)[N+]([O-])=O)N4CCCC4)=O
+Cc1cc(SC[CH](N)C(O)O)c(O)c(O)c1C
+O=C(OC1C(NCC1O)Cc2ccc(OC)cc2)C
+O(CCCCCN(C)C)C1CCCCCCCCCCC1
+O=C2c1c(ncnc1N)CC(C2)(C)C
+O=S1(=O)CCC(C1)N(CCC(N)=O)C(C)=O
+c2c1c(nc(N)s1)ccc2C(=O)N
+O=C(NC1CC1)c1cnn(C)c1C(=O)Nc1cc2nc(cn2cc1)c1ccccc1
+O=C(O)C(N)Cc1c(cc(O)cc1C)C
+C(c1ccc(cc1)CN(CC(Nc2ccc(C)cc2)=O)C(c3ccc(cc3)N(C)C)=O)(NO)=O
+Clc1c(OC)cc(OC)c(Cl)c1NC(=O)N(c2ncnc(c2)Nc4ccc(N3CCN(CC)CC3)cc4)C
+O=C1NC=C(NC1=O)C1COc2cc(ccc2S1)C#N
+CON=CC(C)(C)NC(=O)[CH](OC)Oc1ccc2ncc(cc2c1)C#C
+CC(=O)N(C1CC1)[CH](C(=O)NC2CCCC2)c3ccc(F)cc3
+C[CH](CS)C(=O)N1CCCCC[CH]1C(O)=O
+O=C1N=C(N)C=CN1C2OC(C(O)C2O)COP(=O)(O)O
+O=C(NC1(C(=O)NCC#N)CCCCC1)c2ccc(Br)cc2N
+O=C(O)c1cc(nn1CCN)c4ccnc(c2cc3ccccc3nc2)c4
+CNC(N\C(=N)NCCCNC(OC2C(C)C(OC(C(C)(O)C1C(C)C(OC(O1)(C)C)C(C)CC(C(C2C)OCC#C)(O)C)CC)=O)=O)=O
+Oc1c(CNC(CCCCN)C(=O)O)c(cnc1C)COP(=O)(O)O
+CN1[CH]2CC[CH]1C[CH](C2)OC(c3ccccc3)c4ccccc4
+O=C(O)CCc2onc(c1ccccc1)c2
+[S-]P(=O)(O)OP(=O)(O)O
\ No newline at end of file
diff --git a/tests/interface/test_rdkit.py b/tests/interface/test_rdkit.py
new file mode 100644
index 000000000..24e56bf77
--- /dev/null
+++ b/tests/interface/test_rdkit.py
@@ -0,0 +1,160 @@
+from pathlib import Path
+import numpy as np
+import pytest
+from rdkit.Chem import MolFromSmiles, MolToSmiles
+from rdkit.Chem.rdchem import Atom, EditableMol, Mol
+from rdkit.Chem.rdchem import BondType as RDKitBondType
+from rdkit.Chem.rdmolops import (
+ AddHs,
+ RemoveStereochemistry,
+)
+import biotite.interface.rdkit as rdkit_interface
+import biotite.structure as struc
+import biotite.structure.info as info
+from biotite.interface import LossyConversionWarning
+from tests.util import data_dir
+
+
+def _load_smiles():
+    """
+    Read ``smiles.txt`` from the ``interface`` test data directory and return its
+    lines, which serve as the SMILES test cases.
+    """
+    smiles_path = Path(data_dir("interface")) / "smiles.txt"
+    return smiles_path.read_text().splitlines()
+
+
+@pytest.mark.filterwarnings(
+    "ignore:"
+    "The coordinates are missing for some atoms. "
+    "The fallback coordinates will be used instead"
+)
+@pytest.mark.parametrize(
+    "res_name", np.random.default_rng(0).choice(info.all_residues(), size=200).tolist()
+)
+def test_conversion_from_biotite(res_name):
+    """
+    Convert a small molecule (single residue) from Biotite to RDKit and back again
+    and expect that the round trip reproduces the original molecule.
+
+    The molecules are drawn randomly (with a fixed seed) from the CCD.
+    """
+    aromatic_bond_types = [
+        struc.BondType.AROMATIC_SINGLE,
+        struc.BondType.AROMATIC_DOUBLE,
+        struc.BondType.AROMATIC_TRIPLE,
+        struc.BondType.AROMATIC,
+    ]
+
+    ref_atoms = info.residue(res_name, allow_missing_coord=True)
+
+    mol = rdkit_interface.to_mol(ref_atoms)
+    test_atoms = rdkit_interface.from_mol(mol, add_hydrogen=False)
+
+    assert test_atoms.atom_name.tolist() == ref_atoms.atom_name.tolist()
+    assert test_atoms.element.tolist() == ref_atoms.element.tolist()
+    assert test_atoms.charge.tolist() == ref_atoms.charge.tolist()
+    # Missing coordinates (NaN) appear in some CCD compounds
+    # -> treat NaN values as equal
+    assert np.allclose(test_atoms.coord, ref_atoms.coord, equal_nan=True)
+
+    ref_bonds = ref_atoms.bonds.as_array()
+    test_bonds = test_atoms.bonds.as_array()
+
+    # There should be no undefined bonds
+    assert np.all(test_bonds[:, 2] != struc.BondType.ANY)
+
+    # Kekulization picks one of potentially multiple resonance structures,
+    # which is not necessarily the one given as input
+    # -> For aromatic bonds only require that the same bonds are aromatic...
+    ref_is_aromatic = np.isin(ref_bonds[:, 2], aromatic_bond_types)
+    test_is_aromatic = np.isin(test_bonds[:, 2], aromatic_bond_types)
+    assert np.all(ref_is_aromatic == test_is_aromatic)
+
+    # ...while the non-aromatic bonds must be exactly the same
+    ref_non_aromatic = {tuple(bond) for bond in ref_bonds[~ref_is_aromatic]}
+    test_non_aromatic = {tuple(bond) for bond in test_bonds[~test_is_aromatic]}
+    assert test_non_aromatic == ref_non_aromatic
+
+
+def test_conversion_from_biotite_multi_model():
+    """
+    Same as :func:`test_conversion_from_biotite()`, but the input is a stack of
+    models instead of a single model.
+    """
+    res_name = "ALA"
+    # NOTE(review): with a depth of 1 only a single copy of the residue is stacked
+    # -> confirm that this is the intended coverage for the multi-model case
+    stack_depth = 1
+
+    models = [info.residue(res_name)] * stack_depth
+    ref_atoms = struc.stack(models)
+
+    test_atoms = rdkit_interface.from_mol(rdkit_interface.to_mol(ref_atoms))
+
+    assert test_atoms.atom_name.tolist() == ref_atoms.atom_name.tolist()
+    assert test_atoms.element.tolist() == ref_atoms.element.tolist()
+    assert test_atoms.charge.tolist() == ref_atoms.charge.tolist()
+    assert np.allclose(test_atoms.coord.tolist(), ref_atoms.coord.tolist())
+    assert test_atoms.bonds.as_set() == ref_atoms.bonds.as_set()
+
+
+@pytest.mark.parametrize("smiles", _load_smiles())
+def test_conversion_from_rdkit(smiles):
+    """
+    Convert a small molecule (single residue) from RDKit to Biotite and back again
+    and expect that the round trip reproduces the original molecule.
+
+    The initial molecule is created from a SMILES string, so that it is built by
+    RDKit's own functionality.
+    """
+    original_mol = MolFromSmiles(smiles)
+    round_trip_mol = rdkit_interface.to_mol(rdkit_interface.from_mol(original_mol))
+
+    # For a fair comparison the reference needs explicit hydrogen atoms as well,
+    # as the intermediate AtomArray contains them
+    expected_mol = AddHs(original_mol)
+    # Stereochemistry is not stored in the intermediate AtomArray,
+    # hence it must not take part in the comparison either
+    RemoveStereochemistry(expected_mol)
+
+    # RDKit molecules cannot be checked for equality directly
+    # -> Compare their SMILES strings instead
+    assert MolToSmiles(round_trip_mol) == MolToSmiles(expected_mol)
+
+
+def test_kekulization():
+    """
+    Check if a benzene ring has alternating single and double bonds.
+
+    As kekulization may choose either of the two resonance structures, the bond
+    types obtained after the round trip must match the reference either directly
+    or after exchanging aromatic single and aromatic double bonds.
+    """
+    # Omit hydrogen for easier comparison of aromatic bond types later on
+    atoms = info.residue("BNZ")
+    atoms = atoms[atoms.element != "H"]
+    ref_bond_types = atoms.bonds.as_array()[:, 2].tolist()
+
+    mol = rdkit_interface.to_mol(atoms)
+    atoms = rdkit_interface.from_mol(mol, add_hydrogen=False)
+    test_bond_types = atoms.bonds.as_array()[:, 2].tolist()
+
+    # There are two possible resonance structures -> swap single and double bonds
+    # Any other bond type is left untouched
+    single = struc.BondType.AROMATIC_SINGLE
+    double = struc.BondType.AROMATIC_DOUBLE
+    swapped_bond_types = [
+        single if btype == double else double if btype == single else btype
+        for btype in test_bond_types
+    ]
+
+    assert test_bond_types == ref_bond_types or swapped_bond_types == ref_bond_types
+
+
+def test_unmappable_bond_type():
+    """
+    Expect a :class:`LossyConversionWarning`, if the molecule contains a bond type
+    that cannot be mapped to Biotite.
+    """
+    editable_mol = EditableMol(Mol())
+    for _ in range(2):
+        editable_mol.AddAtom(Atom("F"))
+    # There is no Biotite bond type that corresponds to 'HEXTUPLE'
+    editable_mol.AddBond(0, 1, RDKitBondType.HEXTUPLE)
+    mol = editable_mol.GetMol()
+
+    with pytest.warns(LossyConversionWarning):
+        rdkit_interface.from_mol(mol)
diff --git a/tests/interface/test_version.py b/tests/interface/test_version.py
new file mode 100644
index 000000000..226225816
--- /dev/null
+++ b/tests/interface/test_version.py
@@ -0,0 +1,26 @@
+import pytest
+from biotite.interface.version import VersionError, requires_version
+
+
+def test_requires_version_for_incompatible_version():
+    """
+    Calling a function must raise a :class:`VersionError`, if the installed version
+    of the required package does not fulfill the version requirement.
+    """
+
+    def function_with_incompatible_version():
+        pass
+
+    # Apply the decorator explicitly instead of using the '@' syntax
+    guarded_function = requires_version("biotite", ">999")(
+        function_with_incompatible_version
+    )
+
+    with pytest.raises(VersionError):
+        guarded_function()
+
+
+def test_requires_version_for_missing_package():
+    """
+    Decorating a function must raise an :class:`ImportError`, if the required
+    package is not installed.
+    """
+
+    def _function_with_missing_package():
+        pass
+
+    # Both creating and applying the decorator happen inside the context,
+    # as the exception is expected before the function is ever called
+    with pytest.raises(ImportError):
+        requires_version("missing", ">=1.0")(_function_with_missing_package)
diff --git a/tests/test_doctest.py b/tests/test_doctest.py
index 6da470bf8..02ee064ba 100644
--- a/tests/test_doctest.py
+++ b/tests/test_doctest.py
@@ -170,6 +170,13 @@
is_not_installed("vina"), reason="Software is not installed"
),
),
+ pytest.param(
+ "biotite.interface.rdkit",
+ ["biotite.structure", "biotite.structure.info"],
+ marks=pytest.mark.skipif(
+ cannot_import("rdkit"), reason="Software is not installed"
+ ),
+ ),
]