diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml index d3b80f4a2..8ef52bc86 100644 --- a/.github/workflows/test_and_deploy.yml +++ b/.github/workflows/test_and_deploy.yml @@ -23,6 +23,7 @@ env: --durations=50 --ignore={project}//tests//sequence//align//test_statistics.py --ignore={project}//tests//application + --ignore={project}//tests//interface --ignore={project}//tests//database --ignore={project}//tests//test_doctest.py --ignore={project}//tests//test_modname.py @@ -223,6 +224,7 @@ jobs: tests//test_modname.py tests//database tests//application + tests//interface test-muscle5: diff --git a/doc/conf.py b/doc/conf.py index eee7fc572..86d5d438b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -67,6 +67,7 @@ "sphinx.ext.doctest", "sphinx.ext.mathjax", "sphinx.ext.linkcode", + "sphinx.ext.intersphinx", "sphinxcontrib.bibtex", "sphinx_gallery.gen_gallery", "sphinx_design", @@ -111,6 +112,10 @@ notfound_urls_prefix = "/latest/" +intersphinx_mapping = {"rdkit": ("https://www.rdkit.org/docs/", None)} +intersphinx_timeout = 60 + + #### HTML #### html_theme = "pydata_sphinx_theme" diff --git a/doc/switcher.py b/doc/switcher.py index 5bd6202fc..a7b7e7c36 100644 --- a/doc/switcher.py +++ b/doc/switcher.py @@ -5,42 +5,13 @@ __author__ = "Patrick Kunzmann" __all__ = ["create_switcher_json"] -import functools import json -import re -from dataclasses import dataclass import requests +from packaging.version import Version import biotite RELEASE_REQUEST = "https://api.github.com/repos/biotite-dev/biotite/releases" BIOTITE_URL = "https://www.biotite-python.org" -SEMVER_TAG_REGEX = r"^v?(\d+)\.(\d+)\.(\d+)" - - -@functools.total_ordering -@dataclass(frozen=True) -class Version: - major: ... - minor: ... - patch: ... 
- - @staticmethod - def from_tag(tag): - match = re.match(SEMVER_TAG_REGEX, tag) - if match is None: - raise ValueError(f"Invalid tag: {tag}") - major, minor, patch = map(int, match.groups()) - return Version(major, minor, patch) - - def __str__(self): - return f"{self.major}.{self.minor}.{self.patch}" - - def __ge__(self, other): - return (self.major, self.minor, self.patch) >= ( - other.major, - other.minor, - other.patch, - ) def _get_previous_versions(min_tag, n_versions, current_version): @@ -48,17 +19,17 @@ def _get_previous_versions(min_tag, n_versions, current_version): # -> request one more version than necessary response = requests.get(RELEASE_REQUEST, params={"per_page": n_versions + 1}) release_data = json.loads(response.text) - versions = [Version.from_tag(release["tag_name"]) for release in release_data] + versions = [Version(release["tag_name"]) for release in release_data] applicable_versions = [ version for version in versions - if version >= Version.from_tag(min_tag) and version < current_version + if version >= Version(min_tag) and version < current_version ] return applicable_versions[:n_versions] def _get_current_version(): - return Version(*biotite.__version_tuple__[:3]) + return Version(biotite.__version__) def create_switcher_json(file_path, min_tag, n_versions): @@ -81,7 +52,7 @@ def create_switcher_json(file_path, min_tag, n_versions): versions.append(current_version) versions.sort() for version in versions: - if version.patch != 0: + if version.micro != 0: # Documentation is not uploaded for patch versions continue version_config.append( diff --git a/doc/tutorial/index.rst b/doc/tutorial/index.rst index 8dc88925b..c65afd2b7 100644 --- a/doc/tutorial/index.rst +++ b/doc/tutorial/index.rst @@ -103,4 +103,5 @@ provided by the mentioned subpackages. 
database/index sequence/index structure/index - application/index \ No newline at end of file + application/index + interface/index \ No newline at end of file diff --git a/doc/tutorial/interface/index.rst b/doc/tutorial/interface/index.rst new file mode 100644 index 000000000..b011bb9e7 --- /dev/null +++ b/doc/tutorial/interface/index.rst @@ -0,0 +1,38 @@ +:sd_hide_title: true + +.. include:: /tutorial/preamble.rst + +########################## +``interface`` subpackage +########################## + +Connecting the ecosystem - The ``interface`` subpackage +======================================================= + +.. currentmodule:: biotite.interface + +In the last section we learned that :mod:`biotite.application` encapsulates entire +external application runs with subsequent calls of ``start()`` and ``join()``. +In contrast :mod:`biotite.interface` provides flexible interfaces to other Python +packages in the bioinformatics ecosystem. +Its purpose is to convert between native Biotite objects, such as :class:`.AtomArray` +and :class:`.Sequence`, and the corresponding objects in the respective interfaced +package. +Each interface is located in a separate subpackage with the same name as the +interfaced package. +For example, the interface to ``rdkit`` is placed in the subpackage +:mod:`biotite.interface.rdkit`. + +.. note:: + + Like in :mod:`biotite.application`, the interfaced Python packages are not + dependencies of the ``biotite`` package. + Hence, they need to be installed separately. + +The following chapters will give you an overview of the different interfaced packages. + +.. toctree:: + :maxdepth: 1 + :hidden: + + rdkit \ No newline at end of file diff --git a/doc/tutorial/interface/rdkit.rst b/doc/tutorial/interface/rdkit.rst new file mode 100644 index 000000000..ff9954a52 --- /dev/null +++ b/doc/tutorial/interface/rdkit.rst @@ -0,0 +1,66 @@ +.. include:: /tutorial/preamble.rst + +Interface to RDKit +================== + +.. 
currentmodule:: biotite.interface.rdkit + +`RDKit <https://www.rdkit.org/>`_ is a popular cheminformatics package +and thus can be used to supplement *Biotite* with a variety of functionalities focused +on small molecules, such as conversion from/to textual representations +(e.g. *SMILES* and *InChI*) and visualization as structural formulas. +Basically, the :mod:`biotite.interface.rdkit` subpackage provides only two functions: +:func:`to_mol()` to obtain a :class:`rdkit.Chem.rdchem.Mol` from an :class:`.AtomArray` +and :func:`from_mol()` for the reverse direction. +The rest happens within the realm of *RDKit*. +This tutorial will only give a brief glimpse of how the interface can be used. +For comprehensive documentation refer to the +`RDKit documentation <https://www.rdkit.org/docs/>`_. + +First example: Depiction as structural formula +---------------------------------------------- +*RDKit* allows rendering structural formulas using +`pillow <https://pillow.readthedocs.io/>`_. +For a proper structural formula, we need to compute proper 2D coordinates first. + +.. jupyter-execute:: + + import biotite.interface.rdkit as rdkit_interface + import biotite.structure.info as struc + from rdkit.Chem.Draw import MolToImage + from rdkit.Chem.rdDepictor import Compute2DCoords + from rdkit.Chem.rdmolops import RemoveHs + + penicillin = struc.residue("PNN") + mol = rdkit_interface.to_mol(penicillin) + # We do not want to include explicit hydrogen atoms in the structural formula + mol = RemoveHs(mol) + Compute2DCoords(mol) + image = MolToImage(mol, size=(600, 400)) + display(image) + +Second example: Creating a molecule from SMILES +----------------------------------------------- +Although the *Chemical Component Dictionary* accessible from +:mod:`biotite.structure.info` already provides all compounds found in the PDB, +there are a myriad of compounds out there that are not part of it. +One way to obtain them as :class:`.AtomArray` is passing a *SMILES* string to +*RDKit* to obtain the topology of the molecule and then computing the coordinates. + +..
jupyter-execute:: + + from rdkit.Chem import MolFromSmiles + from rdkit.Chem.rdDistGeom import EmbedMolecule + from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule + from rdkit.Chem.rdmolops import AddHs + + ERTAPENEM_SMILES = "C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)[C@@H](C)O" + + mol = MolFromSmiles(ERTAPENEM_SMILES) + # RDKit uses implicit hydrogen atoms by default, but Biotite requires explicit ones + mol = AddHs(mol) + # Create a 3D conformer + conformer_id = EmbedMolecule(mol) + UFFOptimizeMolecule(mol) + ertapenem = rdkit_interface.from_mol(mol, conformer_id) + print(ertapenem) \ No newline at end of file diff --git a/environment.yml b/environment.yml index f384f9b95..ffbcd85fa 100644 --- a/environment.yml +++ b/environment.yml @@ -22,6 +22,7 @@ dependencies: - msgpack-python >=0.5.6 - networkx >=2.0 - numpy >=2.0 + - packaging >=24.0 - requests >=2.12 # Testing - pytest >=7.0 @@ -53,3 +54,6 @@ dependencies: - pydot >=1.4 - scikit-learn >=0.18 - scipy >=1.8.0 + - pip: + # Conda provides no recent version of RDKit (required for biotite.interface) + - rdkit diff --git a/pyproject.toml b/pyproject.toml index 2c6ae641a..1a26d05cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "requests >= 2.12", "msgpack >= 0.5.6", "networkx >= 2.0", - "requests >= 2.12", + "packaging >= 24.0", ] dynamic = ["version"] diff --git a/src/biotite/interface/__init__.py b/src/biotite/interface/__init__.py new file mode 100644 index 000000000..71545544c --- /dev/null +++ b/src/biotite/interface/__init__.py @@ -0,0 +1,19 @@ +# This source code is part of the Biotite package and is distributed +# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further +# information. + +""" +This subpackage provides interfaces to other Python packages in the bioinformatics +ecosystem. 
+Its purpose is to convert between native Biotite objects, such as :class:`.AtomArray` +and :class:`.Sequence`, and the corresponding objects in the respective interfaced +package. +In contrast to :mod:`biotite.application`, where an entire application run is handled +under the hood, :mod:`biotite.interface` only covers the object conversion, allowing +for more flexibility. +""" + +__name__ = "biotite.interface" +__author__ = "Patrick Kunzmann" + +from .warning import * diff --git a/src/biotite/interface/rdkit/__init__.py b/src/biotite/interface/rdkit/__init__.py new file mode 100644 index 000000000..33d7eb121 --- /dev/null +++ b/src/biotite/interface/rdkit/__init__.py @@ -0,0 +1,15 @@ +# This source code is part of the Biotite package and is distributed +# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further +# information. + +""" +This subpackage provides an interface to the `RDKit <https://www.rdkit.org/>`_ +cheminformatics package. +It allows conversion between :class:`.AtomArray` and :class:`rdkit.Chem.rdchem.Mol` +objects. +""" + +__name__ = "biotite.interface.rdkit" +__author__ = "Patrick Kunzmann" + +from .mol import * diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py new file mode 100644 index 000000000..3f9ad3170 --- /dev/null +++ b/src/biotite/interface/rdkit/mol.py @@ -0,0 +1,297 @@ +# This source code is part of the Biotite package and is distributed +# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further +# information.
+ +__name__ = "biotite.interface.rdkit" +__author__ = "Patrick Kunzmann" +__all__ = ["to_mol", "from_mol"] + +import warnings +from collections import defaultdict +import numpy as np +from rdkit.Chem.rdchem import Atom, Conformer, EditableMol, KekulizeException, Mol +from rdkit.Chem.rdchem import BondType as RDKitBondType +from rdkit.Chem.rdmolops import AddHs, Kekulize, SanitizeFlags, SanitizeMol +from biotite.interface.version import requires_version +from biotite.interface.warning import LossyConversionWarning +from biotite.structure.atoms import AtomArray, AtomArrayStack +from biotite.structure.bonds import BondList, BondType +from biotite.structure.error import BadStructureError + +_KEKULIZED_TO_AROMATIC_BOND_TYPE = { + BondType.SINGLE: BondType.AROMATIC_SINGLE, + BondType.DOUBLE: BondType.AROMATIC_DOUBLE, + BondType.TRIPLE: BondType.AROMATIC_TRIPLE, +} +_BIOTITE_TO_RDKIT_BOND_TYPE = { + BondType.ANY: RDKitBondType.UNSPECIFIED, + BondType.SINGLE: RDKitBondType.SINGLE, + BondType.DOUBLE: RDKitBondType.DOUBLE, + BondType.TRIPLE: RDKitBondType.TRIPLE, + BondType.QUADRUPLE: RDKitBondType.QUADRUPLE, + BondType.AROMATIC_SINGLE: RDKitBondType.AROMATIC, + BondType.AROMATIC_DOUBLE: RDKitBondType.AROMATIC, + BondType.AROMATIC_TRIPLE: RDKitBondType.AROMATIC, + BondType.AROMATIC: RDKitBondType.AROMATIC, + # Dative bonds may lead to a KekulizeException and may potentially be deprecated + # in the future (https://github.com/rdkit/rdkit/discussions/6995) + BondType.COORDINATION: RDKitBondType.SINGLE, +} +_RDKIT_TO_BIOTITE_BOND_TYPE = { + RDKitBondType.UNSPECIFIED: BondType.ANY, + RDKitBondType.SINGLE: BondType.SINGLE, + RDKitBondType.DOUBLE: BondType.DOUBLE, + RDKitBondType.TRIPLE: BondType.TRIPLE, + RDKitBondType.QUADRUPLE: BondType.QUADRUPLE, + RDKitBondType.DATIVE: BondType.COORDINATION, +} + + +@requires_version("rdkit", ">=2020") +def to_mol( + atoms, kekulize=False, use_dative_bonds=False, include_annotations=("atom_name",) +): + """ + Convert an :class:`.AtomArray` or
:class:`.AtomArrayStack` into a + :class:`rdkit.Chem.rdchem.Mol`. + + Parameters + ---------- + atoms : AtomArray or AtomArrayStack + The molecule to be converted. + kekulize : bool, optional + If set to true, aromatic bonds are represented by single, double and triple + bonds. + By default, aromatic bond types are converted to + :attr:`rdkit.Chem.rdchem.BondType.AROMATIC`. + use_dative_bonds : bool, optional + If set to true, :attr:`BondType.COORDINATION` bonds are translated to + :attr:`rdkit.Chem.rdchem.BondType.DATIVE` bonds instead of + :attr:`rdkit.Chem.rdchem.BondType.SINGLE` bonds. + This may have the undesired side effect that a + :class:`rdkit.Chem.rdchem.KekulizeException` is raised for some molecules, when + the returned :class:`rdkit.Chem.rdchem.Mol` is kekulized. + include_annotations : list of str, optional + Names of annotation arrays in `atoms` that are added as atom-level property with + the same name to the returned :class:`rdkit.Chem.rdchem.Mol`. + These properties can be accessed with :meth:`rdkit.Chem.rdchem.Atom.GetProp()`. + + Returns + ------- + mol : rdkit.Chem.rdchem.Mol + The *RDKit* molecule. + If the input `atoms` is an :class:`AtomArrayStack`, all models are included + as conformers with conformer IDs starting from ``0``. + + Examples + -------- + + >>> from rdkit.Chem import MolToSmiles + >>> alanine_atom_array = residue("ALA") + >>> mol = to_mol(alanine_atom_array) + >>> print(MolToSmiles(mol)) + [H]OC(=O)C([H])(N([H])[H])C([H])([H])[H] + + By default, ``'atom_name'`` is stored as property of each atom. + + >>> for atom in mol.GetAtoms(): + ...
print(atom.GetProp("atom_name")) + N + CA + C + O + CB + OXT + H + H2 + HA + HB1 + HB2 + HB3 + HXT + """ + mol = EditableMol(Mol()) + + has_charge_annot = "charge" in atoms.get_annotation_categories() + for i in range(atoms.array_length()): + rdkit_atom = Atom(atoms.element[i].capitalize()) + if has_charge_annot: + rdkit_atom.SetFormalCharge(atoms.charge[i].item()) + for annot_name in include_annotations: + rdkit_atom.SetProp(annot_name, atoms.get_annotation(annot_name)[i].item()) + mol.AddAtom(rdkit_atom) + + if atoms.bonds is None: + raise BadStructureError("An AtomArray with associated BondList is required") + bonds = atoms.bonds + if kekulize: + bonds = bonds.copy() + bonds.remove_aromaticity() + for atom_i, atom_j, bond_type in bonds.as_array(): + # The lookup table maps coordination bonds to single bonds by default + rdkit_bond_type = _BIOTITE_TO_RDKIT_BOND_TYPE[bond_type] + if use_dative_bonds and bond_type == BondType.COORDINATION: + rdkit_bond_type = RDKitBondType.DATIVE + mol.AddBond(atom_i.item(), atom_j.item(), rdkit_bond_type) + + # Create a proper 'frozen' Mol object + mol = mol.GetMol() + coord = atoms.coord + if coord.ndim == 2: + # Handle AtomArray and AtomArrayStack consistently + coord = coord[None, :, :] + for model_coord in coord: + conformer = Conformer(mol.GetNumAtoms()) + conformer.SetPositions(model_coord.astype(np.float64)) + conformer.Set3D(True) + mol.AddConformer(conformer, assignId=True) + + return mol + + +@requires_version("rdkit", ">=2020") +def from_mol(mol, conformer_id=None, add_hydrogen=None): + """ + Convert a :class:`rdkit.Chem.rdchem.Mol` into an :class:`.AtomArray` or + :class:`.AtomArrayStack`. + + Parameters + ---------- + mol : rdkit.Chem.rdchem.Mol + The molecule to be converted. + conformer_id : int, optional + The conformer to be converted. + By default, an :class:`AtomArrayStack` with all conformers is returned. + add_hydrogen : bool, optional + If set to true, explicit hydrogen atoms are always added. + If set to false, explicit hydrogen atoms are never added.
+ By default, explicit hydrogen atoms are only added, if hydrogen atoms are not + already present. + + Returns + ------- + atoms : AtomArray or AtomArrayStack + The converted atoms. + An :class:`AtomArrayStack` is only returned, if the `conformer_id` parameter + is not set. + + Notes + ----- + All atom-level properties of `mol` + (obtainable with :meth:`rdkit.Chem.rdchem.Atom.GetProp()`) are added as string-type + annotation array with the same name. + ``element`` and ``charge`` are not inferred from properties but from the + dedicated attributes in the :class:`rdkit.Chem.rdchem.Mol` object. + + Examples + -------- + + >>> from rdkit.Chem import MolFromSmiles + >>> from rdkit.Chem.rdDistGeom import EmbedMolecule + >>> from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule + >>> from rdkit.Chem.rdmolops import AddHs + >>> mol = MolFromSmiles("C[C@@H](C(=O)O)N") + >>> mol = AddHs(mol) + >>> # Create a 3D conformer + >>> conformer_id = EmbedMolecule(mol) + >>> UFFOptimizeMolecule(mol) + 0 + >>> alanine_atom_array = from_mol(mol, conformer_id) + >>> print(alanine_atom_array) + 0 C -1.067 1.111 -0.079 + 0 C -0.366 -0.241 -0.217 + 0 C 1.128 -0.082 -0.117 + 0 O 1.654 0.353 0.943 + 0 O 1.932 -0.413 -1.203 + 0 N -0.865 -1.173 0.796 + 0 H -0.715 1.807 -0.871 + 0 H -2.165 0.980 -0.191 + 0 H -0.862 1.562 0.916 + 0 H -0.613 -0.650 -1.221 + 0 H 2.938 -0.311 -1.154 + 0 H -0.590 -0.837 1.749 + 0 H -0.408 -2.103 0.649 + """ + if add_hydrogen is None: + add_hydrogen = not _has_explicit_hydrogen(mol) + if add_hydrogen: + SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS) + mol = AddHs(mol) + + rdkit_atoms = mol.GetAtoms() + if rdkit_atoms is None: + raise BadStructureError("Could not obtain atoms from Mol") + + if conformer_id is None: + conformers = [conf for conf in mol.GetConformers() if conf.Is3D()] + atoms = AtomArrayStack(len(conformers), len(rdkit_atoms)) + for i, conformer in enumerate(conformers): + atoms.coord[i] = np.array(conformer.GetPositions()) + else: +
conformer = mol.GetConformer(conformer_id) + atoms = AtomArray(len(rdkit_atoms)) + atoms.coord = np.array(conformer.GetPositions()) + + extra_annotations = defaultdict( + # Use 'object' dtype first, as the maximum string length is unknown + lambda: np.full(atoms.array_length(), "", dtype=object) + ) + atoms.add_annotation("charge", int) + for rdkit_atom in rdkit_atoms: + annot_names = rdkit_atom.GetPropNames() + for annot_name in annot_names: + extra_annotations[annot_name][rdkit_atom.GetIdx()] = rdkit_atom.GetProp( + annot_name + ) + atoms.element[rdkit_atom.GetIdx()] = rdkit_atom.GetSymbol().upper() + atoms.charge[rdkit_atom.GetIdx()] = rdkit_atom.GetFormalCharge() + for annot_name, array in extra_annotations.items(): + atoms.set_annotation(annot_name, array.astype(str)) + + rdkit_bonds = list(mol.GetBonds()) + is_aromatic = np.array( + [bond.GetBondType() == RDKitBondType.AROMATIC for bond in rdkit_bonds] + ) + if np.any(is_aromatic): + # Determine the kekulized order of aromatic bonds + # Copy as 'Kekulize()' modifies the molecule in-place + mol = Mol(mol) + try: + Kekulize(mol) + except KekulizeException: + warnings.warn( + "Kekulization failed, " + "using 'BondType.AROMATIC' for aromatic bonds instead", + LossyConversionWarning, + ) + rdkit_bonds = list(mol.GetBonds()) + bond_array = np.full((len(rdkit_bonds), 3), BondType.ANY, dtype=np.uint32) + for i, bond in enumerate(rdkit_bonds): + bond_type = _RDKIT_TO_BIOTITE_BOND_TYPE.get(bond.GetBondType()) + if bond_type is None: + warnings.warn( + f"Bond type '{bond.GetBondType().name}' cannot be mapped to Biotite, " + "using 'BondType.ANY' instead", + LossyConversionWarning, + ) + bond_type = BondType.ANY + if is_aromatic[i]: + try: + bond_type = _KEKULIZED_TO_AROMATIC_BOND_TYPE[bond_type] + except KeyError: + bond_type = BondType.AROMATIC + warnings.warn( + "Kekulization returned invalid bond type, " + "using generic 'BondType.AROMATIC' instead", + LossyConversionWarning, + ) + bond_array[i, 0] =
bond.GetBeginAtomIdx() + bond_array[i, 1] = bond.GetEndAtomIdx() + bond_array[i, 2] = bond_type + atoms.bonds = BondList(atoms.array_length(), bond_array) + + return atoms + + +def _has_explicit_hydrogen(mol): + return mol.GetNumAtoms() > mol.GetNumHeavyAtoms() diff --git a/src/biotite/interface/version.py b/src/biotite/interface/version.py new file mode 100644 index 000000000..92da54c78 --- /dev/null +++ b/src/biotite/interface/version.py @@ -0,0 +1,91 @@ +# This source code is part of the Biotite package and is distributed +# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further +# information. + +__name__ = "biotite.interface" +__author__ = "Patrick Kunzmann" +__all__ = ["VersionError", "requires_version"] + + +import functools +import importlib.metadata +from packaging.specifiers import SpecifierSet +from packaging.version import Version + +# Maps '(package, function name)' to the variant of the interface function that is +# compatible with the installed version of the respective package +_functions_for_version = {} + + +class VersionError(Exception): + """ + This exception is raised when the installed version of an interfaced package is + incompatible with all implemented variants of a function. + """ + + pass + + +def requires_version(package, version_specifier): + """ + Declare a function variant that is compatible with a specific version range of the + interfaced package. + + Parameters + ---------- + package : str + The name of the interfaced package. + version_specifier : str or list of str + The :pep:`440` version specifier(s) for the interfaced package that are + compatible with the function. + Multiple constraints can be either given as a list of strings or as a single + comma-separated string. + + Returns + ------- + decorator : callable + The decorator to be applied to the function variant. + + Raises + ------ + ImportError + If `package` is not installed (raised when the decorator is applied). + VersionError + If the decorated function is called, although no variant is compatible with + the installed version of `package`.
+ """ + + def decorator(function): + # Variants of the same function share this key; including the package + # keeps equally named functions of different interfaces apart + key = (package, function.__name__) + + @functools.wraps(function) + def wrapper(*args, **kwargs): + function_for_version = _functions_for_version.get(key) + if function_for_version is None: + raise VersionError( + f"No variant of '{function.__name__}()' " + f"found for installed '{package}=={package_version}'" + ) + return function_for_version(*args, **kwargs) + + if isinstance(version_specifier, str): + specifier = SpecifierSet(version_specifier) + else: + # Combine multiple constraints into one comma-separated specifier + specifier = SpecifierSet(",".join(version_specifier)) + try: + package_version = Version(importlib.metadata.version(package)) + except importlib.metadata.PackageNotFoundError: + raise ImportError( + f"'{function.__name__}()' requires the '{package}' package" + ) + # Only a variant matching the installed version is registered, + # the wrapper looks it up lazily on each call + if package_version in specifier: + _functions_for_version[key] = function + + return wrapper + + return decorator diff --git a/src/biotite/interface/warning.py b/src/biotite/interface/warning.py new file mode 100644 index 000000000..e60b0e49e --- /dev/null +++ b/src/biotite/interface/warning.py @@ -0,0 +1,19 @@ +# This source code is part of the Biotite package and is distributed +# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further +# information. + +__name__ = "biotite.interface" +__author__ = "Patrick Kunzmann" +__all__ = ["LossyConversionWarning"] + + +class LossyConversionWarning(UserWarning): + """ + Warning raised, when some information is lost during conversion. + + Note that most conversion functions will be inherently lossy to some extent. + This warning is only raised, when the loss of information happens only for + some edge case.
+ """ + + pass diff --git a/tests/interface/__init__.py b/tests/interface/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/interface/data/README.rst b/tests/interface/data/README.rst new file mode 100644 index 000000000..c1b8e8079 --- /dev/null +++ b/tests/interface/data/README.rst @@ -0,0 +1,5 @@ +Datasets for testing interfaces +=============================== + +- ``smiles.txt``: Randomly selected SMILES strings from the + ``pdbx_chem_comp_descriptor.descriptor`` of the CCD. \ No newline at end of file diff --git a/tests/interface/data/smiles.txt b/tests/interface/data/smiles.txt new file mode 100644 index 000000000..19b76e583 --- /dev/null +++ b/tests/interface/data/smiles.txt @@ -0,0 +1,199 @@ +OC2C(NC(CNC(c1ccccc1)CO)C2O)C +O=C(C(N)Cc1ccccc1)C +OC(=O)c1cccc(c1)N2C(=O)c3ccccc3N=C2C=Cc4ccc(cc4)C#C +Cc1ccc(OC[CH](O)CNC(C)(C)C)c2cc([nH]c12)C#N +O=C1C=C(C=C(N1O)Cc2ccccc2)C +C(=O)(NC(Cc1ccccc1)C(NC(CC(C)C)B(O)O)=O)C[C@H]=[C@H]c2cn(CCOCCOCCOCC(O)=O)nn2 +C[S](=O)(=O)C1CCN(CC1)c2cccc(CN3C=C(C=CC3=O)c4onc(n4)c5ccc(OC(F)(F)F)cc5)c2 +CC(C)(C)NO +c2(Oc1c(c(Cl)nn1C)Cl)ccc(cc2)S(NC(=O)Nc3sc(cn3)Br)(=O)=O +c2c1CN(CCc1cc(c2OC)OC)C=O +CCC1(C(NC(=O)NC1=O)=O)c2ccccc2 +Cc1onc(c2ccccc2)c1C(=O)Nc3sc(cn3)[N+]([O-])=O +Clc1cccc2c1C(=O)N=C1N2c2ccc(cc2C21CCCCC2)C1CCNCC1 +Oc1c2Cc3cc(Cc4ccc(cc4)[S]([O-])(=O)=O)cc(Cc5cc(Cc6ccc(cc6)[S]([O-])(=O)=O)cc(Cc7cc(Cc8ccc(cc8)[S]([O-])(=O)=O)cc(Cc9cc(Cc%10ccc(cc%10)[S]([O-])(=O)=O)cc(Cc%11cc(Cc%12ccc(cc%12)[S]([O-])(=O)=O)cc(Cc%13cc(Cc%14ccc(cc%14)[S]([O-])(=O)=O)cc(Cc%15cc(Cc%16ccc(cc%16)[S]([O-])(=O)=O)cc(Cc1cc(Cc%17ccc(cc%17)[S]([O-])(=O)=O)c2)c%15O)c%13O)c%11O)c9O)c7O)c5O)c3O +C1(=O)C=CC2(C(C1(C)C)CCC5(C)C2CC4(C3=C(C(OC3)=O)C(C(=C4O5)C)=O)C)C +Fc1cccc(F)c1S(=O)(=O)N2CCN(CC2)S(=O)(=O)c3ccc4OCCOc4c3 +CN(C)C(=O)OC[CH](N)C(O)=O +Cc1ccc(S[CH]2O[CH](CO)[CH](O)[CH]([CH]2O)n3cc(nn3)c4cc(F)c(F)c(F)c4)cc1 +O=C1C=C5Oc6cc(O)ccc6N=C5C=C1CCCCC(=O)NS(=O)(=O)OCC4OC(n3cnc2c(ncnc23)N)C(O)C4O +O=C(O)NC1(CCCCC1)C 
+O=P(CCCCCCCCC)(C(C)C)C(C)C +COc1cc2c(cc1)cncc2NC(=O)C1CCOc2ccc(Cl)cc21 +O=C(O)Cc2ccc(C(=O)c1ccc(cc1)C)n2C +C2=C1CCCC(C)C1(CC(\C(=C)C)C2)C +CN(C)c1ccc(cc1)[S](=O)(=O)N[N+]([O-])=O +COC(=O)NCc1cc([nH]n1)c2sc(nc2N3CCC(O)CC3)c4ccccc4 +CCC[CH](NC(=O)[CH]1[CH]2CCC[CH]2CN1C=O)[CH](O)C(=O)NC3CC3 +c4(F)c(O)c(cc(Nc1nc2c(cn1)N(CC#C)C(=O)C(C)N2CC3CC3)c4)F +c3(Nc1c(OC)cc(cc1)N2CCN(C)CC2)ncc(c(n3)NCc4cc(NC(/C=C)=O)ccc4)Cl +O=C1c3c(OC(=C1)c2ccc(OC)cc2)cccc3 +c1(O)c(OC)cc(cc1)\C=C +COC(=O)C[CH](N)C(O)=O +CC=CC=CC(=O)N[CH](CC(=O)N[CH](C(C)C)C(=O)[CH]1[CH](C)C(=O)NC1=O)c2ccccc2 +CCc1n2c(nn1)cc(c3c2cc(cc3)OC)C +O=C2c1n(cnc1N(C(=O)N2CCN4C(=O)N(c3ncn(c3C4=O)C)C)C)C +OC[N+](CCCCCC)(C)C +Nc1ncnc2cc(cc(F)c12)c3cccc(c3)[S](=O)(=O)NC(=O)[CH]4CCCN4 +CCC(=O)N1C[CH](CC[CH]1C)Nc2ncnc3[nH]ccc23 +O=C(NC(C(=O)O)C(C)C)C(NC(=O)CCCC(C(=O)O)N)CCS +O=C(N1C(C(=O)O)CC(C)CC1)C(NS(=O)(=O)c3cc2ccccc2cc3)CC(=O)NCC4CCCN(C(=[N@H])N)C4 +O=C(NC(=O)c1ccccc1)C3N(C(=O)C(CC2CCCC2)CN(O)C=O)CCC3 +Cn1ccc2c1cccc2c3[nH]c4nccc(n5cc(N[CH]6CCOC[CH]6N)cn5)c4n3 +O=S(=O)(c1sc(c(c1)C)CCOC)NC(=O)Nc2nc(NC(=O)NC)cc(Br)c2 +Brc1ccc(cc1)C(CC(=O)O)NC(=O)c1cc2cc[NH]c2nc1 +O=C(C(n1nnc(c1)C(NC(=O)c2ccccc2)(C)C(C)C)C/C=C/C)C +Fc1ccc(Cn2cnc3ccccc23)cc1 +C1(/C(CCC1)C(=O)O)=C/C(COP(O)(O)=O)N +[O-][N+](=O)c2ccc(NCc1ncccc1)c(C(=O)N)c2 +[O-][n+]2c1ccccc1[n+]([O-])c(c2CBr)CBr +NS(=O)(=O)c1ccc(cc1)C(=O)N1CC2CN(CC2(C)C1)C(=O)OCc1ccc(OC(F)(F)F)cc1 +[N@H]=Cc1nc(ccn1)CN +O=C1N(CCC1)C(C(=O)NC(Cc2ccc(O)cc2)C(O)CNC4C(=O)NC(C(=O)NCCCOc3ccc(cc3)C4)C(C)CC)C(C)C +CNC1(CO)CCOCC1 +CC(=O)NCC(=O)NCC#CBr +c1cc(NCC)c(cc1S(C)(=O)=O)F +Fc1cc2[NH]cc(c3ccc4nnn(CCN5CCNCC5)c4c3)c2cc1 +O=C(O)C(Cc1ccccc1)CC(O)C(N)Cc2ccccc2 +CC1CN(CC(C)O1)c1cc([N+]([O-])=O)c(NCc2ccc(CN3CC(O)C(O)C(O)C3CO)cc2)cc1 +O=P(O)(OCC1OCC(O)C1O)OP(=O)(O)OCC4OC(n3c(Br)nc2c(ncnc23)N)C(O)C4O +Fc1cc(cc(F)c1CNC(=O)C1=NN(c2cnccc2)C(C)=CC1=O)c1ccccc1 +OC(=O)[CH]1CCCN1C(=O)C(CS)CS +CCc1nc2ccc(Cl)cn2c1C(=O)NCc3ccc(cc3)N4CCC(CC4)c5ccc(OC(F)(F)F)cc5 
+C[CH]1CO[CH](CN1C[CH]2C[CH]2CCNC(=O)c3[nH]c4ccccc4c3)c5ccc(N)nc5 +Brc2c1c(nnn1CCCO)c(Br)c(Br)c2Br +CCCOc1ccc2[nH]ccc2c1 +N[CH](Cc1c[nH]c2ccccc12)C(N)=O +OCc1cn(nn1)c2ccc(Cl)c(c2Cl)C(F)(F)F +CC(C)=CCCC(C)=CCO[P](O)(O)=O +Clc1nc(nc(n1)Nc5ccc(Nc4cc(c(N)c3C(=O)c2ccccc2C(=O)c34)S(=O)(=O)O)cc5S(=O)(=O)O)Nc6ccccc6S(=O)(=O)O +O=C1c2ncn(c2N=C(N)N1)C3OC(C(O)C3O)COP(=O)(O)O +c1cc2c(cc1)C(N(C)C2O)=O +O=S(=O)(NC(=O)c1ccc(nc1N1CC(C)CC1(C)C)n1ccc(OCC(C)(C)C(F)(F)F)n1)c1cn(C)nc1C +c2c(c1cscc1)csc2COc3cccc(c3)CN(C\C=C\C#CC(C)(C)C)CC +COC(=O)C(O)=CC(=O)C1(CCN(CC1)Cc2cccc(OC)c2)Cc3ccc(Cl)cc3 +O=C(NCCCC(C(=O)O)NC(=O)C)CP(=O)(O)O +N[CH](CCCC=C)C(O)=O +Cc1ccccc1Oc2cc(N3C(=O)NC(=CC3=O)C(F)(F)F)c4ccccc4c2C#N +FC1(OC(C(NC(=O)C)C(O)C1F)C(O)C(O)CO)C(=O)O +O=S(=O)(N2CCCC(C(=O)\C=C(/O)C(=O)O)(Cc1ccc(Cl)cc1)C2)Cc3ccccc3 +O=C(O)C1C(C(=O)O)C1C(N)C(=O)O +FC1=CNC(=O)NC1O +O=C(O)C(=[N@H])CO +FC(F)(F)c1cc2nc(c(nc2cc1)C(=O)O)NCc3ccc(OC)c(F)c3 +O=C1NC(=O)N(C=C1)C2OC(C(O)C2O)CN3CCCC3 +C[CH]1[CH]2CC[C]3(C)[CH](CC=C4[CH]5CC(C)(C)CC[C]5(CC[C]34C)C(O)=O)[C]2(C)CCC1=O +N[CH](CCCCN=C(CF)C(O)=O)C(O)=O +[Se-]C#N +O=C(\C=C)CCC(C)C1CCC2C1(CCCC2=[C@H][C@H]=C3CC(C(\C(C3)O)=C)O)C +O=C(CC(c1ccccc1)c2ccccc2)Nc3sc4ccccc4n3 +O=C(NO)C(O)C(C(=O)NC(C(=O)O)c1ccccc1)CC(C)C +CN(Cc1ccc2OCOc2c1)C3=Nc4n(ncc4C(=O)N3)C(C)(C)C +O=C1N=C(N=C1C3CCNC(=O)C2=NC(Br)=CC23)N +Fc3cc4C(O)=C(C2=Nc1c(cccc1)S(=O)(=O)N2)C(=O)N(c4cc3)CCC5CC5 +[Ho+3] +O=C(NC(C)(C)C)Nc1cc2nccnc2cc1 +O=C(O)C(O)C1CCCN(C1)c1ncnc2[NH]c3cc(F)ccc3c12 +CCN(CC)[S](=O)(=O)c1ccc(O)c(NC(=O)c2[nH]c(C)c(C(C)=O)c2CC)c1 +FC(F)(F)c1ccc2c(c1)NC(CN2)=O +Nc1ncnc2n(cnc12)c3oc(CO[P](O)(=O)Oc4cc(oc4CO)n5cnc6c(N)ncnc56)c(O)c3 +CC[CH](N)c1ccccc1O +O=C(O)C3N2C(=O)C1(NCCC1)C=CC2CC3 +O=C(O)C(NC)C(O)C +CC(=O)N1CC2CC1CN2C(=O)CC1(CCOc2ccc(Cl)cc21)C(=O)Nc1cncc2ccccc21 +N#CC1(CC1)CS(=O)(=O)N1Cc2ccc(Cl)cc2C2(CCN(C2=O)c2cncc3ccccc32)C1 +[Ni]C#[O+] +c1ccnc(n1)O +CNc1nc(Cl)nc2n(Cc3cc(Cl)ccc3N[S](C)(=O)=O)cnc12 +O=C(C(=O)C(C)(C)CC)N3C(C(=O)OC(CCc1ccccc1)C2CCCCC2)CCCC3 
+FC(F)(F)c1cc(ccc1)N2C=CC(=C(C2=O)c4cc3cnc(nc3cc4)N)C +COc1ccc(cc1F)N +O=C1N(C(=O)CC(=O)N1)CCc2ccccc2 +O=S(=O)(O)c3cc(/N=N/c2c(nn(c1c(cc(cc1)S(=O)(=O)O)C)c2O)C)c(C(=O)O)cc3 +CC(C)C[CH]1C[CH](C1)c2nnc([CH]3CN(C[CH]3C(=O)Nc4ccc(C)cc4C)C(C)=O)n2C5CC5 +N[CH](CCC(=O)N[CH](CS[C]1(Cc2ccccc2)NC(=O)[C](S)(CO)NC1=O)C(=O)NCC(O)=O)C(O)=O +[Ni]O[Fe](C=O)(C=O)C#N +COC(=O)C(NC(=O)C)CS +c1cc(c(cc1)[N+](=O)[O-])C(=O)c2c(nc(s2)NC4C3CC(CC3)C4)N +O=C(O)C(OC(=O)C(NC(=O)CCCC(C(=O)O)N)CS)CSC +Fc1ccc(cc1C(=O)NCc2cc[nH]n2)[CH]3CCNC[CH]3COc4ccc5[nH]ncc5c4 +CN(C)Cc1nc2cccc3C(=O)NCCn1c23 +n1c(C)c(O)c(\C=N\CCCCC(N)C(=O)O)c(c1)COP(=O)(O)O +FC(F)(F)C(C)C1=NN(c2ccccc2C)C(=O)c2cc(F)c(cc21)N1N=C(CO)N(CC)C1=O +N(C=Cc1ccccc1)c2[nH]c3ccccc3n2 +O=C(OCc1ccccc1)C4NCC2(c3c(NC2=O)cccc3)C4 +C[N+](C)(C)C +O=C(O)CCc2ccccc2OCCCOc1c(nc(nc1CC)N)N +O=C1NC(=Nc2ccccc12)c3ccc(cc3)N4CCCCC4 +O=P(O)(O)OCC[N+](C)(C)C +Clc1cnc(Nc2ccccc2)nc1Nc3cccc(NC(=O)C=C)c3 +O=C(O)C(N)COC(O)(O)C +OC1COCC1(C)N1CCN(CC1)c1cc2cc(NC(=O)C3CC43CCOCC4)ncc2cc1Cl +O=C(Nc1c(C)cc(C)cc1C)CO +CC(=O)c1nc(NC(=O)C2(N)CCC2)sc1c3cncc(N)c3 +CN1N=CC(Nc2ccc(CN(C)C)cc2)=C(Cl)C1=O +c1(ccc(cc1)C2=C(C3C(CC2O3)S(Oc4ccc(cc4)Br)(=O)=O)c5ccc(cc5)O)O +O=C(NC(c1ccccc1)C)CCC3=Nc2c(cccc2)C(=O)N3 +CCN1CCC(C1)COc1ccc(cc1)C1c2ccc(O)cc2CC2(CC2)N1C(=O)c1ccccc1 +O=S(=O)(O)N2c1cc(c(cc1N(C3=NC(=O)NC(=O)C23)CC(O)C(O)C(O)COP(=O)(O)OP(=O)(O)OCC6OC(n5cnc4c(ncnc45)N)C(O)C6O)C)C +O=C(CN1CCC(CC1)CN1Cc2ccccc2C1=O)c1ccc(F)cc1 +FC(F)(F)Oc1ccc(cc1)/C=C/c1cc(ccc1N1CCCC1)C(N)=O +O=C(N)C(CCC)CCC +C1/C(CCC2C1CCCC2)=C(/c3ccc(cc3)O)c4ccc(O)cc4 +O2C(SC1OC(C(O)C(O)C1O)CO)C(O)C(O)C(O)C2CO +O=C1NC(=O)N(C=C1C)C2OC(C(O)C2OCCNC(N)N)COP(=O)(O)O +COc1cccc(OC)c1C(O)=O +COC(=O)[CH]1C[CH](O)[CH](O)[CH](CO)O1 +Fc1c(ccnc1C(F)(F)F)C(=O)N2CCC(CC2)N3CC(CC#N)(C3)n4cc(cn4)c5ncnc6[nH]ccc56 +Cc1cnn(c2ccc(OCc3ccccc3)c(c2)C(O)=O)c1c4ccc(Cl)c(F)c4 +Nc1ncnc2n(cnc12)[CH]3O[CH](COCC#Cc4nc5c(N)ncnc5n4[CH]6O[CH](CO)[CH](O)[CH]6O[P](O)(O)=O)[CH](O)[CH]3O +C1CCCNC1C4(CN(C(c2c(c(c(cc2)F)F)Nc3ccc(I)cc3F)=O)C4)O 
+O=C(O)CCc2onc(c1ccccc1)c2 +OC(=O)c1cc(F)c2nc(sc2c1)N3[CH]4CC[CH]3C[CH](C4)OCc5c(onc5c6ccccc6OC(F)(F)F)C7CC7 +O=C1NC(=O)N(C=C1C)C2OC(C(O)C2OCC(=O)NC)COP(=O)(O)O +C[CH](N[S](=O)(=O)CCCOCN1C=CC(=O)NC1=O)c2cccc(OC3CCCC3)c2 +Cc1ncc(s1)CNc2cc(cnc2)F +O=C1C=NC2=C(N1)C(=O)N=C(N)N2 +C1(C=CS(C1)(=O)=O)N(C(C)=O)c2ccc(F)cc2 +COc1cc(ccc1Nc2nc3[nH]cc(C#N)c3c(NC4CCCCC4)n2)N5CCCC5=O +O=C(O)C1CCCCN1Cc1ccccc1 +O=C(OC)NCC(c1c(F)ccc(c1)CCCCCc2ccc(cc2)NC(=O)C(N3C(=O)C(N)CCC3)C4CCCCC4)Cc5cc6ccccc6nc5 +OC[CH]1O[CH](CNC(=O)c2ccc(F)c3ccccc23)[CH](O)[CH](OCC4=Cc5c(F)c(F)ccc5OC4=O)[CH]1O +N[CH](CCCCNC(=O)OCc1ccccc1N=[N+]=[N-])C(O)=O +c3(nc(NC1CCN(CC)CC1)c2cc(OC)c(cc2n3)OC)N(C)C4CCCCC4 +NC1CCc2cc(cnc2NC1=O)/C=C/C(=O)N(C)Cc1oc2ccccc2c1C +O=C(c1ccc(N)cc1)NC(CC(C)C)CC(O)C(NC(=O)COc2c(cccc2C)C)Cc3ccccc3 +C1C(CCC(C(C)(O)C)C1)NC(=O)c4ccc2n(cc(n2)c3ccccc3)c4 +CN(C)C(=O)N[CH]1CC[CH](CC1)CCN2CCN(CC2)c3cccc(Cl)c3Cl +Cc1ccccc1Oc2cc(N3C(=O)NC(=CC3=O)C(F)(F)c4ccccc4)c(F)cc2C#N +O=C1NCC(O)CCC1NC(=O)C(OC)C(O)C(O)C(O)/C=C/C(C)(C)C +COc1cc2CCN(C=O)[CH](CCc3c[nH]c4ccccc34)c2cc1OC +O=S(=O)(N(Cc1occc1)CC(O)Cn4c2ccccc2c3ccccc34)C +COc1cc(ccc1Nc4ncc3NC(=O)Nc2cc(C)ccc2c3n4)N5CC(C)NC(C)C5 +c1nc(O)ccc1 +FC1C(O)C(O)C(OC1O)C +O=S(=O)(N)c1ccc(cc1)Nc2nc3ccc(cn3c2)C(=O)c4c(Cl)cccc4Cl +NC(C(O)=O)Cc1cc(C(=O)O)nn1c2ccc(cc2)CCCF +CCOc1c(scc1)C(N2CCN(CC2)c3cc(c(cc3)[N+]([O-])=O)N4CCCC4)=O +Cc1cc(SC[CH](N)C(O)O)c(O)c(O)c1C +O=C(OC1C(NCC1O)Cc2ccc(OC)cc2)C +O(CCCCCN(C)C)C1CCCCCCCCCCC1 +O=C2c1c(ncnc1N)CC(C2)(C)C +O=S1(=O)CCC(C1)N(CCC(N)=O)C(C)=O +c2c1c(nc(N)s1)ccc2C(=O)N +O=C(NC1CC1)c1cnn(C)c1C(=O)Nc1cc2nc(cn2cc1)c1ccccc1 +O=C(O)C(N)Cc1c(cc(O)cc1C)C +C(c1ccc(cc1)CN(CC(Nc2ccc(C)cc2)=O)C(c3ccc(cc3)N(C)C)=O)(NO)=O +Clc1c(OC)cc(OC)c(Cl)c1NC(=O)N(c2ncnc(c2)Nc4ccc(N3CCN(CC)CC3)cc4)C +O=C1NC=C(NC1=O)C1COc2cc(ccc2S1)C#N +CON=CC(C)(C)NC(=O)[CH](OC)Oc1ccc2ncc(cc2c1)C#C +CC(=O)N(C1CC1)[CH](C(=O)NC2CCCC2)c3ccc(F)cc3 +C[CH](CS)C(=O)N1CCCCC[CH]1C(O)=O +O=C1N=C(N)C=CN1C2OC(C(O)C2O)COP(=O)(O)O +O=C(NC1(C(=O)NCC#N)CCCCC1)c2ccc(Br)cc2N 
+O=C(O)c1cc(nn1CCN)c4ccnc(c2cc3ccccc3nc2)c4 +CNC(N\C(=N)NCCCNC(OC2C(C)C(OC(C(C)(O)C1C(C)C(OC(O1)(C)C)C(C)CC(C(C2C)OCC#C)(O)C)CC)=O)=O)=O +Oc1c(CNC(CCCCN)C(=O)O)c(cnc1C)COP(=O)(O)O +CN1[CH]2CC[CH]1C[CH](C2)OC(c3ccccc3)c4ccccc4 +O=C(O)CCc2onc(c1ccccc1)c2 +[S-]P(=O)(O)OP(=O)(O)O \ No newline at end of file diff --git a/tests/interface/test_rdkit.py b/tests/interface/test_rdkit.py new file mode 100644 index 000000000..24e56bf77 --- /dev/null +++ b/tests/interface/test_rdkit.py @@ -0,0 +1,160 @@ +from pathlib import Path +import numpy as np +import pytest +from rdkit.Chem import MolFromSmiles, MolToSmiles +from rdkit.Chem.rdchem import Atom, EditableMol, Mol +from rdkit.Chem.rdchem import BondType as RDKitBondType +from rdkit.Chem.rdmolops import ( + AddHs, + RemoveStereochemistry, +) +import biotite.interface.rdkit as rdkit_interface +import biotite.structure as struc +import biotite.structure.info as info +from biotite.interface import LossyConversionWarning +from tests.util import data_dir + + +def _load_smiles(): + with open(Path(data_dir("interface")) / "smiles.txt") as file: + return file.read().splitlines() + + +@pytest.mark.filterwarnings( + "ignore:" + "The coordinates are missing for some atoms. " + "The fallback coordinates will be used instead" +) +@pytest.mark.parametrize( + "res_name", np.random.default_rng(0).choice(info.all_residues(), size=200).tolist() +) +def test_conversion_from_biotite(res_name): + """ + Test a round trip conversion of a small molecule (single residue) from Biotite to + RDKit and back and expect to recover the same molecule. + + Run this on randomly selected molecules from the CCD. 
+ """ + ref_atoms = info.residue(res_name, allow_missing_coord=True) + + mol = rdkit_interface.to_mol(ref_atoms) + test_atoms = rdkit_interface.from_mol(mol, add_hydrogen=False) + + assert test_atoms.atom_name.tolist() == ref_atoms.atom_name.tolist() + assert test_atoms.element.tolist() == ref_atoms.element.tolist() + assert test_atoms.charge.tolist() == ref_atoms.charge.tolist() + # Some compounds in the CCD have missing coordinates + assert np.allclose(test_atoms.coord, ref_atoms.coord, equal_nan=True) + + # There should be no undefined bonds + assert (test_atoms.bonds.as_array()[:, 2] != struc.BondType.ANY).all() + # Kekulization returns one of multiple resonance structures, so the returned one + # might not be the same as the input + # -> Only check non aromatic bonds for equality + ref_is_aromatic = np.isin( + ref_atoms.bonds.as_array()[:, 2], + [ + struc.BondType.AROMATIC_SINGLE, + struc.BondType.AROMATIC_DOUBLE, + struc.BondType.AROMATIC_TRIPLE, + struc.BondType.AROMATIC, + ], + ) + test_is_aromatic = np.isin( + test_atoms.bonds.as_array()[:, 2], + [ + struc.BondType.AROMATIC_SINGLE, + struc.BondType.AROMATIC_DOUBLE, + struc.BondType.AROMATIC_TRIPLE, + struc.BondType.AROMATIC, + ], + ) + assert np.all(ref_is_aromatic == test_is_aromatic) + # Check also the non-aromatic bonds + assert set( + tuple(bond) for bond in test_atoms.bonds.as_array()[~test_is_aromatic] + ) == set(tuple(bond) for bond in ref_atoms.bonds.as_array()[~ref_is_aromatic]) + + +def test_conversion_from_biotite_multi_model(): + """ + Same as :func:`test_conversion_from_biotite()`, but with a multi-model structure. 
+ """ + RES_NAME = "ALA" + STACK_DEPTH = 2 + + ref_atoms = struc.stack([info.residue(RES_NAME)] * STACK_DEPTH) + + mol = rdkit_interface.to_mol(ref_atoms) + test_atoms = rdkit_interface.from_mol(mol) + + assert test_atoms.atom_name.tolist() == ref_atoms.atom_name.tolist() + assert test_atoms.element.tolist() == ref_atoms.element.tolist() + assert test_atoms.charge.tolist() == ref_atoms.charge.tolist() + assert np.allclose(test_atoms.coord.tolist(), ref_atoms.coord.tolist()) + assert test_atoms.bonds.as_set() == ref_atoms.bonds.as_set() + + +@pytest.mark.parametrize("smiles", _load_smiles()) +def test_conversion_from_rdkit(smiles): + """ + Test a round trip conversion of a small molecule (single residue) from RDKit to + Biotite and back and expect to recover the same molecule. + + Start from SMILES string to ensure that built-in functionality of RDKit is used + to create the initial molecule. + """ + ref_mol = MolFromSmiles(smiles) + atoms = rdkit_interface.from_mol(ref_mol) + test_mol = rdkit_interface.to_mol(atoms) + + # The intermediate AtomArray has explicit hydrogen atoms so add them explicitly + # to the reference as well for fair comparison + ref_mol = AddHs(ref_mol) + # The intermediate AtomArray does not have stereochemistry information, + # so this info cannot be preserved in the comparison + RemoveStereochemistry(ref_mol) + + # RDKit does not support equality checking -> Use SMILES string as proxy + assert MolToSmiles(test_mol) == MolToSmiles(ref_mol) + + +def test_kekulization(): + """ + Check if a benzene ring has alternating single and double bonds. 
+ """ + atoms = info.residue("BNZ") + atoms = atoms[atoms.element != "H"] + # Omit hydrogen for easier comparison of aromatic bond types later on + ref_bond_types = atoms.bonds.as_array()[:, 2] + + mol = rdkit_interface.to_mol(atoms) + atoms = rdkit_interface.from_mol(mol, add_hydrogen=False) + test_bond_types = atoms.bonds.as_array()[:, 2] + + assert ( + test_bond_types.tolist() == ref_bond_types.tolist() + # There are two possible resonance structures -> swap single and double bonds + or [ + struc.BondType.AROMATIC_SINGLE + if btype == struc.BondType.AROMATIC_DOUBLE + else struc.BondType.AROMATIC_DOUBLE + for btype in test_bond_types + ] + == ref_bond_types.tolist() + ) + + +def test_unmappable_bond_type(): + """ + Test that a warning is raised when a bond type cannot be mapped to Biotite. + """ + mol = EditableMol(Mol()) + mol.AddAtom(Atom("F")) + mol.AddAtom(Atom("F")) + # 'HEXTUPLE' has no corresponding Biotite bond type + mol.AddBond(0, 1, RDKitBondType.HEXTUPLE) + mol = mol.GetMol() + + with pytest.warns(LossyConversionWarning): + rdkit_interface.from_mol(mol) diff --git a/tests/interface/test_version.py b/tests/interface/test_version.py new file mode 100644 index 000000000..226225816 --- /dev/null +++ b/tests/interface/test_version.py @@ -0,0 +1,26 @@ +import pytest +from biotite.interface.version import VersionError, requires_version + + +def test_requires_version_for_incompatible_version(): + """ + Expect an exception if the required package version for a function is not met. + """ + + @requires_version("biotite", ">999") + def function_with_incompatible_version(): + pass + + with pytest.raises(VersionError): + function_with_incompatible_version() + + +def test_requires_version_for_missing_package(): + """ + Expect an exception if the required package for a function is not installed. 
+ """ + with pytest.raises(ImportError): + + @requires_version("missing", ">=1.0") + def _function_with_missing_package(): + pass diff --git a/tests/test_doctest.py b/tests/test_doctest.py index 6da470bf8..02ee064ba 100644 --- a/tests/test_doctest.py +++ b/tests/test_doctest.py @@ -170,6 +170,13 @@ is_not_installed("vina"), reason="Software is not installed" ), ), + pytest.param( + "biotite.interface.rdkit", + ["biotite.structure", "biotite.structure.info"], + marks=pytest.mark.skipif( + cannot_import("rdkit"), reason="Software is not installed" + ), + ), ]