From ee95a95365b13d7ee361505d340639e592d28cdc Mon Sep 17 00:00:00 2001 From: Alex Carlin Date: Mon, 24 Jul 2023 23:58:28 -0700 Subject: [PATCH 1/9] Implement fetching from AlphaFold DB - add module for fetching from AlphaFold DB - add tests for module - more to come --- src/biotite/database/alphafold/__init__.py | 12 +++ src/biotite/database/alphafold/download.py | 120 +++++++++++++++++++++ tests/database/test_alphafold.py | 46 ++++++++ 3 files changed, 178 insertions(+) create mode 100644 src/biotite/database/alphafold/__init__.py create mode 100644 src/biotite/database/alphafold/download.py create mode 100644 tests/database/test_alphafold.py diff --git a/src/biotite/database/alphafold/__init__.py b/src/biotite/database/alphafold/__init__.py new file mode 100644 index 000000000..498422158 --- /dev/null +++ b/src/biotite/database/alphafold/__init__.py @@ -0,0 +1,12 @@ +# This source code is part of the Biotite package and is distributed +# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further +# information. + +""" +A subpackage for downloading protein structures from the AlphaFold DB +""" + +__name__ = "biotite.database.alphafold" +__author__ = "Alex Carlin" + +from .download import * diff --git a/src/biotite/database/alphafold/download.py b/src/biotite/database/alphafold/download.py new file mode 100644 index 000000000..5b4b69c4a --- /dev/null +++ b/src/biotite/database/alphafold/download.py @@ -0,0 +1,120 @@ +# This source code is part of the Biotite package and is distributed +# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further +# information. 
+ +__name__ = "biotite.database.alphafold" +__author__ = "Alex Carlin" +__all__ = ["fetch"] + +from os.path import isdir, isfile, join, getsize +import os +import io +import requests +from .check import assert_valid_response + + +_fetch_url = "https://alphafold.com/api/prediction/" + + +def fetch(ids, target_path=None, format="pdb", overwrite=False, verbose=False): + """ + Download predicted protein structures from the AlphaFold DB. + + This function requires an internet connection. + + Parameters + ---------- + ids : str or iterable object of str + A single ID or a list of IDs of the file(s) + to be downloaded. + target_path : str, optional + The target directory of the downloaded files. + By default, the file content is stored in a file-like object + (`StringIO` or `BytesIO`, respectively). + format : {"pdb", "cif", "bcif"} + The format of the files to be downloaded. + overwrite : bool, optional + If true, existing files will be overwritten. Otherwise the + respective file will only be downloaded if the file does not + exist yet in the specified target directory or if the file is + empty. (Default: False) + verbose: bool, optional + If true, the function will output the download progress. + (Default: False) + + Returns + ------- + files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO) + The file path(s) to the downloaded files. + If a single string (a single ID) was given in `ids`, + a single string is returned. If a list (or other iterable + object) was given, a list of strings is returned. + If no `target_path` was given, the file contents are stored in + either `StringIO` or `BytesIO` objects. 
+ + Examples + -------- + + >>> import os.path + >>> file = fetch("P12345", path_to_directory) + >>> print(os.path.basename(file)) + P12345.pdb + >>> files = fetch(["P12345", "Q8K9I1"], path_to_directory) + >>> print([os.path.basename(file) for file in files]) + ['P12345.pdb', 'Q8K9I1.pdb'] + """ + + # If only a single ID is present, + # put it into a single element list + if isinstance(ids, str): + ids = [ids] + single_element = True + else: + single_element = False + # Create the target folder, if not existing + if target_path is not None and not isdir(target_path): + os.makedirs(target_path) + files = [] + for i, id in enumerate(ids): + # Verbose output + if verbose: + print(f"Fetching file {i + 1:d} / {len(ids):d} ({id})...", + end="\r") + # Fetch file from database + if target_path is not None: + file = join(target_path, id + "." + format) + else: + # 'file = None' -> store content in a file-like object + file = None + if file is None \ + or not isfile(file) \ + or getsize(file) == 0 \ + or overwrite: + if format in ["pdb", "cif", "bcif"]: + metadata_response = requests.get(f"{_fetch_url}/{id}") + metadata_json = metadata_response.json()[0] + # a list of length 1 is always returned + file_url = metadata_json[f"{format}Url"] + file_response = requests.get(file_url) + content = file_response.text + assert_valid_response(r.status_code) + else: + raise ValueError(f"Format '{format}' is not supported") + if file is None: + file = io.StringIO(content) + else: + with open(file, "w+") as f: + f.write(content) + files.append(file) + if verbose: + print("\nDone") + # If input was a single ID, return only a single path + if single_element: + return files[0] + else: + return files + + + + + diff --git a/tests/database/test_alphafold.py b/tests/database/test_alphafold.py new file mode 100644 index 000000000..9bf1d76f7 --- /dev/null +++ b/tests/database/test_alphafold.py @@ -0,0 +1,46 @@ +# This source code is part of the Biotite package and is distributed +# under the 
3-Clause BSD License. Please see 'LICENSE.rst' for further +# information. + +import itertools +import tempfile +import pytest +import biotite.database.alphafold as alphafold +import biotite.structure.io.pdb as pdb +from biotite.database import RequestError +from ..util import cannot_connect_to + + +ALPHAFOLD_URL = "https://alphafold.com/" + + +@pytest.mark.skipif( + cannot_connect_to(ALPHAFOLD_URL), + reason="AlphaFold is not available" +) +@pytest.mark.parametrize( + "as_file_like", + itertools.product([False, True]) +) +def test_fetch(as_file_like): + path = None if as_file_like else tempfile.gettempdir() + file = alphafold.fetch( + "P12345", path, overwrite=True + ) + pdb_file = pdb.PDBFile.read(file) + array_stack = pdb_file.get_structure() + assert len(array_stack) > 0 + + +@pytest.mark.skipif( + cannot_connect_to(ALPHAFOLD_URL), + reason="AlphaFold is not available" +) +@pytest.mark.parametrize("format", ["pdb", "cif", "bcif"]) +def test_fetch_invalid(format): + with pytest.raises(RequestError): + file = alphafold.fetch( + "XYZ", format, tempfile.gettempdir(), overwrite=True + ) + + From 2344d5549e63d152b11037aa4182049d6b7c7f6e Mon Sep 17 00:00:00 2001 From: Alex Carlin Date: Thu, 17 Aug 2023 15:24:13 -0700 Subject: [PATCH 2/9] Create basic tests --- src/biotite/database/alphafold/download.py | 10 ++++++---- tests/database/test_alphafold.py | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/biotite/database/alphafold/download.py b/src/biotite/database/alphafold/download.py index 5b4b69c4a..0bff83814 100644 --- a/src/biotite/database/alphafold/download.py +++ b/src/biotite/database/alphafold/download.py @@ -13,7 +13,7 @@ from .check import assert_valid_response -_fetch_url = "https://alphafold.com/api/prediction/" +_fetch_url = "https://alphafold.com/api/prediction" def fetch(ids, target_path=None, format="pdb", overwrite=False, verbose=False): @@ -90,14 +90,16 @@ def fetch(ids, target_path=None, format="pdb", overwrite=False, 
verbose=False): or not isfile(file) \ or getsize(file) == 0 \ or overwrite: - if format in ["pdb", "cif", "bcif"]: + if format in ["pdb", "cif"]: metadata_response = requests.get(f"{_fetch_url}/{id}") - metadata_json = metadata_response.json()[0] + assert_valid_response(metadata_response.status_code) + metadata_json = metadata_response.json()[0] + print(metadata_json) # a list of length 1 is always returned file_url = metadata_json[f"{format}Url"] file_response = requests.get(file_url) + assert_valid_response(file_response.status_code) content = file_response.text - assert_valid_response(r.status_code) else: raise ValueError(f"Format '{format}' is not supported") if file is None: diff --git a/tests/database/test_alphafold.py b/tests/database/test_alphafold.py index 9bf1d76f7..4736f3cbc 100644 --- a/tests/database/test_alphafold.py +++ b/tests/database/test_alphafold.py @@ -36,11 +36,11 @@ def test_fetch(as_file_like): cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold is not available" ) -@pytest.mark.parametrize("format", ["pdb", "cif", "bcif"]) +@pytest.mark.parametrize("format", ["pdb", "cif"]) def test_fetch_invalid(format): with pytest.raises(RequestError): file = alphafold.fetch( - "XYZ", format, tempfile.gettempdir(), overwrite=True + "XYZ", target_path=tempfile.gettempdir(), format=format, overwrite=True ) From bf74ac9ef0460efed5ee08902c66fd62892ee2d7 Mon Sep 17 00:00:00 2001 From: Alex Carlin Date: Mon, 21 Aug 2023 13:29:18 -0700 Subject: [PATCH 3/9] Update tests to work with CIF files --- src/biotite/database/alphafold/download.py | 1 - tests/database/test_alphafold.py | 26 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/biotite/database/alphafold/download.py b/src/biotite/database/alphafold/download.py index 0bff83814..85f58ab33 100644 --- a/src/biotite/database/alphafold/download.py +++ b/src/biotite/database/alphafold/download.py @@ -94,7 +94,6 @@ def fetch(ids, target_path=None, format="pdb", overwrite=False, 
verbose=False): metadata_response = requests.get(f"{_fetch_url}/{id}") assert_valid_response(metadata_response.status_code) metadata_json = metadata_response.json()[0] - print(metadata_json) # a list of length 1 is always returned file_url = metadata_json[f"{format}Url"] file_response = requests.get(file_url) diff --git a/tests/database/test_alphafold.py b/tests/database/test_alphafold.py index 4736f3cbc..cce08a1d2 100644 --- a/tests/database/test_alphafold.py +++ b/tests/database/test_alphafold.py @@ -6,7 +6,8 @@ import tempfile import pytest import biotite.database.alphafold as alphafold -import biotite.structure.io.pdb as pdb +import biotite.structure.io.pdb as pdb +import biotite.structure.io.pdbx as pdbx from biotite.database import RequestError from ..util import cannot_connect_to @@ -44,3 +45,26 @@ def test_fetch_invalid(format): ) +@pytest.mark.skipif( + cannot_connect_to(ALPHAFOLD_URL), + reason="AlphaFold is not available" +) +@pytest.mark.parametrize("format", ["pdb", "cif"]) +def test_fetch_multiple(format): + acc = ["P12345", "P12345"] + files = alphafold.fetch( + acc, target_path=tempfile.gettempdir(), format=format, overwrite=True + ) + print(files) + for file in files: + if format == "pdb": + pdb_file = pdb.PDBFile.read(file) + structure = pdb_file.get_structure() + assert len(structure) > 0 + elif format == "cif": + cif_file = pdbx.PDBxFile.read(file) + assert "citation_author" in cif_file.keys() + + + + From bf239b742660d22d5d097555eeafd47068bc2559 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sat, 25 Jan 2025 12:57:34 +0100 Subject: [PATCH 4/9] Reformat code --- src/biotite/database/alphafold/download.py | 24 +++------- tests/database/test_alphafold.py | 55 +++++++++------------- 2 files changed, 28 insertions(+), 51 deletions(-) diff --git a/src/biotite/database/alphafold/download.py b/src/biotite/database/alphafold/download.py index 85f58ab33..498d905cf 100644 --- a/src/biotite/database/alphafold/download.py +++ 
b/src/biotite/database/alphafold/download.py @@ -6,19 +6,18 @@ __author__ = "Alex Carlin" __all__ = ["fetch"] -from os.path import isdir, isfile, join, getsize -import os import io +import os +from os.path import getsize, isdir, isfile, join import requests -from .check import assert_valid_response - +from biotite.database.alphafold.check import assert_valid_response _fetch_url = "https://alphafold.com/api/prediction" def fetch(ids, target_path=None, format="pdb", overwrite=False, verbose=False): """ - Download predicted protein structures from the AlphaFold DB. + Download predicted protein structures from the AlphaFold DB. This function requires an internet connection. @@ -78,23 +77,19 @@ def fetch(ids, target_path=None, format="pdb", overwrite=False, verbose=False): for i, id in enumerate(ids): # Verbose output if verbose: - print(f"Fetching file {i + 1:d} / {len(ids):d} ({id})...", - end="\r") + print(f"Fetching file {i + 1:d} / {len(ids):d} ({id})...", end="\r") # Fetch file from database if target_path is not None: file = join(target_path, id + "." 
+ format) else: # 'file = None' -> store content in a file-like object file = None - if file is None \ - or not isfile(file) \ - or getsize(file) == 0 \ - or overwrite: + if file is None or not isfile(file) or getsize(file) == 0 or overwrite: if format in ["pdb", "cif"]: metadata_response = requests.get(f"{_fetch_url}/{id}") assert_valid_response(metadata_response.status_code) metadata_json = metadata_response.json()[0] - # a list of length 1 is always returned + # a list of length 1 is always returned file_url = metadata_json[f"{format}Url"] file_response = requests.get(file_url) assert_valid_response(file_response.status_code) @@ -114,8 +109,3 @@ def fetch(ids, target_path=None, format="pdb", overwrite=False, verbose=False): return files[0] else: return files - - - - - diff --git a/tests/database/test_alphafold.py b/tests/database/test_alphafold.py index cce08a1d2..9b785e97d 100644 --- a/tests/database/test_alphafold.py +++ b/tests/database/test_alphafold.py @@ -9,62 +9,49 @@ import biotite.structure.io.pdb as pdb import biotite.structure.io.pdbx as pdbx from biotite.database import RequestError -from ..util import cannot_connect_to - +from tests.util import cannot_connect_to ALPHAFOLD_URL = "https://alphafold.com/" @pytest.mark.skipif( - cannot_connect_to(ALPHAFOLD_URL), - reason="AlphaFold is not available" -) -@pytest.mark.parametrize( - "as_file_like", - itertools.product([False, True]) + cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold is not available" ) +@pytest.mark.parametrize("as_file_like", itertools.product([False, True])) def test_fetch(as_file_like): path = None if as_file_like else tempfile.gettempdir() - file = alphafold.fetch( - "P12345", path, overwrite=True - ) + file = alphafold.fetch("P12345", path, overwrite=True) pdb_file = pdb.PDBFile.read(file) array_stack = pdb_file.get_structure() assert len(array_stack) > 0 - + @pytest.mark.skipif( - cannot_connect_to(ALPHAFOLD_URL), - reason="AlphaFold is not available" + 
cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold is not available" ) @pytest.mark.parametrize("format", ["pdb", "cif"]) def test_fetch_invalid(format): with pytest.raises(RequestError): - file = alphafold.fetch( + alphafold.fetch( "XYZ", target_path=tempfile.gettempdir(), format=format, overwrite=True ) @pytest.mark.skipif( - cannot_connect_to(ALPHAFOLD_URL), - reason="AlphaFold is not available" + cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold is not available" ) @pytest.mark.parametrize("format", ["pdb", "cif"]) def test_fetch_multiple(format): - acc = ["P12345", "P12345"] - files = alphafold.fetch( - acc, target_path=tempfile.gettempdir(), format=format, overwrite=True - ) - print(files) - for file in files: - if format == "pdb": - pdb_file = pdb.PDBFile.read(file) - structure = pdb_file.get_structure() - assert len(structure) > 0 - elif format == "cif": - cif_file = pdbx.PDBxFile.read(file) - assert "citation_author" in cif_file.keys() - - - - + acc = ["P12345", "P12345"] + files = alphafold.fetch( + acc, target_path=tempfile.gettempdir(), format=format, overwrite=True + ) + print(files) + for file in files: + if format == "pdb": + pdb_file = pdb.PDBFile.read(file) + structure = pdb_file.get_structure() + assert len(structure) > 0 + elif format == "cif": + cif_file = pdbx.PDBxFile.read(file) + assert "citation_author" in cif_file.keys() From 73bfa992606c679773d5dbc3111eb9682d32f664 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sat, 25 Jan 2025 15:38:21 +0100 Subject: [PATCH 5/9] Finalize AFDB interface --- src/biotite/database/alphafold/download.py | 156 +++++++++++++++------ tests/database/test_alphafold.py | 80 ++++++----- 2 files changed, 165 insertions(+), 71 deletions(-) diff --git a/src/biotite/database/alphafold/download.py b/src/biotite/database/alphafold/download.py index 498d905cf..bf39e1eee 100644 --- a/src/biotite/database/alphafold/download.py +++ b/src/biotite/database/alphafold/download.py @@ -3,19 +3,25 @@ # information. 
__name__ = "biotite.database.alphafold" -__author__ = "Alex Carlin" +__author__ = "Patrick Kunzmann, Alex Carlin" __all__ = ["fetch"] import io -import os -from os.path import getsize, isdir, isfile, join +import re +from pathlib import Path +from xml.etree import ElementTree import requests -from biotite.database.alphafold.check import assert_valid_response +from biotite.database.error import RequestError -_fetch_url = "https://alphafold.com/api/prediction" +_METADATA_URL = "https://alphafold.com/api/prediction" +_BINARY_FORMATS = ["bcif"] +# Adopted from https://www.uniprot.org/help/accession_numbers +_UNIPROT_PATTERN = ( + "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}" +) -def fetch(ids, target_path=None, format="pdb", overwrite=False, verbose=False): +def fetch(ids, format="pdb", target_path=None, overwrite=False, verbose=False): """ Download predicted protein structures from the AlphaFold DB. @@ -24,22 +30,19 @@ def fetch(ids, target_path=None, format="pdb", overwrite=False, verbose=False): Parameters ---------- ids : str or iterable object of str - A single ID or a list of IDs of the file(s) - to be downloaded. + A single ID or a list of IDs of the file(s) to be downloaded. + format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'} + The format of the files to be downloaded. target_path : str, optional The target directory of the downloaded files. By default, the file content is stored in a file-like object (`StringIO` or `BytesIO`, respectively). - format : {"pdb", "cif", "bcif"} - The format of the files to be downloaded. overwrite : bool, optional - If true, existing files will be overwritten. Otherwise the - respective file will only be downloaded if the file does not - exist yet in the specified target directory or if the file is - empty. (Default: False) + If true, existing files will be overwritten. 
+ Otherwise the respective file will only be downloaded if the file does not + exist yet in the specified target directory or if the file is empty. verbose: bool, optional If true, the function will output the download progress. - (Default: False) Returns ------- @@ -55,13 +58,17 @@ def fetch(ids, target_path=None, format="pdb", overwrite=False, verbose=False): -------- >>> import os.path - >>> file = fetch("P12345", path_to_directory) - >>> print(os.path.basename(file)) - P12345.pdb - >>> files = fetch(["P12345", "Q8K9I1"], path_to_directory) - >>> print([os.path.basename(file) for file in files]) - ['P12345.pdb', 'Q8K9I1.pdb'] + >>> file = fetch("P12345", "cif", path_to_directory) + >>> print(Path(file).name) + P12345.cif + >>> files = fetch(["P12345", "Q8K9I1"], "cif", path_to_directory) + >>> print([Path(file).name for file in files]) + ['P12345.cif', 'Q8K9I1.cif'] """ + if format not in ["pdb", "pdbx", "cif", "mmcif", "bcif", "fasta"]: + raise ValueError(f"Format '{format}' is not supported") + if format in ["pdbx", "mmcif"]: + format = "cif" # If only a single ID is present, # put it into a single element list @@ -70,9 +77,10 @@ def fetch(ids, target_path=None, format="pdb", overwrite=False, verbose=False): single_element = True else: single_element = False - # Create the target folder, if not existing - if target_path is not None and not isdir(target_path): - os.makedirs(target_path) + if target_path is not None: + target_path = Path(target_path) + target_path.mkdir(parents=True, exist_ok=True) + files = [] for i, id in enumerate(ids): # Verbose output @@ -80,32 +88,102 @@ def fetch(ids, target_path=None, format="pdb", overwrite=False, verbose=False): print(f"Fetching file {i + 1:d} / {len(ids):d} ({id})...", end="\r") # Fetch file from database if target_path is not None: - file = join(target_path, id + "." 
+ format) + file = target_path / f"{id}.{format}" else: # 'file = None' -> store content in a file-like object file = None - if file is None or not isfile(file) or getsize(file) == 0 or overwrite: - if format in ["pdb", "cif"]: - metadata_response = requests.get(f"{_fetch_url}/{id}") - assert_valid_response(metadata_response.status_code) - metadata_json = metadata_response.json()[0] - # a list of length 1 is always returned - file_url = metadata_json[f"{format}Url"] - file_response = requests.get(file_url) - assert_valid_response(file_response.status_code) - content = file_response.text + if file is None or not file.is_file() or file.stat().st_size == 0 or overwrite: + file_response = requests.get(_get_file_url(id, format)) + _assert_valid_file(file_response, id) + if format in _BINARY_FORMATS: + content = file_response.content else: - raise ValueError(f"Format '{format}' is not supported") + content = file_response.text + if file is None: - file = io.StringIO(content) + if format in _BINARY_FORMATS: + file = io.BytesIO(content) + else: + file = io.StringIO(content) else: - with open(file, "w+") as f: + mode = "wb+" if format in _BINARY_FORMATS else "w+" + with open(file, mode) as f: f.write(content) + files.append(file) if verbose: print("\nDone") - # If input was a single ID, return only a single path + + # Return paths as strings + files = [file.as_posix() if isinstance(file, Path) else file for file in files] + # If input was a single ID, return only a single element if single_element: return files[0] else: return files + + +def _get_file_url(id, format): + """ + Get the actual file URL for the given ID from the ``prediction`` API endpoint. + + Parameters + ---------- + id : str + The ID of the file to be downloaded. + format : str + The format of the file to be downloaded. + + Returns + ------- + file_url : str + The URL of the file to be downloaded. 
+ """ + uniprot_id = _extract_id(id) + metadata = requests.get(f"{_METADATA_URL}/{uniprot_id}").json() + if len(metadata) == 0: + raise RequestError(f"ID {id} is invalid") + # A list of length 1 is always returned, if the response is valid + return metadata[0][f"{format}Url"] + + +def _extract_id(id): + """ + Extract a AFDB compatible UniProt ID from the given qualifier. + This may comprise + + - Directly the UniProt ID (e.g. ``P12345``) (trivial case) + - Entry ID, as also returned by the RCSB search API (e.g. ``AF-P12345-F1``) + + Parameters + ---------- + id : str + The qualifier to extract the UniProt ID from. + + Returns + ------- + uniprot_id : str + The UniProt ID. + """ + match = re.search(_UNIPROT_PATTERN, id) + if match is None: + raise ValueError(f"Cannot extract AFDB identifier from '{id}'") + return match.group() + + +def _assert_valid_file(response, id): + """ + Checks whether the response is an actual structure file + or the response a *404* error due to invalid UniProt ID. + """ + if len(response.text) == 0: + raise RequestError(f"Received no repsone for '{id}'") + try: + root = ElementTree.fromstring(response.text) + if root.tag == "Error": + raise RequestError( + f"Error while fetching '{id}': {root.find('Message').text}" + ) + except ElementTree.ParseError: + # This is not XML -> the response is probably a valid file + pass diff --git a/tests/database/test_alphafold.py b/tests/database/test_alphafold.py index 9b785e97d..a811217c5 100644 --- a/tests/database/test_alphafold.py +++ b/tests/database/test_alphafold.py @@ -2,7 +2,6 @@ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further # information. 
-import itertools import tempfile import pytest import biotite.database.alphafold as alphafold @@ -11,47 +10,64 @@ from biotite.database import RequestError from tests.util import cannot_connect_to -ALPHAFOLD_URL = "https://alphafold.com/" +ALPHAFOLD_URL = "https://alphafold.ebi.ac.uk/" @pytest.mark.skipif( - cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold is not available" + cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold DB is not available" ) -@pytest.mark.parametrize("as_file_like", itertools.product([False, True])) -def test_fetch(as_file_like): +@pytest.mark.parametrize("as_file_like", [False, True]) +@pytest.mark.parametrize("entry_id", ["P12345", "AF-P12345-F1"]) +@pytest.mark.parametrize("format", ["pdb", "cif", "bcif"]) +def test_fetch(as_file_like, entry_id, format): + """ + Check if files in different formats can be downloaded by being able to parse them. + """ path = None if as_file_like else tempfile.gettempdir() - file = alphafold.fetch("P12345", path, overwrite=True) - pdb_file = pdb.PDBFile.read(file) - array_stack = pdb_file.get_structure() - assert len(array_stack) > 0 + file_path_or_obj = alphafold.fetch(entry_id, format, path, overwrite=True) + if format == "pdb": + file = pdb.PDBFile.read(file_path_or_obj) + pdb.get_structure(file) + elif format == "cif": + file = pdbx.CIFFile.read(file_path_or_obj) + pdbx.get_structure(file) + elif format == "bcif": + file = pdbx.BinaryCIFFile.read(file_path_or_obj) + pdbx.get_structure(file) @pytest.mark.skipif( - cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold is not available" + cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold DB is not available" ) -@pytest.mark.parametrize("format", ["pdb", "cif"]) -def test_fetch_invalid(format): - with pytest.raises(RequestError): - alphafold.fetch( - "XYZ", target_path=tempfile.gettempdir(), format=format, overwrite=True - ) +def test_fetch_multiple(): + """ + Check if multiple files can be downloaded by being able to parse them. 
+ """ + ids = ["P12345", "Q8K9I1"] + files = alphafold.fetch(ids, "cif", tempfile.gettempdir(), overwrite=True) + for file in files: + assert "citation_author" in pdbx.CIFFile.read(file).block @pytest.mark.skipif( - cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold is not available" + cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold DB is not available" ) -@pytest.mark.parametrize("format", ["pdb", "cif"]) -def test_fetch_multiple(format): - acc = ["P12345", "P12345"] - files = alphafold.fetch( - acc, target_path=tempfile.gettempdir(), format=format, overwrite=True - ) - print(files) - for file in files: - if format == "pdb": - pdb_file = pdb.PDBFile.read(file) - structure = pdb_file.get_structure() - assert len(structure) > 0 - elif format == "cif": - cif_file = pdbx.PDBxFile.read(file) - assert "citation_author" in cif_file.keys() +@pytest.mark.parametrize("format", ["pdb", "cif", "bcif"]) +@pytest.mark.parametrize("invalid_id", ["", "XYZ", "A0A12345"]) +@pytest.mark.parametrize("bypass_metadata", [False, True]) +def test_fetch_invalid(monkeypatch, format, invalid_id, bypass_metadata): + """ + Check if proper exceptions are raised if a given ID is invalid. + Also check whether the check works on the file retrieval level via + :func:`_get_file_url()`, by bypassing the metadata check. 
+ """ + import biotite.database.alphafold.download as module + + if bypass_metadata: + monkeypatch.setattr( + module, + "_get_file_url", + lambda id, f: f"https://alphafold.ebi.ac.uk/files/AF-{id}-F1-model_v4.{f}", + ) + with pytest.raises((RequestError, ValueError)): + alphafold.fetch(invalid_id, format) From ad7a08bb0797493dd90cb49814c221bfce6bbbda Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sat, 25 Jan 2025 15:41:26 +0100 Subject: [PATCH 6/9] Rename 'alphafold' subpackage to 'afdb' --- .../database/{alphafold => afdb}/__init__.py | 4 ++-- .../database/{alphafold => afdb}/download.py | 2 +- .../{test_alphafold.py => test_afdb.py} | 24 +++++++------------ 3 files changed, 12 insertions(+), 18 deletions(-) rename src/biotite/database/{alphafold => afdb}/__init__.py (65%) rename src/biotite/database/{alphafold => afdb}/download.py (99%) rename tests/database/{test_alphafold.py => test_afdb.py} (75%) diff --git a/src/biotite/database/alphafold/__init__.py b/src/biotite/database/afdb/__init__.py similarity index 65% rename from src/biotite/database/alphafold/__init__.py rename to src/biotite/database/afdb/__init__.py index 498422158..d1357138a 100644 --- a/src/biotite/database/alphafold/__init__.py +++ b/src/biotite/database/afdb/__init__.py @@ -3,10 +3,10 @@ # information. """ -A subpackage for downloading protein structures from the AlphaFold DB +A subpackage for downloading predicted protein structures from the AlphaFold DB. """ -__name__ = "biotite.database.alphafold" +__name__ = "biotite.database.afdb" __author__ = "Alex Carlin" from .download import * diff --git a/src/biotite/database/alphafold/download.py b/src/biotite/database/afdb/download.py similarity index 99% rename from src/biotite/database/alphafold/download.py rename to src/biotite/database/afdb/download.py index bf39e1eee..0504df67b 100644 --- a/src/biotite/database/alphafold/download.py +++ b/src/biotite/database/afdb/download.py @@ -2,7 +2,7 @@ # under the 3-Clause BSD License. 
Please see 'LICENSE.rst' for further # information. -__name__ = "biotite.database.alphafold" +__name__ = "biotite.database.afdb" __author__ = "Patrick Kunzmann, Alex Carlin" __all__ = ["fetch"] diff --git a/tests/database/test_alphafold.py b/tests/database/test_afdb.py similarity index 75% rename from tests/database/test_alphafold.py rename to tests/database/test_afdb.py index a811217c5..e01aa661e 100644 --- a/tests/database/test_alphafold.py +++ b/tests/database/test_afdb.py @@ -4,18 +4,16 @@ import tempfile import pytest -import biotite.database.alphafold as alphafold +import biotite.database.afdb as afdb import biotite.structure.io.pdb as pdb import biotite.structure.io.pdbx as pdbx from biotite.database import RequestError from tests.util import cannot_connect_to -ALPHAFOLD_URL = "https://alphafold.ebi.ac.uk/" +AFDB_URL = "https://alphafold.ebi.ac.uk/" -@pytest.mark.skipif( - cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold DB is not available" -) +@pytest.mark.skipif(cannot_connect_to(AFDB_URL), reason="AlphaFold DB is not available") @pytest.mark.parametrize("as_file_like", [False, True]) @pytest.mark.parametrize("entry_id", ["P12345", "AF-P12345-F1"]) @pytest.mark.parametrize("format", ["pdb", "cif", "bcif"]) @@ -24,7 +22,7 @@ def test_fetch(as_file_like, entry_id, format): Check if files in different formats can be downloaded by being able to parse them. 
""" path = None if as_file_like else tempfile.gettempdir() - file_path_or_obj = alphafold.fetch(entry_id, format, path, overwrite=True) + file_path_or_obj = afdb.fetch(entry_id, format, path, overwrite=True) if format == "pdb": file = pdb.PDBFile.read(file_path_or_obj) pdb.get_structure(file) @@ -36,22 +34,18 @@ def test_fetch(as_file_like, entry_id, format): pdbx.get_structure(file) -@pytest.mark.skipif( - cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold DB is not available" -) +@pytest.mark.skipif(cannot_connect_to(AFDB_URL), reason="AlphaFold DB is not available") def test_fetch_multiple(): """ Check if multiple files can be downloaded by being able to parse them. """ ids = ["P12345", "Q8K9I1"] - files = alphafold.fetch(ids, "cif", tempfile.gettempdir(), overwrite=True) + files = afdb.fetch(ids, "cif", tempfile.gettempdir(), overwrite=True) for file in files: assert "citation_author" in pdbx.CIFFile.read(file).block -@pytest.mark.skipif( - cannot_connect_to(ALPHAFOLD_URL), reason="AlphaFold DB is not available" -) +@pytest.mark.skipif(cannot_connect_to(AFDB_URL), reason="AlphaFold DB is not available") @pytest.mark.parametrize("format", ["pdb", "cif", "bcif"]) @pytest.mark.parametrize("invalid_id", ["", "XYZ", "A0A12345"]) @pytest.mark.parametrize("bypass_metadata", [False, True]) @@ -61,7 +55,7 @@ def test_fetch_invalid(monkeypatch, format, invalid_id, bypass_metadata): Also check whether the check works on the file retrieval level via :func:`_get_file_url()`, by bypassing the metadata check. 
""" - import biotite.database.alphafold.download as module + import biotite.database.afdb.download as module if bypass_metadata: monkeypatch.setattr( @@ -70,4 +64,4 @@ def test_fetch_invalid(monkeypatch, format, invalid_id, bypass_metadata): lambda id, f: f"https://alphafold.ebi.ac.uk/files/AF-{id}-F1-model_v4.{f}", ) with pytest.raises((RequestError, ValueError)): - alphafold.fetch(invalid_id, format) + afdb.fetch(invalid_id, format) From c5b1bc26e7ce48276d66e7fc820af3035795efcc Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sun, 26 Jan 2025 11:27:44 +0100 Subject: [PATCH 7/9] Fix docstring --- src/biotite/database/afdb/download.py | 16 +++++++++------- tests/test_doctest.py | 8 ++++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/biotite/database/afdb/download.py b/src/biotite/database/afdb/download.py index 0504df67b..0225ab1b2 100644 --- a/src/biotite/database/afdb/download.py +++ b/src/biotite/database/afdb/download.py @@ -21,7 +21,7 @@ ) -def fetch(ids, format="pdb", target_path=None, overwrite=False, verbose=False): +def fetch(ids, format, target_path=None, overwrite=False, verbose=False): """ Download predicted protein structures from the AlphaFold DB. @@ -31,6 +31,8 @@ def fetch(ids, format="pdb", target_path=None, overwrite=False, verbose=False): ---------- ids : str or iterable object of str A single ID or a list of IDs of the file(s) to be downloaded. + They can be either UniProt IDs (e.g. ``P12345``) or AlphaFold DB IDs + (e.g. ``AF-P12345F1``). format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'} The format of the files to be downloaded. target_path : str, optional @@ -48,16 +50,16 @@ def fetch(ids, format="pdb", target_path=None, overwrite=False, verbose=False): ------- files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO) The file path(s) to the downloaded files. - If a single string (a single ID) was given in `ids`, - a single string is returned. 
If a list (or other iterable - object) was given, a list of strings is returned. - If no `target_path` was given, the file contents are stored in - either `StringIO` or `BytesIO` objects. + If a single string (a single ID) was given in `ids`, a single string is + returned. + If a list (or other iterable object) was given, a list of strings is returned. + If no `target_path` was given, the file contents are stored in either + ``StringIO`` or ``BytesIO`` objects. Examples -------- - >>> import os.path + >>> from pathlib import Path >>> file = fetch("P12345", "cif", path_to_directory) >>> print(Path(file).name) P12345.cif diff --git a/tests/test_doctest.py b/tests/test_doctest.py index 6da470bf8..10eb4c803 100644 --- a/tests/test_doctest.py +++ b/tests/test_doctest.py @@ -15,6 +15,7 @@ NCBI_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/" RCSB_URL = "https://www.rcsb.org/" +AFDB_URL = "https://alphafold.ebi.ac.uk/" UNIPROT_URL = "https://www.uniprot.org/" PUBCHEM_URL = "https://pubchem.ncbi.nlm.nih.gov/" @@ -82,6 +83,13 @@ cannot_connect_to(RCSB_URL), reason="RCSB PDB is not available" ), ), + pytest.param( + "biotite.database.afdb", + [], + marks=pytest.mark.skipif( + cannot_connect_to(AFDB_URL), reason="AlphaFold DB is not available" + ), + ), pytest.param( "biotite.database.uniprot", [], From 5b0259f29170619c7e13ba8e22fde20946f718b3 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sun, 26 Jan 2025 11:28:02 +0100 Subject: [PATCH 8/9] Add tutorial for AFDB --- doc/tutorial/database/rcsb.rst | 46 ++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/doc/tutorial/database/rcsb.rst b/doc/tutorial/database/rcsb.rst index 8057346aa..e48bab1c7 100644 --- a/doc/tutorial/database/rcsb.rst +++ b/doc/tutorial/database/rcsb.rst @@ -128,3 +128,49 @@ Note that grouping may omit PDB IDs in search results, if such PDB IDs cannot be grouped. 
For example in the case shown above only a few PDB entries were uploaded
as collection and hence are part of the search results.
+
+Getting computational models
+----------------------------
+By default :func:`search()` only returns experimental structures.
+In addition to that the RCSB lists an order of magnitude more computational models.
+They can be included in search results by adding ``"computational"`` to the
+``content_types`` parameter.
+
+.. jupyter-execute::
+
+    query = (
+        rcsb.FieldQuery("rcsb_polymer_entity.pdbx_description", contains_phrase="Hexokinase")
+        & rcsb.FieldQuery(
+            "rcsb_entity_source_organism.scientific_name", exact_match="Homo sapiens"
+        )
+    )
+    ids = rcsb.search(query, content_types=("experimental", "computational"))
+    print(ids)
+
+The returned four-character IDs are the RCSB PDB IDs of experimental structures
+like we already saw above.
+The IDs with the ``AF_`` prefix on the other hand are computational models from
+*AlphaFold DB*.
+
+.. currentmodule:: biotite.database.afdb
+
+To download those we require another subpackage: :mod:`biotite.database.afdb`.
+Its :func:`fetch()` function works very similarly.
+
+.. 
jupyter-execute:: + + import biotite.database.afdb as afdb + + files = [] + # For the sake of run time, only download the first 5 entries + for id in ids[:5]: + if id.startswith("AF_"): + # Entry is in AlphaFold DB + files.append(afdb.fetch(id, "cif", gettempdir())) + elif id.startswith("MA_"): + # Entry is in ModelArchive, which is not yet supported + raise NotImplementedError + else: + # Entry is in RCSB PDB + files.append(rcsb.fetch(id, "cif", gettempdir())) + print([basename(file) for file in files]) \ No newline at end of file From 6de13651ed4584bcb7738e6ce3a9894771ac0170 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sat, 1 Feb 2025 19:10:12 +0100 Subject: [PATCH 9/9] Add example --- .../scripts/structure/modeling/model_lddt.py | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 doc/examples/scripts/structure/modeling/model_lddt.py diff --git a/doc/examples/scripts/structure/modeling/model_lddt.py b/doc/examples/scripts/structure/modeling/model_lddt.py new file mode 100644 index 000000000..05b6825fd --- /dev/null +++ b/doc/examples/scripts/structure/modeling/model_lddt.py @@ -0,0 +1,129 @@ +r""" +LDDT for predicted structure evaluation +======================================= + +This example evaluates the quality of a predicted structure from *AlphaFold DB* compared +to the experimental structure of a protein of interest by the means of the lDDT score. +Furthermore, the measured lDDT score is compared to the pLDDT score predicted by the +model. 
+""" + +# Code source: Patrick Kunzmann +# License: BSD 3 clause + +import matplotlib.pyplot as plt +import numpy as np +import biotite +import biotite.database.afdb as afdb +import biotite.database.rcsb as rcsb +import biotite.sequence as seq +import biotite.sequence.align as align +import biotite.structure as struc +import biotite.structure.io.pdbx as pdbx + +# Uniprot ID of the protein of interest (in this case human beta-galactosidase) +UNIPROT_ID = "P16278" + + +## Get the reference experimental structure from the PDB +query = rcsb.FieldQuery( + "rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers.database_accession", + exact_match=UNIPROT_ID, +) +# The UniProt ID is defined for a single chain +ids = rcsb.search(query, return_type="polymer_instance") +# Simply use the first matching chain as reference +pdb_id, chain_id = ids[0].split(".") +pdbx_file = pdbx.BinaryCIFFile.read(rcsb.fetch(pdb_id, "bcif")) +reference = pdbx.get_structure(pdbx_file, model=1, use_author_fields=False) +reference = reference[reference.chain_id == chain_id] +# The experimental structure may contain additional small molecules +# (e.g. water, ions etc.) 
that are not part of the predicted structure
+reference = reference[struc.filter_amino_acids(reference)]
+
+
+## Get the predicted structure from AlphaFold DB
+pdbx_file = pdbx.BinaryCIFFile.read(afdb.fetch(UNIPROT_ID, "bcif"))
+# Use 'label_' fields to make sure the residue ID is the same as given in the
+# `ma_qa_metric_local` category, where the pLDDT is obtained from
+model = pdbx.get_structure(pdbx_file, model=1, use_author_fields=False)
+
+
+## Filter the structures to common atoms that are present in both structures
+reference_sequence = struc.to_sequence(reference)[0][0]
+model_sequence = struc.to_sequence(model)[0][0]
+# This script does not rely on consistent residue numbering,
+# so a sequence alignment is done instead
+identity_matrix = align.SubstitutionMatrix(
+    seq.ProteinSequence.alphabet,
+    seq.ProteinSequence.alphabet,
+    np.eye(len(seq.ProteinSequence.alphabet), dtype=int),
+)
+alignment = align.align_optimal(
+    reference_sequence,
+    model_sequence,
+    # Residues might be missing due to experimental reasons but not due to homology
+    # -> use a simple identity matrix
+    identity_matrix,
+    gap_penalty=-1,
+    terminal_penalty=False,
+    max_number=1,
+)[0]
+# Remove residues from alignment
+# that have no correspondence in the respective other structure
+# -> Remove gaps (-1 entries in trace)
+alignment = alignment[(alignment.trace != -1).all(axis=1)]
+# Map the remaining alignment columns to atom indices
+reference = reference[
+    # Each mask is True for all atoms in one residue
+    struc.get_residue_masks(reference, struc.get_residue_starts(reference)) \
+    # Only keep masks for residues that correspond to remaining alignment columns
+    [alignment.trace[:,0]] \
+    # And aggregate them to get a single mask
+    .any(axis=0)
+]  # fmt: skip
+model = model[
+    struc.get_residue_masks(model, struc.get_residue_starts(model))[
+        alignment.trace[:, 1]
+    ].any(axis=0)
+]
+
+
+## Get predicted lDDT from the model file
+plddt_category = 
pdbx_file.block["ma_qa_metric_local"] +plddt_res_ids = plddt_category["label_seq_id"].as_array(int) +plddt = plddt_category["metric_value"].as_array(float) / 100 +# Remove values for residues that were removed in the alignment process +mask = np.isin(plddt_res_ids, model.res_id) +plddt_res_ids = plddt_res_ids[mask] +plddt = plddt[mask] + + +## Compute actual lDDT by comparing the model to the reference +lddt_res_ids = np.unique(model.res_id) +# The pLDDT predicts the lDDT of CA atoms, so for consistency we do the same +ca_mask = model.atom_name == "CA" +lddt = struc.lddt(reference[ca_mask], model[ca_mask], aggregation="residue") + + +## Compare predicted to measured lDDT +fig, ax = plt.subplots(figsize=(8.0, 4.0)) +ax.plot( + plddt_res_ids, + plddt, + color=biotite.colors["dimgreen"], + linestyle="-", + label="predicted", +) +ax.plot( + lddt_res_ids, + lddt, + color=biotite.colors["lightorange"], + linestyle="-", + label="measured", +) +ax.legend() +ax.set_xlabel("Residue ID") +ax.set_ylabel("lDDT") +ax.autoscale(axis="x", tight=True) +plt.show()