From d25c2fdeec724f0db2e1684ad269bf97b852f2e8 Mon Sep 17 00:00:00 2001
From: Cunliang Geng <c.geng@esciencecenter.nl>
Date: Mon, 10 Jun 2024 17:44:26 +0200
Subject: [PATCH] update integration tests

- simplify the `conftest.py`
- remove unused functions
- add tests for `get_links`
---
 tests/integration/conftest.py            | 55 ++++++++----------------
 tests/integration/test_nplinker_local.py | 55 +++++++++++++-----------
 2 files changed, 49 insertions(+), 61 deletions(-)

diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index ede9f6cb..d4313afe 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
 import os
 import shutil
-import tempfile
 import zipfile
 from os import PathLike
 from pathlib import Path
 import httpx
+import pytest
 from rich.progress import Progress
 from . import DATA_DIR
 
@@ -18,46 +18,27 @@
     f"https://zenodo.org/records/{dataset_doi.split('.')[-1]}/files/nplinker_local_mode_example.zip"
 )
 
-# The temporary directory for the test session
-temp_dir = tempfile.gettempdir()
-nplinker_root_dir = os.path.join(temp_dir, "nplinker_local_mode_example")
 
+@pytest.fixture(scope="module")
+def root_dir(tmp_path_factory):
+    """Set up the NPLinker root directory for the local mode example dataset."""
+    temp_dir = tmp_path_factory.mktemp("nplinker_integration_test")
+    nplinker_root_dir = temp_dir / "nplinker_local_mode_example"
 
-def pytest_sessionstart(session):
-    """Pytest hook to run before the entire test session starts.
-
-    This hook makes sure the temporary directory `nplinker_root_dir` is created before any test
-    starts. When running tests in parallel, the creation operation is done by the master process,
-    and worker processes are not allowed to do it.
-
-    For more about this hook, see:
-    1. https://docs.pytest.org/en/stable/reference.html#_pytest.hookspec.pytest_sessionstart
-    2. https://github.com/pytest-dev/pytest-xdist/issues/271#issuecomment-826396320
-    """
-    workerinput = getattr(session.config, "workerinput", None)
-    # It's master process or not running in parallell when `workerinput` is None.
-    if workerinput is None:
-        if os.path.exists(nplinker_root_dir):
-            shutil.rmtree(nplinker_root_dir)
-        dataset = DATA_DIR / "nplinker_local_mode_example.zip"
-        if not dataset.exists():
-            download_archive(dataset_url, DATA_DIR)
-        with zipfile.ZipFile(dataset, "r") as zip_ref:
-            zip_ref.extractall(temp_dir)
-    # NPLinker setting `root_dir` must be a path that exists, so setting it to a temporary directory.
-    os.environ["NPLINKER_ROOT_DIR"] = nplinker_root_dir
-
+    # Download the dataset and extract it
+    if os.path.exists(nplinker_root_dir):
+        shutil.rmtree(nplinker_root_dir)
+    dataset = DATA_DIR / "nplinker_local_mode_example.zip"
+    if not dataset.exists():
+        download_archive(dataset_url, DATA_DIR)
+    # the extracted directory is named "nplinker_local_mode_example"
+    with zipfile.ZipFile(dataset, "r") as zip_ref:
+        zip_ref.extractall(temp_dir)
 
-def pytest_sessionfinish(session):
-    """Pytest hook to run after the entire test session finishes.
+    # Return the root directory
+    yield str(nplinker_root_dir)
 
-    This hook makes sure that temporary directory `nplinker_root_dir` is only removed after all
-    tests finish. When running tests in parallel, the deletion operation is done by the master
-    process, and worker processes are not allowed to do it.
-    """
-    workerinput = getattr(session.config, "workerinput", None)
-    if workerinput is None:
-        shutil.rmtree(nplinker_root_dir)
+    shutil.rmtree(nplinker_root_dir)
 
 
 def download_archive(
diff --git a/tests/integration/test_nplinker_local.py b/tests/integration/test_nplinker_local.py
index ea19b43a..55ce5a84 100644
--- a/tests/integration/test_nplinker_local.py
+++ b/tests/integration/test_nplinker_local.py
@@ -1,38 +1,25 @@
-import hashlib
-from pathlib import Path
+import os
 import pytest
 from nplinker.nplinker import NPLinker
 from . import DATA_DIR
 
 
-# Only tests related to data arranging and loading should be put here.
-# For tests on scoring/links, add them to `scoring/test_nplinker_scoring.py`.
-
-
-def get_file_hash(file_path):
-    h = hashlib.sha256()
-    with open(file_path, "rb") as file:
-        while True:
-            # Reading is buffered, so we can read smaller chunks.
-            chunk = file.read(h.block_size)
-            if not chunk:
-                break
-            h.update(chunk)
-
-    return h.hexdigest()
-
-
 @pytest.fixture(scope="module")
-def npl() -> NPLinker:
+def npl(root_dir) -> NPLinker:
+    os.environ["NPLINKER_ROOT_DIR"] = root_dir
     npl = NPLinker(DATA_DIR / "nplinker_local_mode.toml")
     npl.load_data()
-    # remove cached score results before running tests
-    root_dir = Path(npl.root_dir)
-    score_cache = root_dir / "output" / "cache_metcalf_scoring.pckl"
-    score_cache.unlink(missing_ok=True)
     return npl
 
 
+def test_init(npl, root_dir):
+    assert str(npl.config.root_dir) == root_dir
+    assert npl.config.mode == "local"
+    assert npl.config.log.level == "DEBUG"
+
+    assert npl.root_dir == root_dir
+
+
 # ---------------------------------------------------------------------------------------------------
 # After manually checking data files for PODP MSV000079284, we have the following numbers:
 # 370 BGCs from antismash files
@@ -63,3 +50,23 @@ def test_load_data(npl: NPLinker):
     assert len(npl.spectra) == 24652
     assert len(npl.mfs) == 29
     assert len(npl.strains) == 46
+
+
+def test_get_links(npl):
+    # default scoring parameters are used (cutoff=0, standardised=False),
+    # so all score values should be >= 0
+    scoring_method = "metcalf"
+    lg = npl.get_links(npl.gcfs[:3], scoring_method)
+    for _, _, scores in lg.links:
+        score = scores[scoring_method]
+        assert score.value >= 0
+
+    lg = npl.get_links(npl.spectra[:1], scoring_method)
+    for _, _, scores in lg.links:
+        score = scores[scoring_method]
+        assert score.value >= 0
+
+    lg = npl.get_links(npl.mfs[:1], scoring_method)
+    for _, _, scores in lg.links:
+        score = scores[scoring_method]
+        assert score.value >= 0