From d25c2fdeec724f0db2e1684ad269bf97b852f2e8 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Mon, 10 Jun 2024 17:44:26 +0200 Subject: [PATCH] update integration tests - simplify the `conftest.py` - remove unused functions - add tests for `get_links` --- tests/integration/conftest.py | 55 ++++++++---------------- tests/integration/test_nplinker_local.py | 55 +++++++++++++----------- 2 files changed, 49 insertions(+), 61 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index ede9f6cb..d4313afe 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,11 +1,11 @@ from __future__ import annotations import os import shutil -import tempfile import zipfile from os import PathLike from pathlib import Path import httpx +import pytest from rich.progress import Progress from . import DATA_DIR @@ -18,46 +18,27 @@ f"https://zenodo.org/records/{dataset_doi.split('.')[-1]}/files/nplinker_local_mode_example.zip" ) -# The temporary directory for the test session -temp_dir = tempfile.gettempdir() -nplinker_root_dir = os.path.join(temp_dir, "nplinker_local_mode_example") +@pytest.fixture(scope="module") +def root_dir(tmp_path_factory): + """Set up the NPLinker root directory for the local mode example dataset.""" + temp_dir = tmp_path_factory.mktemp("nplinker_integration_test") + nplinker_root_dir = temp_dir / "nplinker_local_mode_example" -def pytest_sessionstart(session): - """Pytest hook to run before the entire test session starts. - - This hook makes sure the temporary directory `nplinker_root_dir` is created before any test - starts. When running tests in parallel, the creation operation is done by the master process, - and worker processes are not allowed to do it. - - For more about this hook, see: - 1. https://docs.pytest.org/en/stable/reference.html#_pytest.hookspec.pytest_sessionstart - 2. 
https://github.com/pytest-dev/pytest-xdist/issues/271#issuecomment-826396320 - """ - workerinput = getattr(session.config, "workerinput", None) - # It's master process or not running in parallell when `workerinput` is None. - if workerinput is None: - if os.path.exists(nplinker_root_dir): - shutil.rmtree(nplinker_root_dir) - dataset = DATA_DIR / "nplinker_local_mode_example.zip" - if not dataset.exists(): - download_archive(dataset_url, DATA_DIR) - with zipfile.ZipFile(dataset, "r") as zip_ref: - zip_ref.extractall(temp_dir) - # NPLinker setting `root_dir` must be a path that exists, so setting it to a temporary directory. - os.environ["NPLINKER_ROOT_DIR"] = nplinker_root_dir - + # Download the dataset and extract it + if os.path.exists(nplinker_root_dir): + shutil.rmtree(nplinker_root_dir) + dataset = DATA_DIR / "nplinker_local_mode_example.zip" + if not dataset.exists(): + download_archive(dataset_url, DATA_DIR) + # the extracted directory is named "nplinker_local_mode_example" + with zipfile.ZipFile(dataset, "r") as zip_ref: + zip_ref.extractall(temp_dir) -def pytest_sessionfinish(session): - """Pytest hook to run after the entire test session finishes. + # Return the root directory + yield str(nplinker_root_dir) - This hook makes sure that temporary directory `nplinker_root_dir` is only removed after all - tests finish. When running tests in parallel, the deletion operation is done by the master - process, and worker processes are not allowed to do it. 
- """ - workerinput = getattr(session.config, "workerinput", None) - if workerinput is None: - shutil.rmtree(nplinker_root_dir) + shutil.rmtree(nplinker_root_dir) def download_archive( diff --git a/tests/integration/test_nplinker_local.py b/tests/integration/test_nplinker_local.py index ea19b43a..55ce5a84 100644 --- a/tests/integration/test_nplinker_local.py +++ b/tests/integration/test_nplinker_local.py @@ -1,38 +1,25 @@ -import hashlib -from pathlib import Path +import os import pytest from nplinker.nplinker import NPLinker from . import DATA_DIR -# Only tests related to data arranging and loading should be put here. -# For tests on scoring/links, add them to `scoring/test_nplinker_scoring.py`. - - -def get_file_hash(file_path): - h = hashlib.sha256() - with open(file_path, "rb") as file: - while True: - # Reading is buffered, so we can read smaller chunks. - chunk = file.read(h.block_size) - if not chunk: - break - h.update(chunk) - - return h.hexdigest() - - @pytest.fixture(scope="module") -def npl() -> NPLinker: +def npl(root_dir) -> NPLinker: + os.environ["NPLINKER_ROOT_DIR"] = root_dir npl = NPLinker(DATA_DIR / "nplinker_local_mode.toml") npl.load_data() - # remove cached score results before running tests - root_dir = Path(npl.root_dir) - score_cache = root_dir / "output" / "cache_metcalf_scoring.pckl" - score_cache.unlink(missing_ok=True) return npl +def test_init(npl, root_dir): + assert str(npl.config.root_dir) == root_dir + assert npl.config.mode == "local" + assert npl.config.log.level == "DEBUG" + + assert npl.root_dir == root_dir + + # --------------------------------------------------------------------------------------------------- # After manually checking data files for PODP MSV000079284, we have the following numbers: # 370 BGCs from antismash files @@ -63,3 +50,23 @@ def test_load_data(npl: NPLinker): assert len(npl.spectra) == 24652 assert len(npl.mfs) == 29 assert len(npl.strains) == 46 + + +def test_get_links(npl): + # default scoring 
parameters are used (cutoff=0, standardised=False), + # so all score values should be >= 0 + scoring_method = "metcalf" + lg = npl.get_links(npl.gcfs[:3], scoring_method) + for _, _, scores in lg.links: + score = scores[scoring_method] + assert score.value >= 0 + + lg = npl.get_links(npl.spectra[:1], scoring_method) + for _, _, scores in lg.links: + score = scores[scoring_method] + assert score.value >= 0 + + lg = npl.get_links(npl.mfs[:1], scoring_method) + for _, _, scores in lg.links: + score = scores[scoring_method] + assert score.value >= 0