From ffa36a4e73d2f4bcb49830942eb76053911d4117 Mon Sep 17 00:00:00 2001
From: Siyuan Sheng
Date: Wed, 13 Mar 2024 21:08:27 -0700
Subject: [PATCH] Add docker tests for fsspec

---
Review notes (everything between the three-dash marker and the first
"diff" header is commentary and is not part of the applied change):

 * The line breaks and indentation of this patch were restored from a
   whitespace-collapsed copy. Hunk line counts and the diffstat were
   re-checked and match. The indentation of the unchanged conftest.py
   context lines is inferred -- if a hunk is rejected, retry with
   "git apply --ignore-whitespace".
 * tests/conftest.py: "import fsspec" is added inside the standard-library
   import group; it is a third-party package and belongs next to
   pytest/requests.
 * tests/fs/test_docker_fsspec_cat.py: "import logging" sits after
   "NUM_TESTS = 10" instead of with the other imports at the top.
 * validate_read_range: the try/except that catches AssertionError only to
   raise a new one can be a single
   "assert alluxio_data == local_data, error_message".
 * test_alluxio_fsspec_cat_file: random.randint(0, file_size - 1) raises
   ValueError when the local file is empty.
 * NOTE(review): the special case (file_size - 1, -1) asks cat_file for
   end = file_size - 2 (before start), while the local side does read(-1),
   i.e. read to EOF. Confirm that this is the intended cat_file contract.

 tests/conftest.py                      | 38 ++++++++++-
 tests/fs/test_docker_alluxio_fsspec.py |  9 +++
 tests/fs/test_docker_fsspec_cat.py     | 87 ++++++++++++++++++++++++++
 3 files changed, 132 insertions(+), 2 deletions(-)
 create mode 100644 tests/fs/test_docker_alluxio_fsspec.py
 create mode 100644 tests/fs/test_docker_fsspec_cat.py

diff --git a/tests/conftest.py b/tests/conftest.py
index 546d0f0..b51cdd6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,12 +3,14 @@
 import shlex
 import subprocess
 import time
+import fsspec
 from urllib.parse import urlparse
 
 import pytest
 import requests
 
 from alluxiofs import AlluxioClient
+from alluxiofs import AlluxioFileSystem
 
 LOGGER = logging.getLogger("alluxio_test")
 TEST_ROOT = os.getenv("TEST_ROOT", "file:///opt/alluxio/ufs/")
@@ -119,7 +121,7 @@ def stop_alluxio_dockers(with_etcd=False):
         stop_docker(ETCD_CONTAINER)
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="session")
 def docker_alluxio():
     if "ALLUXIO_URL" in os.environ:
         # assume we already have a server already set up
@@ -130,7 +132,7 @@ def docker_alluxio():
     stop_alluxio_dockers()
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="session")
 def docker_alluxio_with_etcd():
     if "ALLUXIO_URL" in os.environ:
         # assume we already have a server already set up
@@ -160,3 +162,35 @@ def etcd_alluxio_client(docker_alluxio_with_etcd):
     host = parsed_url.hostname
     etcd_alluxio_client = AlluxioClient(etcd_hosts=host)
     yield etcd_alluxio_client
+
+
+@pytest.fixture
+def alluxio_file_system(docker_alluxio):
+    LOGGER.debug(f"get AlluxioFileSystem connect to {docker_alluxio}")
+    parsed_url = urlparse(docker_alluxio)
+    host = parsed_url.hostname
+    fsspec.register_implementation("alluxio", AlluxioFileSystem, clobber=True)
+    alluxio_file_system = fsspec.filesystem(
+        "alluxio",
+        worker_hosts=host,
+        target_protocol="file",
+        preload_path=ALLUXIO_FILE_PATH,
+    )
+    yield alluxio_file_system
+
+
+@pytest.fixture
+def etcd_alluxio_file_system(docker_alluxio_with_etcd):
+    LOGGER.debug(
+        f"get etcd AlluxioFileSystem connect to {docker_alluxio_with_etcd}"
+    )
+    parsed_url = urlparse(docker_alluxio_with_etcd)
+    host = parsed_url.hostname
+    fsspec.register_implementation("alluxio", AlluxioFileSystem, clobber=True)
+    etcd_alluxio_file_system = fsspec.filesystem(
+        "alluxio",
+        etcd_hosts=host,
+        target_protocol="file",
+        preload_path=ALLUXIO_FILE_PATH,
+    )
+    yield etcd_alluxio_file_system
diff --git a/tests/fs/test_docker_alluxio_fsspec.py b/tests/fs/test_docker_alluxio_fsspec.py
new file mode 100644
index 0000000..5445974
--- /dev/null
+++ b/tests/fs/test_docker_alluxio_fsspec.py
@@ -0,0 +1,9 @@
+from tests.conftest import TEST_ROOT
+
+
+def test_simple_fsspec(alluxio_file_system):
+    alluxio_file_system.ls(TEST_ROOT)  # no error
+
+
+def test_simple_etcd_fsspec(etcd_alluxio_file_system):
+    etcd_alluxio_file_system.ls(TEST_ROOT)  # no error
diff --git a/tests/fs/test_docker_fsspec_cat.py b/tests/fs/test_docker_fsspec_cat.py
new file mode 100644
index 0000000..198a886
--- /dev/null
+++ b/tests/fs/test_docker_fsspec_cat.py
@@ -0,0 +1,87 @@
+import os
+import random
+
+from alluxiofs import AlluxioFileSystem
+from tests.conftest import ALLUXIO_FILE_PATH
+from tests.conftest import LOCAL_FILE_PATH
+
+NUM_TESTS = 10
+
+import logging
+
+LOGGER = logging.getLogger(__name__)
+
+
+def validate_read_range(
+    alluxio_file_system: AlluxioFileSystem,
+    alluxio_file_path,
+    local_file_path,
+    offset,
+    length,
+):
+    alluxio_data = alluxio_file_system.cat_file(
+        alluxio_file_path, offset, offset + length
+    )
+
+    with open(local_file_path, "rb") as local_file:
+        local_file.seek(offset)
+        local_data = local_file.read(length)
+
+    try:
+        assert alluxio_data == local_data
+    except AssertionError:
+        error_message = (
+            f"Data mismatch between Alluxio and local file\n"
+            f"Alluxio file path: {alluxio_file_path}\n"
+            f"Local file path: {local_file_path}\n"
+            f"Offset: {offset}\n"
+            f"Length: {length}\n"
+            f"Alluxio data: {alluxio_data}\n"
+            f"Local data: {local_data}"
+        )
+        raise AssertionError(error_message)
+
+
+def test_alluxio_fsspec_cat_file(alluxio_file_system: AlluxioFileSystem):
+    file_size = os.path.getsize(LOCAL_FILE_PATH)
+
+    alluxio_file_system.ls(ALLUXIO_FILE_PATH)
+
+    # Validate normal case
+    max_length = 13 * 1024
+    for _ in range(NUM_TESTS):
+        offset = random.randint(0, file_size - 1)
+        length = min(random.randint(1, file_size - offset), max_length)
+        validate_read_range(
+            alluxio_file_system,
+            ALLUXIO_FILE_PATH,
+            LOCAL_FILE_PATH,
+            offset,
+            length,
+        )
+
+    LOGGER.debug(
+        f"Data matches between Alluxio file and local source file for {NUM_TESTS} times"
+    )
+
+    special_test_cases = [
+        (file_size - 1, -1),
+        (file_size - 1, file_size + 1),
+        (file_size, 100),
+    ]
+
+    for offset, length in special_test_cases:
+        validate_read_range(
+            alluxio_file_system,
+            ALLUXIO_FILE_PATH,
+            LOCAL_FILE_PATH,
+            offset,
+            length,
+        )
+    LOGGER.debug("Passed corner test cases")
+
+
+def test_etcd_alluxio_fsspec_cat_file(
+    etcd_alluxio_file_system: AlluxioFileSystem,
+):
+    test_alluxio_fsspec_cat_file(etcd_alluxio_file_system)