From 907f485cd8a0d0b3751bf7bb924c8661676c3fc3 Mon Sep 17 00:00:00 2001
From: rchan
Date: Thu, 18 Apr 2024 14:45:49 +0200
Subject: [PATCH] init tests and tests for Settings

---
 .github/workflows/ci.yaml              |  85 ++++++++
 pyproject.toml                         |   3 +-
 src/batch_llm/settings.py              |  15 +-
 tests/conftest.py                      |  45 ++++
 tests/core/test_experiment.py          |   0
 tests/core/test_experiment_pipeline.py |   0
 tests/core/test_settings.py            | 258 +++++++++++++++++++++++++
 tests/core/test_utils.py               |   0
 8 files changed, 397 insertions(+), 9 deletions(-)
 create mode 100644 .github/workflows/ci.yaml
 create mode 100644 tests/conftest.py
 create mode 100644 tests/core/test_experiment.py
 create mode 100644 tests/core/test_experiment_pipeline.py
 create mode 100644 tests/core/test_settings.py
 create mode 100644 tests/core/test_utils.py

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 00000000..1ce5ac27
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,85 @@
+name: CI
+
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+    branches:
+      - main
+      - develop
+  release:
+    types:
+      - published
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  FORCE_COLOR: 3
+
+jobs:
+  pre-commit:
+    name: Lint with pre-commit
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - uses: pre-commit/action@v3.0.0
+
+  checks:
+    name: Check Python ${{ matrix.python-version }} on ${{ matrix.runs-on }}
+    runs-on: ${{ matrix.runs-on }}
+    needs: [pre-commit]
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.11", "3.12"]
+        runs-on: [ubuntu-latest, macos-latest]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install package
+        run: python -m pip install .[test]
+
+      - name: Test package
+        run: python -m pytest -ra --cov=batch_llm
+
+      - name: Upload coverage report
+        uses: codecov/codecov-action@v4.1.0
+
+  # dist:
+  #   name: Distribution build
+  #   runs-on: ubuntu-latest
+  #   needs: [pre-commit]
+
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #       with:
+  #         fetch-depth: 0
+
+  #     - name: Build sdist and wheel
+  #       run: pipx run build
+
+  #     - uses: actions/upload-artifact@v4
+  #       with:
+  #         path: dist
+
+  #     - name: Check products
+  #       run: pipx run twine check dist/*
+
+  #     - uses: pypa/gh-action-pypi-publish@v1.8.14
+  #       if: github.event_name == 'release' && github.event.action == 'published'
+  #       with:
+  #         # Remember to generate this and set it in "GitHub Secrets"
+  #         user: __token__
+  #         password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/pyproject.toml b/pyproject.toml
index f3c3abe2..0dc153b5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,7 @@ vertexai = {version = "^1.46.0", extras = ["gemini"]}
 google-cloud-aiplatform = {version = "^1.46.0", extras = ["gemini"]}
 google-generativeai = {version = "^0.4.1", extras = ["gemini"]}
 pillow = "^10.3.0"
-openai = "^1.19.0"
+openai = {version = "^1.19.0", extras = ["azure_openai", "openai"]}
 
 
 [tool.poetry.group.dev.dependencies]
@@ -27,6 +27,7 @@ black = "^24.3.0"
 isort = "^5.13.2"
 pre-commit = "^3.7.0"
 ipykernel = "^6.29.4"
+pytest = "^8.1.1"
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/src/batch_llm/settings.py b/src/batch_llm/settings.py
index c9983184..bea9061d 100644
--- a/src/batch_llm/settings.py
+++ b/src/batch_llm/settings.py
@@ -40,8 +40,10 @@ def check_folder_exists(data_folder: str) -> tuple[str]:
         Raises a ValueError if the data folder does not exist.
         """
         # check if data folder exists
-        if not os.path.exists(data_folder):
-            raise ValueError(f"Data folder '{data_folder}' does not exist")
+        if not os.path.isdir(data_folder):
+            raise ValueError(
+                f"Data folder '{data_folder}' must be a valid path to a folder"
+            )
 
         return True
 
@@ -85,8 +87,7 @@ def input_folder(self) -> str:
     @input_folder.setter
     def input_folder(self, value: str):
         raise WriteFolderError(
-            "Cannot write to input folder on it's own. Use the 'set_and_create_subfolders' "
-            "method to set the input folder"
+            "Cannot set input folder on it's own. Set the 'data_folder' instead"
         )
 
     # ---- output folder (read only) ----
@@ -98,8 +99,7 @@ def output_folder(self) -> str:
     @output_folder.setter
     def output_folder(self, value: str):
         raise WriteFolderError(
-            "Cannot write to output folder on it's own. Use the 'set_and_create_subfolders' "
-            "method to set the output folder"
+            "Cannot set output folder on it's own. Set the 'data_folder' instead"
         )
 
     # ---- media folder (read only) ----
@@ -111,8 +111,7 @@ def media_folder(self) -> str:
     @media_folder.setter
     def media_folder(self, value: str):
         raise WriteFolderError(
-            "Cannot write to media folder on it's own. Use the 'set_and_create_subfolders' "
-            "method to set the media folder"
+            "Cannot set media folder on it's own. Set the 'data_folder' instead"
         )
 
     # ---- max queries ----
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..e38e63ba
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,45 @@
+import os
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def change_test_dir(request, monkeypatch):
+    monkeypatch.chdir(request.fspath.dirname)
+
+
+@pytest.fixture()
+def temporary_data_folders(tmp_path):
+    """
+    Creates a temporary folder structure for testing.
+
+    Has the following structure:
+    tmp_path
+    ├── data/
+    ├── dummy_data/
+    ├── test.txt
+
+    Yields the paths to the data/ and dummy_data/ folders.
+    """
+    # create data folders
+    # (Path.mkdir() returns None, so keep hold of the paths themselves)
+    data_dir = tmp_path / "data"
+    dummy_data_dir = tmp_path / "dummy_data"
+    data_dir.mkdir()
+    dummy_data_dir.mkdir()
+
+    # create a txt file in the folder
+    with open(Path(tmp_path / "test.txt"), "w") as f:
+        f.write("test line")
+
+    # store current working directory
+    cwd = os.getcwd()
+
+    # change to temporary directory
+    os.chdir(tmp_path)
+
+    yield data_dir, dummy_data_dir
+
+    # change back to original directory
+    os.chdir(cwd)
diff --git a/tests/core/test_experiment.py b/tests/core/test_experiment.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/core/test_experiment_pipeline.py b/tests/core/test_experiment_pipeline.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/core/test_settings.py b/tests/core/test_settings.py
new file mode 100644
index 00000000..83db1671
--- /dev/null
+++ b/tests/core/test_settings.py
@@ -0,0 +1,258 @@
+import os
+
+import pytest
+
+from batch_llm.settings import Settings, WriteFolderError
+
+
+def test_settings_default_init(temporary_data_folders):
+    settings = Settings()
+
+    # check the default values
+    assert settings.data_folder == "data"
+    assert settings.max_queries == 10
+    assert settings.max_attempts == 3
+
+    # check the subfolders
+    assert settings.input_folder == "data/input"
+    assert settings.output_folder == "data/output"
+    assert settings.media_folder == "data/media"
+
+    # check the folders exist (should be created by initialising settings object)
+    assert os.path.isdir("data/input")
+    assert os.path.isdir("data/output")
+    assert os.path.isdir("data/media")
+
+
+def test_settings_custom_init(temporary_data_folders):
+    settings = Settings(data_folder="dummy_data", max_queries=20, max_attempts=5)
+
+    # check the custom values
+    assert settings.data_folder == "dummy_data"
+    assert settings.max_queries == 20
+    assert settings.max_attempts == 5
+
+    # check the subfolders
+    assert settings.input_folder == "dummy_data/input"
+    assert settings.output_folder == "dummy_data/output"
+    assert settings.media_folder == "dummy_data/media"
+
+    # check the folders exist (should be created by initialising settings object)
+    assert os.path.isdir("dummy_data/input")
+    assert os.path.isdir("dummy_data/output")
+    assert os.path.isdir("dummy_data/media")
+
+
+def test_settings_str(temporary_data_folders):
+    settings = Settings()
+
+    # when printing, it should show the settings and subfolders
+    assert str(settings) == (
+        "Settings: data_folder=data, max_queries=10, max_attempts=3\n"
+        "Subfolders: input_folder=data/input, output_folder=data/output, media_folder=data/media"
+    )
+
+
+def test_settings_check_folder_exists(temporary_data_folders):
+    # call static method directly
+    Settings.check_folder_exists("dummy_data")
+
+    # should raise a ValueError if the path does not exist
+    unknown_folder = "unknown_folder"
+    with pytest.raises(
+        ValueError,
+        match=f"Data folder '{unknown_folder}' must be a valid path to a folder",
+    ):
+        Settings.check_folder_exists(unknown_folder)
+
+    # should raise a ValueError if the path is not a folder/directory
+    file_path = "test.txt"
+    with pytest.raises(
+        ValueError, match=f"Data folder '{file_path}' must be a valid path to a folder"
+    ):
+        Settings.check_folder_exists(file_path)
+
+
+def test_settings_set_subfolders(temporary_data_folders):
+    settings = Settings()
+
+    # set it to a different folder
+    # (manually without triggering the data_folder setter)
+    settings._data_folder = "dummy_data"
+    settings.set_subfolders()
+
+    # check the subfolders have been set
+    assert settings.input_folder == "dummy_data/input"
+    assert settings.output_folder == "dummy_data/output"
+    assert settings.media_folder == "dummy_data/media"
+
+    # check the folders do not exist yet
+    assert not os.path.isdir("dummy_data/input")
+    assert not os.path.isdir("dummy_data/output")
+    assert not os.path.isdir("dummy_data/media")
+
+
+def test_settings_create_subfolders(temporary_data_folders):
+    settings = Settings()
+
+    # set it to a different folder
+    # (manually without triggering the data_folder setter)
+    settings._data_folder = "dummy_data"
+    settings.set_subfolders()
+    settings.create_subfolders()
+
+    # check the folders exist
+    assert os.path.isdir("dummy_data/input")
+    assert os.path.isdir("dummy_data/output")
+    assert os.path.isdir("dummy_data/media")
+
+
+def test_settings_set_and_create_subfolders(temporary_data_folders):
+    settings = Settings()
+
+    # set it to a different folder
+    # (manually without triggering the data_folder setter)
+    settings._data_folder = "dummy_data"
+    settings.set_and_create_subfolders()
+
+    # check the subfolders have been set
+    assert settings.input_folder == "dummy_data/input"
+    assert settings.output_folder == "dummy_data/output"
+    assert settings.media_folder == "dummy_data/media"
+
+    # check the folders exist
+    assert os.path.isdir("dummy_data/input")
+    assert os.path.isdir("dummy_data/output")
+    assert os.path.isdir("dummy_data/media")
+
+
+def test_settings_data_folder_getter(temporary_data_folders):
+    settings = Settings()
+    assert settings.data_folder == "data"
+
+    # set it to a different folder
+    settings._data_folder = "dummy_data"
+    assert settings.data_folder == "dummy_data"
+
+
+def test_settings_data_folder_setter(temporary_data_folders):
+    settings = Settings()
+
+    # set it to a different folder
+    # should trigger the subfolders to be set and created
+    settings.data_folder = "dummy_data"
+    assert settings.data_folder == "dummy_data"
+
+    # check the subfolders have been set
+    assert settings.input_folder == "dummy_data/input"
+    assert settings.output_folder == "dummy_data/output"
+    assert settings.media_folder == "dummy_data/media"
+
+    # check the folders exist
+    assert os.path.isdir("dummy_data/input")
+    assert os.path.isdir("dummy_data/output")
+    assert os.path.isdir("dummy_data/media")
+
+
+def test_settings_input_folder_getter(temporary_data_folders):
+    settings = Settings()
+    assert settings.input_folder == "data/input"
+
+    # set it to a different folder
+    # (manually circumventing the setter error)
+    settings._input_folder = "dummy_data/input"
+    assert settings.input_folder == "dummy_data/input"
+
+
+def test_settings_input_folder_setter(temporary_data_folders):
+    settings = Settings()
+
+    # should raise an error if trying to set it directly
+    # (should use the data_folder setter)
+    with pytest.raises(
+        WriteFolderError,
+        match="Cannot set input folder on it's own. Set the 'data_folder' instead",
+    ):
+        settings.input_folder = "dummy_data/input"
+
+
+def test_settings_output_folder_getter(temporary_data_folders):
+    settings = Settings()
+    assert settings.output_folder == "data/output"
+
+    # set it to a different folder
+    # (manually circumventing the setter error)
+    settings._output_folder = "dummy_data/output"
+    assert settings.output_folder == "dummy_data/output"
+
+
+def test_settings_output_folder_setter(temporary_data_folders):
+    settings = Settings()
+
+    # should raise an error if trying to set it directly
+    # (should use the data_folder setter)
+    with pytest.raises(
+        WriteFolderError,
+        match="Cannot set output folder on it's own. Set the 'data_folder' instead",
+    ):
+        settings.output_folder = "dummy_data/output"
+
+
+def test_settings_media_folder_getter(temporary_data_folders):
+    settings = Settings()
+    assert settings.media_folder == "data/media"
+
+    # set it to a different folder
+    # (manually circumventing the setter error)
+    settings._media_folder = "dummy_data/media"
+    assert settings.media_folder == "dummy_data/media"
+
+
+def test_settings_media_folder_setter(temporary_data_folders):
+    settings = Settings()
+
+    # should raise an error if trying to set it directly
+    # (should use the data_folder setter)
+    with pytest.raises(
+        WriteFolderError,
+        match="Cannot set media folder on it's own. Set the 'data_folder' instead",
+    ):
+        settings.media_folder = "dummy_data/media"
+
+
+def test_max_queries_getter(temporary_data_folders):
+    settings = Settings()
+
+    # check the default value
+    assert settings.max_queries == 10
+
+    # set it to a different value
+    settings._max_queries = 20
+    assert settings.max_queries == 20
+
+
+def test_max_queries_setter(temporary_data_folders):
+    settings = Settings()
+
+    # set it to a different value
+    settings.max_queries = 20
+    assert settings.max_queries == 20
+
+
+def test_max_attempts_getter(temporary_data_folders):
+    settings = Settings()
+
+    # check the default value
+    assert settings.max_attempts == 3
+
+    # set it to a different value
+    settings._max_attempts = 5
+    assert settings.max_attempts == 5
+
+
+def test_max_attempts_setter(temporary_data_folders):
+    settings = Settings()
+
+    # set it to a different value
+    settings.max_attempts = 5
+    assert settings.max_attempts == 5
diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py
new file mode 100644
index 00000000..e69de29b