diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 445a259..5744e99 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,9 +8,10 @@ on: jobs: build: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: + os: [ubuntu-latest, macos-latest] python-version: ["3.11", "3.12"] steps: @@ -30,7 +31,15 @@ jobs: - run: echo "$PWD/.venv/bin" >> $GITHUB_PATH - - name: Test - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: ./scripts/run_tests.sh + - run: pytest tests/ --junitxml=junit/test-results-${{ matrix.os }}-${{ matrix.python-version }}.xml --cov=rlevaluation --cov-report=html:coverage/cov-${{ matrix.os }}-${{ matrix.python-version }}.html + + - name: Upload pytest test results + uses: actions/upload-artifact@v4 + with: + name: pytest-results-${{ matrix.os }}-${{ matrix.python-version }} + path: | + junit/test-results-${{ matrix.os }}-${{ matrix.python-version }}.xml + coverage/cov-${{ matrix.os }}-${{ matrix.python-version }}.html + + # Use always() to always run this step to publish test results when there are test failures + if: ${{ always() }} diff --git a/pyproject.toml b/pyproject.toml index 81371d8..f13aaf3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,8 @@ license = {text = "MIT"} dev = [ "pip", "ruff", + "pytest", + "pytest-cov", "commitizen", "pre-commit", "matplotlib", diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh deleted file mode 100755 index 49ea816..0000000 --- a/scripts/run_tests.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -# pyright --stats - -export PYTHONPATH=RlEvaluation -python3 -m unittest discover -p "*test_*.py" diff --git a/tests/_utils/test_data.py b/tests/_utils/test_data.py index 238478e..fe1d7b4 100644 --- a/tests/_utils/test_data.py +++ b/tests/_utils/test_data.py @@ -1,62 +1,60 @@ -import unittest import numpy as np import pandas as pd from rlevaluation._utils.data import normalizeDataType, make_wide_format, is_wide_format from 
tests.test_utils.mock_data import generate_split_over_seed -class TestData(unittest.TestCase): - def test_normalizeDataType(self): - # turn pandas dataframe into a numpy array - test_data = pd.DataFrame({ - 'alpha': [0.01, 0.01, 0.1], - 'results': [1, 2, 3], - }) - got = normalizeDataType(test_data, 2, 'results') - self.assertIsInstance(got, np.ndarray) - self.assertEqual(np.ndim(got), 2) - - # keep numpy array untouched - test_data = np.array([ - [1, 2, 3], - [2, 3, 4], - [3, 4, 5], - ]) - got = normalizeDataType(test_data, 2) - self.assertIsInstance(got, np.ndarray) - self.assertEqual(np.ndim(got), 2) - - # TODO: test shape normalization - - def test_make_wide_format(self): - # works for one results column - df = generate_split_over_seed() - - hypers = {'stepsize', 'optimizer'} - metrics = {'results'} - - got = make_wide_format(df, hypers=hypers, metrics=metrics, seed_col='run') - - self.assertEqual(len(got), 6) - self.assertEqual(got.iloc[0]['results'].shape, (10, 300)) - - # works for two results columns - df2 = df.copy() - df2['results-2'] = df2['results'] * 2 - metrics = {'results', 'results-2'} - - got = make_wide_format(df2, hypers=hypers, metrics=metrics, seed_col='run') - - self.assertEqual(len(got), 6) - self.assertEqual(got.iloc[0]['results'].shape, (10, 300)) - self.assertEqual(got.iloc[0]['results-2'].shape, (10, 300)) - - # should not change already wide data - self.assertFalse(is_wide_format(df, metrics, 'run')) - self.assertFalse(is_wide_format(df2, metrics, 'run')) - - got = make_wide_format(df2, hypers=hypers, metrics=metrics, seed_col='run') - self.assertTrue(is_wide_format(got, metrics, 'run')) - - got2 = make_wide_format(got, hypers=hypers, metrics=metrics, seed_col='run') - self.assertEqual(id(got), id(got2)) +def test_normalizeDataType(): + # turn pandas dataframe into a numpy array + test_data = pd.DataFrame({ + 'alpha': [0.01, 0.01, 0.1], + 'results': [1, 2, 3], + }) + got = normalizeDataType(test_data, 2, 'results') + assert 
isinstance(got, np.ndarray) + assert np.ndim(got) == 2 + + # keep numpy array untouched + test_data = np.array([ + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + ]) + got = normalizeDataType(test_data, 2) + assert isinstance(got, np.ndarray) + assert np.ndim(got) == 2 + + # TODO: test shape normalization + +def test_make_wide_format(): + # works for one results column + df = generate_split_over_seed() + + hypers = {'stepsize', 'optimizer'} + metrics = {'results'} + + got = make_wide_format(df, hypers=hypers, metrics=metrics, seed_col='run') + + assert len(got) == 6 + assert got.iloc[0]['results'].shape == (10, 300) + + # works for two results columns + df2 = df.copy() + df2['results-2'] = df2['results'] * 2 + metrics = {'results', 'results-2'} + + got = make_wide_format(df2, hypers=hypers, metrics=metrics, seed_col='run') + + assert len(got) == 6 + assert got.iloc[0]['results'].shape == (10, 300) + assert got.iloc[0]['results-2'].shape == (10, 300) + + # should not change already wide data + assert not is_wide_format(df, metrics, 'run') + assert not is_wide_format(df2, metrics, 'run') + + got = make_wide_format(df2, hypers=hypers, metrics=metrics, seed_col='run') + assert is_wide_format(got, metrics, 'run') + + got2 = make_wide_format(got, hypers=hypers, metrics=metrics, seed_col='run') + assert id(got) == id(got2) diff --git a/tests/test_hypers.py b/tests/test_hypers.py index 3ff08de..e21c3fa 100644 --- a/tests/test_hypers.py +++ b/tests/test_hypers.py @@ -1,18 +1,16 @@ -import unittest import pandas as pd from rlevaluation.hypers import select_best_hypers, Preference from rlevaluation.config import data_definition -class TestHypers(unittest.TestCase): - def test_select_best_hypers(self): - test_data = pd.DataFrame({ - 'alpha': [0.1, 0.01, 0.001], - 'seed': [0, 0, 0], - 'result': [0, 2, 1], - }) +def test_select_best_hypers(): + test_data = pd.DataFrame({ + 'alpha': [0.1, 0.01, 0.001], + 'seed': [0, 0, 0], + 'result': [0, 2, 1], + }) - d = 
data_definition(hyper_cols=['alpha']) + d = data_definition(hyper_cols=['alpha']) - best = select_best_hypers(test_data, 'result', Preference.high, data_definition=d) - self.assertEqual(best.best_configuration[0], 0.01) + best = select_best_hypers(test_data, 'result', Preference.high, data_definition=d) + assert best.best_configuration[0] == 0.01