diff --git a/.github/workflows/test-pytest.yml b/.github/workflows/test-pytest.yml index 59dc0cd1..e541a463 100644 --- a/.github/workflows/test-pytest.yml +++ b/.github/workflows/test-pytest.yml @@ -14,7 +14,7 @@ on: jobs: test: strategy: - fail-fast: true + fail-fast: false matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] os: [ubuntu-latest, macos-latest, windows-latest] @@ -26,5 +26,5 @@ jobs: python-version: ${{ matrix.python-version }} cache: "pip" - run: pip install ".[test]" - - run: pytest --hypothesis-profile ci + - run: pytest --hypothesis-profile ci --hypothesis-show-statistics diff --git a/conftest.py b/conftest.py index b02df288..5e87c092 100644 --- a/conftest.py +++ b/conftest.py @@ -1,4 +1,10 @@ -from hypothesis import Verbosity, settings +from hypothesis import HealthCheck, Verbosity, settings -settings.register_profile("ci", max_examples=1000) +settings.register_profile( + "ci", + verbosity=Verbosity.verbose, + max_examples=1000, + deadline=2000, + suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large], +) settings.register_profile("debug", max_examples=10, verbosity=Verbosity.verbose) diff --git a/tests/_superscript.py b/tests/_superscript.py new file mode 100644 index 00000000..208852a7 --- /dev/null +++ b/tests/_superscript.py @@ -0,0 +1,86 @@ +"""Functions to convert from normal to superscript characters. +After https://stackoverflow.com/questions/8651361/how-do-you-print-superscript/58612677#58612677 +with thanks to norok2. + + +Examples: + >>> "".translate(to_superscript) + '' + >>> "0".translate(to_superscript) + '⁰' + >>> "the quick brown fox jumps over the lazy dog".translate(to_superscript) + 'ᵗʰᵉ ۹ᵘᶦᶜᵏ ᵇʳᵒʷⁿ ᶠᵒˣ ʲᵘᵐᵖˢ ᵒᵛᵉʳ ᵗʰᵉ ˡᵃᶻʸ ᵈᵒᵍ' + >>> "CCV (FTW)".translate(to_superscript) + 'ᶜᶜⱽ ⁽ᶠᵀᵂ⁾' +""" + +superscript_map = { + "0": "⁰", + "1": "¹", + "2": "²", + "3": "³", + "4": "⁴", + "5": "⁵", + "6": "⁶", + "7": "⁷", + "8": "⁸", + "9": "⁹", + "a": "ᵃ", + "b": "ᵇ", + "c": "ᶜ", + "d": "ᵈ", + "e": "ᵉ", + "f": "ᶠ", + "g": "ᵍ", + "h": "ʰ", + "i": "ᶦ", + "j": "ʲ", + "k": "ᵏ", + "l": "ˡ", + "m": "ᵐ", + "n": "ⁿ", + "o": "ᵒ", + "p": "ᵖ", + "q": "۹", + "r": "ʳ", + "s": "ˢ", + "t": "ᵗ", + "u": "ᵘ", + "v": "ᵛ", + "w": "ʷ", + "x": "ˣ", + "y": "ʸ", + "z": "ᶻ", + "A": "ᴬ", + "B": "ᴮ", + "C": "ᶜ", + "D": "ᴰ", + "E": "ᴱ", + "F": "ᶠ", + "G": "ᴳ", + "H": "ᴴ", + "I": "ᴵ", + "J": "ᴶ", + "K": "ᴷ", + "L": "ᴸ", + "M": "ᴹ", + "N": "ᴺ", + "O": "ᴼ", + "P": "ᴾ", + "Q": "Q", + "R": "ᴿ", + "S": "ˢ", + "T": "ᵀ", + "U": "ᵁ", + "V": "ⱽ", + "W": "ᵂ", + "X": "ˣ", + "Y": "ʸ", + "Z": "ᶻ", + "+": "⁺", + "-": "⁻", + "=": "⁼", + "(": "⁽", + ")": "⁾", +} +to_superscript = str.maketrans(superscript_map) diff --git a/tests/test_serializer.py b/tests/test_serializer.py index 7629b896..dd20bc32 100644 --- a/tests/test_serializer.py +++ b/tests/test_serializer.py @@ -43,11 +43,11 @@ def _load_dump_via_disk(o, module_name): serializer_dump_load_strategy = st.sampled_from( [ load_dump_pickle_string, - load_dump_dill_string, - load_dump_yaml_string, + # load_dump_dill_string, + # load_dump_yaml_string, load_dump_pickle_disk, - load_dump_dill_disk, - load_dump_yaml_disk, + # load_dump_dill_disk, + # load_dump_yaml_disk, ] ) diff --git a/tests/test_state.py b/tests/test_state.py index f89b52dc..f6305e02 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -1,18 +1,42 @@ import logging import pandas as pd -from hypothesis import HealthCheck, given, settings +from hypothesis import given +from pandas import DataFrame from autora.state import StandardState +from autora.variable import Variable, VariableCollection from .test_serializer import serializer_dump_load_strategy -from .test_strategies import standard_state_strategy +from .test_strategies import ( + dataframe_strategy, + standard_state_strategy, + variable_strategy, + variablecollection_strategy, +) logger = logging.getLogger(__name__) +@given(variable_strategy(), serializer_dump_load_strategy) +def test_variable_serialize_deserialize(o: Variable, dump_load): + o_loaded = dump_load(o) + assert o == o_loaded + + +@given(variablecollection_strategy(), serializer_dump_load_strategy) +def test_variablecollection_serialize_deserialize(o: VariableCollection, dump_load): + o_loaded = dump_load(o) + assert o == o_loaded + + +@given(dataframe_strategy(), serializer_dump_load_strategy) +def test_dataframe_serialize_deserialize(o: DataFrame, dump_load): + o_loaded = dump_load(o) + o.equals(o_loaded) + + @given(standard_state_strategy(), serializer_dump_load_strategy) -@settings(suppress_health_check={HealthCheck.too_slow}, deadline=1000) def test_state_serialize_deserialize(o: StandardState, dump_load): o_loaded = dump_load(o) assert o.variables == o_loaded.variables diff --git a/tests/test_strategies.py b/tests/test_strategies.py index 8683762b..e7da3fd5 100644 --- a/tests/test_strategies.py +++ b/tests/test_strategies.py @@ -5,7 +5,7 @@ import pandas as pd import sklearn.dummy import sklearn.linear_model -from hypothesis import HealthCheck, given, settings +from hypothesis import given from hypothesis import strategies as st from hypothesis.extra import numpy as st_np from hypothesis.extra import pandas as st_pd @@ -13,6 +13,10 @@ from autora.state import StandardState from autora.variable import ValueType, Variable, VariableCollection +from ._superscript import to_superscript + +logger = logging.getLogger(__name__) + VALUE_TYPE_DTYPE_MAPPING = { ValueType.BOOLEAN: bool, ValueType.INTEGER: int, @@ -23,9 +27,6 @@ ValueType.PROBABILITY_DISTRIBUTION: float, ValueType.CLASS: str, } - -logger = logging.getLogger(__name__) - AVAILABLE_SKLEARN_MODELS_STRATEGY = st.sampled_from( [ sklearn.dummy.DummyRegressor, @@ -37,13 +38,117 @@ @st.composite -def _name_label_units_strategy(draw, name=None, label=None, units=None, covariate=None): +def variable_name(draw, max_size=16): + name = draw( + st.one_of( + st.sampled_from( + list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") + ), + st.sampled_from(list("αβγδεζηθικλμνξοπρσςτυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ")), + st.text(min_size=1, max_size=max_size), + ) + ) + return name + + +@st.composite +def si_unit_with_power_full_strategy(draw): + base_unit = draw( + st.sampled_from( + [ + "metre", + "second", + "mole", + "Ampere", + "Kelvin", + "candela", + "gram", + ] + ) + ) + prefix = draw( + st.sampled_from( + [ + "", + "deca", + "deci", + "hecto", + "centi", + "kilo", + "milli", + "mega", + "micro", + "giga", + "nano", + "tera", + "pico", + ] + ) + ) + return prefix + base_unit + + +@st.composite +def si_unit_with_power_abbreviated_strategy(draw): + base_unit = draw( + st.sampled_from( + [ + "m", + "s", + "mol", + "A", + "K", + "cd", + "g", + ] + ) + ) + + i = draw(st.integers(min_value=-3, max_value=3).filter(lambda x: x != 0)) + if i == 1: + suffix = "" + else: + suffix = str(i).translate(to_superscript) + + return base_unit + suffix + + +@st.composite +def units_strategy(draw, max_size=16): + unit = draw( + st.one_of( + st.none(), + st.just(""), + st.just("unitless"), + si_unit_with_power_full_strategy(), # just latin charaters + si_unit_with_power_abbreviated_strategy(), # uses UTF-8 superscripts + st.text(min_size=1, max_size=max_size), # arbitrary characters + ) + ) + return unit + + +@st.composite +def _name_label_units_strategy( + draw, + name=None, + label=None, + units=None, + covariate=None, + name_max_length=4, + label_max_length=16, + units_max_length=4, +): if name is None: - name = draw(st.text(min_size=1)) + name = draw(variable_name(max_size=name_max_length)) if label is None: - label = draw(st.text(min_size=0)) + label = draw( + st.one_of( + st.none(), st.just(name), st.text(min_size=0, max_size=label_max_length) + ) + ) if units is None: - units = draw(st.text(min_size=0)) + units = draw(units_strategy(max_size=units_max_length)) if covariate is None: covariate = draw(st.booleans()) return name, label, units, covariate @@ -72,6 +177,11 @@ def variable_boolean_strategy(draw, name=None, label=None, units=None, covariate ) +@given(variable_boolean_strategy()) +def test_variable_boolean_strategy_creation(o): + assert o + + @st.composite def variable_integer_strategy(draw, name=None, label=None, units=None, covariate=None): name, label, units, covariate = draw( @@ -84,13 +194,13 @@ def variable_integer_strategy(draw, name=None, label=None, units=None, covariate value_range = draw( st.one_of( st.none(), - st.tuples(st.integers(), st.integers()) - .filter(lambda x: x[0] != x[1]) - .map(sorted), + st.tuples(st.integers(), st.integers()).map(sorted), ) ) if value_range is None: - allowed_values = draw(st.one_of(st.none(), st.sets(st.integers(), min_size=1))) + allowed_values = draw( + st.one_of(st.none(), st.lists(st.integers(), min_size=1, unique=True)) + ) else: allowed_values = None @@ -113,6 +223,11 @@ def variable_integer_strategy(draw, name=None, label=None, units=None, covariate ) +@given(variable_integer_strategy()) +def test_variable_integer_strategy_creation(o): + assert o + + @st.composite def variable_real_strategy(draw, name=None, label=None, units=None, covariate=None): name, label, units, covariate = draw( @@ -132,7 +247,9 @@ def variable_real_strategy(draw, name=None, label=None, units=None, covariate=No ) if value_range is None: - allowed_values = draw(st.one_of(st.none(), st.sets(range_strategy, min_size=1))) + allowed_values = draw( + st.one_of(st.none(), st.lists(range_strategy, min_size=1, unique=True)) + ) else: allowed_values = None rescale = draw(st.one_of(st.just(1), range_strategy)) @@ -148,6 +265,11 @@ def variable_real_strategy(draw, name=None, label=None, units=None, covariate=No ) +@given(variable_real_strategy()) +def test_variable_real_strategy_creation(o): + assert o + + @st.composite def variable_probability_strategy( draw, name=None, label=None, units=None, covariate=None @@ -173,6 +295,11 @@ def variable_probability_strategy( ) +@given(variable_probability_strategy()) +def test_variable_probability_strategy_creation(o): + assert o + + @st.composite def variable_probability_sample_strategy( draw, name=None, label=None, units=None, covariate=None @@ -198,6 +325,11 @@ def variable_probability_sample_strategy( ) +@given(variable_probability_sample_strategy()) +def test_variable_probability_sample_strategy_creation(o): + assert o + + @st.composite def variable_probability_distribution_strategy( draw, name=None, label=None, units=None, covariate=None @@ -223,6 +355,11 @@ def variable_probability_distribution_strategy( ) +@given(variable_probability_distribution_strategy()) +def test_variable_probability_distribution_strategy_creation(o): + assert o + + @st.composite def variable_sigmoid_strategy(draw, name=None, label=None, units=None, covariate=None): name, label, units, covariate = draw( @@ -246,8 +383,15 @@ def variable_sigmoid_strategy(draw, name=None, label=None, units=None, covariate ) +@given(variable_sigmoid_strategy()) +def test_variable_sigmoid_strategy_creation(o): + assert o + + @st.composite -def variable_class_strategy(draw, name=None, label=None, units=None, covariate=None): +def variable_class_strategy( + draw, name=None, label=None, units=None, covariate=None, class_name_max_length=2 +): name, label, units, covariate = draw( _name_label_units_strategy( name=name, label=label, units=units, covariate=covariate @@ -256,7 +400,9 @@ def variable_class_strategy(draw, name=None, label=None, units=None, covariate=N value_type = ValueType.CLASS value_range = None rescale = 1 - allowed_values = draw(st.lists(st.text(min_size=1, max_size=16), unique=True)) + allowed_values = draw( + st.lists(st.text(min_size=1, max_size=class_name_max_length), unique=True) + ) return Variable( name=name, variable_label=label, @@ -269,6 +415,11 @@ def variable_class_strategy(draw, name=None, label=None, units=None, covariate=N ) +@given(variable_class_strategy(class_name_max_length=32)) +def test_variable_class_strategy_creation(o): + assert o + + VARIABLE_STRATEGIES = ( variable_boolean_strategy, variable_integer_strategy, @@ -330,7 +481,7 @@ def variablecollection_strategy( names = draw( st.lists( - st.text(min_size=1, max_size=name_max_length), + variable_name(max_size=name_max_length), unique=True, min_size=n_variables, max_size=n_variables, @@ -388,14 +539,30 @@ def dataframe_strategy( + variable_collection.covariates ) - df: pd.DataFrame = draw( - st_pd.data_frames( - columns=[ - st_pd.column(name=v.name, dtype=VALUE_TYPE_DTYPE_MAPPING[v.type]) - for v in variables - ], - ) - ) + columns = [] + for v in variables: + dtype = VALUE_TYPE_DTYPE_MAPPING[v.type] + if v.allowed_values is not None and v.allowed_values != []: + c = st_pd.column(name=v.name, elements=st.sampled_from(v.allowed_values)) + elif v.value_range is not None and dtype is int: + c = st_pd.column( + name=v.name, + elements=st.integers( + min_value=v.value_range[0], max_value=v.value_range[1] + ), + ) + elif v.value_range is not None and dtype is float: + c = st_pd.column( + name=v.name, + elements=st.floats( + min_value=v.value_range[0], max_value=v.value_range[1] + ), + ) + else: + c = st_pd.column(name=v.name, dtype=dtype) + columns.append(c) + + df: pd.DataFrame = draw(st_pd.data_frames(columns=columns)) return df @@ -461,7 +628,6 @@ def standard_state_strategy(draw): return s -@settings(suppress_health_check={HealthCheck.too_slow}) @given(standard_state_strategy()) def test_standard_state_strategy_creation(o): assert o diff --git a/tests/test_variable.py b/tests/test_variable.py index aa85556f..c4650347 100644 --- a/tests/test_variable.py +++ b/tests/test_variable.py @@ -1,6 +1,6 @@ import logging -from hypothesis import given, settings +from hypothesis import given from hypothesis import strategies as st from .test_serializer import serializer_dump_load_strategy @@ -16,7 +16,6 @@ ), serializer_dump_load_strategy, ) -@settings(deadline=1000) def test_variable_serialize_deserialize(o, dump_load): o_loaded = dump_load(o) assert o_loaded == o diff --git a/tests/test_workflow.py b/tests/test_workflow.py index ba922639..643424d4 100644 --- a/tests/test_workflow.py +++ b/tests/test_workflow.py @@ -72,7 +72,6 @@ def test_e2e_nominal(workflow_library_module): st.booleans(), st.booleans(), ) -@settings(verbosity=Verbosity.verbose, deadline=1000) def test_e2e_serializers(workflow_library_module, serializer, verbose, debug): """Test a basic standard chain of CLI calls using a single serializer."""