From d911f29f67fd0a9f4b02dc8de7e3444880ca8222 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Silva Date: Fri, 26 Jul 2024 19:14:58 +0100 Subject: [PATCH] Added tests for determination of related and uncertainty datasets (#189) --- tests/conftest.py | 119 +++++++++++++++++++++++++ tests/notebooks/generic.ipynb | 128 ++++++++++++++++++++++---- tests/test_operations.py | 163 +++++++++++++++++++++++++++++++++- 3 files changed, 388 insertions(+), 22 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 83c81a2e..e5c9af0c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,6 +26,15 @@ from arpav_ppcv.webapp.app import create_app_from_settings from arpav_ppcv.webapp.legacy.django_settings import get_custom_django_settings from arpav_ppcv.webapp.api_v2.app import create_app as create_v2_app +from arpav_ppcv.bootstrapper.configurationparameters import ( + generate_configuration_parameters as bootstrappable_configuration_parameters, +) +from arpav_ppcv.bootstrapper.coverage_configurations import ( + tas as tas_bootstrappable_configurations, +) +from arpav_ppcv.bootstrapper.variables import ( + generate_variable_configurations as bootstrappable_variables, +) @pytest.hookimpl @@ -160,6 +169,14 @@ def sample_variables(arpav_db_session) -> list[observations.Variable]: return db_variables +@pytest.fixture() +def sample_real_variables(arpav_db_session) -> list[observations.Variable]: + created = [] + for var_to_create in bootstrappable_variables(): + created.append(database.create_variable(arpav_db_session, var_to_create)) + return created + + @pytest.fixture() def sample_monthly_measurements( arpav_db_session, sample_variables, sample_stations @@ -191,6 +208,18 @@ def sample_monthly_measurements( return db_monthly_measurements +@pytest.fixture() +def sample_real_configuration_parameters(arpav_db_session): + params_to_create = bootstrappable_configuration_parameters() + created_params = [] + for param_to_create in params_to_create: + created_param = database.create_configuration_parameter( + arpav_db_session, param_to_create + ) + created_params.append(created_param) + return created_params + + @pytest.fixture() def sample_configuration_parameters(arpav_db_session): db_conf_params = [] @@ -253,6 +282,96 @@ def sample_coverage_configurations( return db_cov_confs +@pytest.fixture() +def sample_real_coverage_configurations( + arpav_db_session, + sample_real_configuration_parameters, + sample_real_variables, +): + all_vars = database.collect_all_variables(arpav_db_session) + all_conf_param_values = database.collect_all_configuration_parameter_values( + arpav_db_session + ) + cov_confs_to_create = tas_bootstrappable_configurations.generate_configurations( + conf_param_values={ + (pv.configuration_parameter.name, pv.name): pv + for pv in all_conf_param_values + }, + variables={v.name: v for v in all_vars}, + ) + created_cov_confs = {} + for cov_conf_to_create in cov_confs_to_create: + cov_conf = database.create_coverage_configuration( + arpav_db_session, cov_conf_to_create + ) + created_cov_confs[cov_conf.name] = cov_conf + + to_update = {} + for name, related_names in { + **tas_bootstrappable_configurations.get_related_map(), + }.items(): + to_update[name] = { + "related": related_names, + } + + for name, uncertainties in { + **tas_bootstrappable_configurations.get_uncertainty_map(), + }.items(): + info = to_update.setdefault(name, {}) + info["uncertainties"] = uncertainties + for name, info in to_update.items(): + main_cov_conf = created_cov_confs[name] + secondaries = info.get("related") + uncertainties = info.get("uncertainties") + update_kwargs = {} + if secondaries is not None: + secondary_cov_confs = [ + cc for name, cc in created_cov_confs.items() if name in secondaries + ] + update_kwargs["secondary_coverage_configurations_ids"] = [ + cc.id for cc in secondary_cov_confs + ] + else: + update_kwargs["secondary_coverage_configurations_ids"] = [] + if uncertainties is not None: + lower_uncert_id = [ + cc.id + for name, cc in created_cov_confs.items() + if name == uncertainties[0] + ][0] + upper_uncert_id = [ + cc.id + for name, cc in created_cov_confs.items() + if name == uncertainties[1] + ][0] + update_kwargs.update( + uncertainty_lower_bounds_coverage_configuration_id=lower_uncert_id, + uncertainty_upper_bounds_coverage_configuration_id=upper_uncert_id, + ) + cov_update = coverages.CoverageConfigurationUpdate( + **main_cov_conf.model_dump( + exclude={ + "uncertainty_lower_bounds_coverage_configuration_id", + "uncertainty_upper_bounds_coverage_configuration_id", + "secondary_coverage_configurations_ids", + "possible_values", + } + ), + **update_kwargs, + possible_values=[ + coverages.ConfigurationParameterPossibleValueUpdate( + configuration_parameter_value_id=pv.configuration_parameter_value_id + ) + for pv in main_cov_conf.possible_values + ], + ) + database.update_coverage_configuration( + arpav_db_session, + main_cov_conf, + cov_update, + ) + + @pytest.fixture() def sample_tas_csv_data(): return """ diff --git a/tests/notebooks/generic.ipynb b/tests/notebooks/generic.ipynb index 822cbec5..ac74a756 100644 --- a/tests/notebooks/generic.ipynb +++ b/tests/notebooks/generic.ipynb @@ -55,7 +55,7 @@ "metadata": {}, "outputs": [], "source": [ - "coverage_identifier = \"tas_seasonal_absolute_model_ensemble-annual-model_ensemble-tas-absolute-rcp26-DJF\"\n", + "coverage_identifier = \"tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp26-MAM\"\n", "point_coords = \"POINT(11.5469 44.9524)\"\n", "date_range = \"../..\"\n", "\n", @@ -67,60 +67,83 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "974c6dfa-7d87-4388-9ef5-dd29fb855919", + "execution_count": 4, + "id": "48b441be-581b-48d6-86a4-11ea7a36b795", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'tas_seasonal_absolute_model_ensemble_upper_uncertainty-annual-model_ensemble-tas-absolute-rcp26-upper_bound-DJF'" + "['tas_seasonal_anomaly_model_ensemble_lower_uncertainty-annual-model_ensemble-tas-anomaly-rcp26-lower_bound-MAM',\n", + " 'tas_seasonal_anomaly_model_ensemble_upper_uncertainty-annual-model_ensemble-tas-anomaly-rcp26-upper_bound-MAM']" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "operations.get_related_uncertainty_coverage_configurations(session, cov)[1].identifier" + "[c.identifier for c in operations.get_related_uncertainty_coverage_configurations(session, cov)]" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "33991a12-8e81-4de0-8812-057b25a6296f", + "execution_count": 3, + "id": "82b5c65f-afe2-4b83-b3a0-54d14a5ea3e5", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id_parts=['annual', 'model_ensemble', 'tas', 'anomaly', 'rcp26', 'MAM']\n", + "about to process the found pattern_parts...\n", + "checking configuration_parameter_name='aggregation_period'...\n", + "id_part='annual'\n", + "checking configuration_parameter_name='climatological_model'...\n", + "id_part='model_ensemble'\n", + "checking configuration_parameter_name='climatological_variable'...\n", + "id_part='tas'\n", + "checking configuration_parameter_name='measure'...\n", + "id_part='anomaly'\n", + "checking configuration_parameter_name='scenario'...\n", + "id_part='rcp26'\n", + "checking configuration_parameter_name='year_period'...\n", + "id_part='MAM'\n", + "result={'aggregation_period': 'annual', 'climatological_model': 'model_ensemble', 'climatological_variable': 'tas', 'measure': 'anomaly', 'scenario': 'rcp26', 'year_period': 'MAM'}\n" + ] + }, { "data": { "text/plain": [ - "CoverageConfiguration(description_english='Average daily air temperature near the ground', color_scale_max=32.0, description_italian=\"Temperatura media giornaliera dell'aria vicino al suolo\", observation_variable_id=None, netcdf_main_dataset_name='tas_stddown', observation_variable_aggregation_type=None, thredds_url_pattern='ensymbc/std/clipped/tas_stddown_{scenario}_{year_period}_ts19762100_ls_VFVG.nc', uncertainty_lower_bounds_coverage_configuration_id=None, name='tas_seasonal_absolute_model_ensemble_lower_uncertainty', wms_main_layer_name='tas_stddown', uncertainty_upper_bounds_coverage_configuration_id=None, id=UUID('37ec5342-7eba-4d99-831e-b8e027577ec0'), unit='ÂșC', display_name_english='Mean temperature', palette='default/seq-YlOrRd', display_name_italian='Temperatura media', color_scale_min=-3.0, coverage_id_pattern='{name}-{aggregation_period}-{climatological_model}-{climatological_variable}-{measure}-{scenario}-{uncertainty_type}-{year_period}')" + "{'aggregation_period': 'annual',\n", + " 'climatological_model': 'model_ensemble',\n", + " 'climatological_variable': 'tas',\n", + " 'measure': 'anomaly',\n", + " 'scenario': 'rcp26',\n", + " 'year_period': 'MAM'}" ] }, - "execution_count": 6, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "cov.configuration.uncertainty_lower_bounds_coverage_configuration" + "cov.configuration.retrieve_configuration_parameters(cov.identifier)" ] }, { "cell_type": "code", "execution_count": 4, - "id": "e2b7ec11-5168-42dd-8992-cb98f1f4def3", + "id": "f829643d-7e9c-40f4-b352-ea4d74e4ed2f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['tas_seasonal_absolute_model_ec_earth_cclm4_8_17-annual-ec_earth_cclm_4_8_17-tas-absolute-rcp26-DJF',\n", - " 'tas_seasonal_absolute_model_ec_earth_racmo22e-annual-ec_earth_racmo22e-tas-absolute-rcp26-DJF',\n", - " 'tas_seasonal_absolute_model_ec_earth_rca4-annual-ec_earth_rca4-tas-absolute-rcp26-DJF',\n", - " 'tas_seasonal_absolute_model_hadgem2_es_racmo22e-annual-hadgem2_racmo22e-tas-absolute-rcp26-DJF',\n", - " 'tas_seasonal_absolute_model_mpi_esm_lr_remo2009-annual-mpi_esm_lr_remo2009-tas-absolute-rcp26-DJF']" + "'{name}-{aggregation_period}-{climatological_model}-{climatological_variable}-{measure}-{scenario}-{year_period}'" ] }, "execution_count": 4, @@ -129,8 +152,77 @@ } ], "source": [ - "[c.identifier for c in operations.get_related_coverages(cov)]" + "cov.configuration.coverage_id_pattern" ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5ae32788-669a-4903-8222-c694d4dacf3e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp26-DJF',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp26-MAM',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp26-JJA',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp26-SON',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp45-DJF',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp45-MAM',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp45-JJA',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp45-SON',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp85-DJF',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp85-MAM',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp85-JJA',\n", + " 'tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp85-SON']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.generate_coverage_identifiers(cov.configuration)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "366393b0-cea5-4a2b-bd3a-13316b2c85d0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tas_annual_absolute_model_ensemble-annual-model_ensemble-tas-absolute-rcp26-year tas_annual_absolute_model_ec_earth_cclm4_8_17-annual-ec_earth_cclm_4_8_17-tas-absolute-rcp26-year\n", + "----------\n", + "tas_annual_absolute_model_ensemble-annual-model_ensemble-tas-absolute-rcp26-year tas_annual_absolute_model_ec_earth_racmo22e-annual-ec_earth_racmo22e-tas-absolute-rcp26-year\n", + "----------\n", + "tas_annual_absolute_model_ensemble-annual-model_ensemble-tas-absolute-rcp26-year tas_annual_absolute_model_ec_earth_rca4-annual-ec_earth_rca4-tas-absolute-rcp26-year\n", + "----------\n", + "tas_annual_absolute_model_ensemble-annual-model_ensemble-tas-absolute-rcp26-year tas_annual_absolute_model_hadgem2_es_racmo22e-annual-hadgem2_racmo22e-tas-absolute-rcp26-year\n", + "----------\n", + "tas_annual_absolute_model_ensemble-annual-model_ensemble-tas-absolute-rcp26-year tas_annual_absolute_model_mpi_esm_lr_remo2009-annual-mpi_esm_lr_remo2009-tas-absolute-rcp26-year\n", + "----------\n" + ] + } + ], + "source": [ + "for related_cov in operations.get_related_coverages(cov):\n", + " print(cov.identifier, related_cov.identifier)\n", + " print(\"----------\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b50e512-cff5-4f30-8576-9e2ea92a2948", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/tests/test_operations.py b/tests/test_operations.py index c5ec6405..eb8d46e4 100644 --- a/tests/test_operations.py +++ b/tests/test_operations.py @@ -1,11 +1,20 @@ import datetime as dt import pytest -from arpav_ppcv import operations +from pandas.core.dtypes.common import ( + is_datetime64_ns_dtype, + is_float_dtype, +) + +from arpav_ppcv import ( + database, + operations, +) +from arpav_ppcv.schemas import coverages @pytest.mark.parametrize( - "range, expected", + "temporal_range, expected", [ pytest.param("../..", (None, None)), pytest.param( @@ -22,6 +31,152 @@ ), ], ) -def test_parse_temporal_range(range, expected): - result = operations._parse_temporal_range(range) +def test_parse_temporal_range(temporal_range, expected): + result = operations._parse_temporal_range(temporal_range) assert result == expected + + +@pytest.mark.parametrize( + [ + "time_start", + "time_end", + "expected_first_tas_value", + "expected_last_tas_value", + "expected_first_dt_value", + "expected_last_dt_value", + ], + [ + pytest.param( + None, + None, + 2.640222, + 3.911859, + "1976-02-15T12:00:00+00:00", + "2017-02-14T16:00:00+00:00", + ), + pytest.param( + dt.datetime(1980, 1, 1), + None, + 3.5937133, + 3.911859, + "1980-02-15T11:48:17.561000+00:00", + "2017-02-14T16:00:00+00:00", + ), + pytest.param( + None, + dt.datetime(2000, 1, 1), + 2.640222, + 4.3369384, + "1976-02-15T12:00:00+00:00", + "1999-02-15T04:52:40.976000+00:00", + ), + pytest.param( + dt.datetime(1980, 1, 1), + dt.datetime(2000, 1, 1), + 3.5937133, + 4.3369384, + "1980-02-15T11:48:17.561000+00:00", + "1999-02-15T04:52:40.976000+00:00", + ), + ], +) +def test_parse_ncss_dataset( + sample_tas_csv_data, + time_start, + time_end, + expected_first_tas_value, + expected_last_tas_value, + expected_first_dt_value, + expected_last_dt_value, +): + result = operations._parse_ncss_dataset( + raw_data=sample_tas_csv_data, + source_main_ds_name="tas", + time_start=time_start, + time_end=time_end, + target_main_ds_name="tas", + ) + assert result.index.name == "time" + assert is_datetime64_ns_dtype(result.index.dtype) + assert is_float_dtype(result.tas) + assert result.index[0].isoformat() == expected_first_dt_value + assert result.index[-1].isoformat() == expected_last_dt_value + assert result.tas[0] == pytest.approx(expected_first_tas_value) + assert result.tas[-1] == pytest.approx(expected_last_tas_value) + + +@pytest.mark.parametrize( + "cov_conf_name, cov_identifier, expected_lower_identifier, expected_upper_identifier", + [ + pytest.param( + "tas_seasonal_anomaly_model_ensemble", + "tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp26-MAM", + "tas_seasonal_anomaly_model_ensemble_lower_uncertainty-annual-model_ensemble-tas-anomaly-rcp26-lower_bound-MAM", + "tas_seasonal_anomaly_model_ensemble_upper_uncertainty-annual-model_ensemble-tas-anomaly-rcp26-upper_bound-MAM", + ), + pytest.param( + "tas_annual_absolute_model_ensemble", + "tas_annual_absolute_model_ensemble-annual-model_ensemble-tas-absolute-rcp26-year", + "tas_annual_absolute_model_ensemble_lower_uncertainty-annual-model_ensemble-tas-absolute-rcp26-lower_bound-year", + "tas_annual_absolute_model_ensemble_upper_uncertainty-annual-model_ensemble-tas-absolute-rcp26-upper_bound-year", + ), + ], +) +def test_get_related_uncertainty_coverage_configurations( + arpav_db_session, + sample_real_coverage_configurations: list[coverages.CoverageConfiguration], # noqa + cov_conf_name, + cov_identifier, + expected_lower_identifier, + expected_upper_identifier, +): + cov_conf = database.get_coverage_configuration_by_name( + arpav_db_session, cov_conf_name + ) + lower_, upper_ = operations.get_related_uncertainty_coverage_configurations( + arpav_db_session, + coverage=coverages.CoverageInternal( + configuration=cov_conf, identifier=cov_identifier + ), + ) + assert lower_.identifier == expected_lower_identifier + assert upper_.identifier == expected_upper_identifier + + +@pytest.mark.parametrize( + "cov_conf_name, cov_identifier, expected_related_coverage_identifiers", + [ + pytest.param( + "tas_seasonal_anomaly_model_ensemble", + "tas_seasonal_anomaly_model_ensemble-annual-model_ensemble-tas-anomaly-rcp26-MAM", + [ + "tas_seasonal_anomaly_model_ec_earth_cclm4_8_17-annual-ec_earth_cclm_4_8_17-tas-anomaly-rcp26-MAM", + "tas_seasonal_anomaly_model_ec_earth_racmo22e-annual-ec_earth_racmo22e-tas-anomaly-rcp26-MAM", + "tas_seasonal_anomaly_model_ec_earth_rca4-annual-ec_earth_rca4-tas-anomaly-rcp26-MAM", + "tas_seasonal_anomaly_model_hadgem2_es_racmo22e-annual-hadgem2_racmo22e-tas-anomaly-rcp26-MAM", + "tas_seasonal_anomaly_model_mpi_esm_lr_remo2009-annual-mpi_esm_lr_remo2009-tas-anomaly-rcp26-MAM", + ], + ) + ], +) +def test_get_related_coverage_configurations( + arpav_db_session, + sample_real_coverage_configurations: list[coverages.CoverageConfiguration], # noqa + cov_conf_name, + cov_identifier, + expected_related_coverage_identifiers, +): + cov_conf = database.get_coverage_configuration_by_name( + arpav_db_session, cov_conf_name + ) + related = operations.get_related_coverages( + coverage=coverages.CoverageInternal( + configuration=cov_conf, identifier=cov_identifier + ) + ) + related_cov_identifiers = [c.identifier for c in related] + for expected_identifier in expected_related_coverage_identifiers: + assert expected_identifier in related_cov_identifiers + + for found_identifier in related_cov_identifiers: + assert found_identifier in expected_related_coverage_identifiers