From e3eb9ff8797c500dda33a4469fc38a5893994884 Mon Sep 17 00:00:00 2001 From: Manuel Schlund <32543114+schlunma@users.noreply.github.com> Date: Tue, 4 Feb 2025 15:36:05 +0100 Subject: [PATCH] Allow using multi model statistics preprocessor on datasets without `timerange` (#2644) --- esmvalcore/_recipe/recipe.py | 20 ++-- esmvalcore/local.py | 10 +- tests/unit/recipe/test_recipe.py | 157 +++++++++++++++++++++++-------- 3 files changed, 136 insertions(+), 51 deletions(-) diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index 2128d82cdb..d0b028ea22 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -367,12 +367,14 @@ def _get_common_attributes(products, settings): if all(p.attributes.get(key, object()) == value for p in products): attributes[key] = value - # Ensure that attribute timerange is always available. This depends on the - # "span" setting: if "span=overlap", the intersection of all periods is - # used; if "span=full", the union is used. The default value for "span" is - # "overlap". + # Ensure that attribute timerange is always available if at least one of + # the input datasets defines it. This depends on the "span" setting: if + # "span=overlap", the intersection of all periods is used; if "span=full", + # the union is used. The default value for "span" is "overlap". span = settings.get("span", "overlap") for product in products: + if "timerange" not in product.attributes: + continue timerange = product.attributes["timerange"] start, end = _parse_period(timerange) if "timerange" not in attributes: @@ -397,10 +399,12 @@ def _get_common_attributes(products, settings): attributes["timerange"] = _dates_to_timerange(start_date, end_date) - # Ensure that attributes start_year and end_year are always available - start_year, end_year = _parse_period(attributes["timerange"]) - attributes["start_year"] = int(str(start_year[0:4])) - attributes["end_year"] = int(str(end_year[0:4])) + # Ensure that attributes start_year and end_year are always available if at + # least one of the input datasets defines it + if "timerange" in attributes: + start_year, end_year = _parse_period(attributes["timerange"]) + attributes["start_year"] = int(str(start_year[0:4])) + attributes["end_year"] = int(str(end_year[0:4])) return attributes diff --git a/esmvalcore/local.py b/esmvalcore/local.py index 41cf424476..fc912e734f 100644 --- a/esmvalcore/local.py +++ b/esmvalcore/local.py @@ -539,14 +539,18 @@ def _get_multiproduct_filename(attributes: dict, preproc_dir: Path) -> Path: # Remove duplicate segments: filename_segments = list(dict.fromkeys(filename_segments)) - # Add period and extension - filename_segments.append(f"{attributes['timerange'].replace('/', '-')}.nc") + # Add time period if possible + if "timerange" in attributes: + filename_segments.append( + f"{attributes['timerange'].replace('/', '-')}" + ) + filename = f"{'_'.join(filename_segments)}.nc" outfile = Path( preproc_dir, attributes["diagnostic"], attributes["variable_group"], - "_".join(filename_segments), + filename, ) return outfile diff --git a/tests/unit/recipe/test_recipe.py b/tests/unit/recipe/test_recipe.py index 5acc625c8d..d508fe9f9c 100644 --- a/tests/unit/recipe/test_recipe.py +++ b/tests/unit/recipe/test_recipe.py @@ -241,6 +241,19 @@ def test_multi_model_filename_full(): assert attributes["end_year"] == 1992 +@pytest.mark.parametrize("span", ["full", "overlap"]) +def test_multi_model_filename_no_timerange(span): + """Test timerange in multi-model filename is correct.""" + cube = iris.cube.Cube(np.array([1])) + products = [ + PreprocessorFile(cube, "A", {}), + PreprocessorFile(cube, "B", {}), + ] + settings = {"span": span} + attributes = _recipe._get_common_attributes(products, settings) + assert "timerange" not in attributes + + def test_update_multiproduct_multi_model_statistics(): """Test ``_update_multiproduct``.""" settings = { @@ -317,15 +330,15 @@ def test_update_multiproduct_multi_model_statistics(): for attr in common_attributes: assert attr in product.attributes assert product.attributes[attr] == common_attributes[attr] - assert "alias" in product.attributes - assert "dataset" in product.attributes - assert "multi_model_statistics" in product.attributes - assert "timerange" in product.attributes - assert product.attributes["timerange"] == "2002/2004" - assert "start_year" in product.attributes - assert product.attributes["start_year"] == 2002 - assert "end_year" in product.attributes - assert product.attributes["end_year"] == 2004 + assert "alias" in product.attributes + assert "dataset" in product.attributes + assert "multi_model_statistics" in product.attributes + assert "timerange" in product.attributes + assert product.attributes["timerange"] == "2002/2004" + assert "start_year" in product.attributes + assert product.attributes["start_year"] == 2002 + assert "end_year" in product.attributes + assert product.attributes["end_year"] == 2004 if "MultiModelStd_Dev" in str(product.filename): assert product.attributes["alias"] == "MultiModelStd_Dev" assert product.attributes["dataset"] == "MultiModelStd_Dev" @@ -352,6 +365,72 @@ def test_update_multiproduct_multi_model_statistics(): assert "MultiModelStd_Dev" in str(stats["std_dev"].filename) +def test_update_multiproduct_no_timerange(): + """Test ``_update_multiproduct``.""" + settings = { + "multi_model_statistics": {"statistics": ["mean"]}, + "save": {"compute": False}, + } + common_attributes = { + "project": "CMIP6", + "diagnostic": "d", + "variable_group": "var", + } + cube = iris.cube.Cube(np.array([1])) + products = [ + PreprocessorFile( + cube, + "A", + attributes={ + "dataset": "a", + **common_attributes, + }, + settings=settings, + ), + PreprocessorFile( + cube, + "B", + attributes={ + "dataset": "b", + **common_attributes, + }, + settings=settings, + ), + ] + order = ("load", "multi_model_statistics", "save") + preproc_dir = "/preproc" + step = "multi_model_statistics" + output, settings = _recipe._update_multiproduct( + products, order, preproc_dir, step + ) + + assert len(output) == 1 + product = list(output)[0] + + assert product.filename == Path("/preproc/d/var/CMIP6_MultiModelMean.nc") + + for attr in common_attributes: + assert attr in product.attributes + assert product.attributes[attr] == common_attributes[attr] + assert "alias" in product.attributes + assert "dataset" in product.attributes + assert "multi_model_statistics" in product.attributes + assert "timerange" not in product.attributes + assert "start_year" not in product.attributes + assert "end_year" not in product.attributes + assert product.attributes["alias"] == "MultiModelMean" + assert product.attributes["dataset"] == "MultiModelMean" + assert product.attributes["multi_model_statistics"] == "MultiModelMean" + + assert len(settings) == 1 + output_products = settings["output_products"] + assert len(output_products) == 1 + stats = output_products[""] + assert len(stats) == 1 + assert "mean" in stats + assert "MultiModelMean" in str(stats["mean"].filename) + + def test_update_multiproduct_multi_model_statistics_percentile(): """Test ``_update_multiproduct``.""" settings = { @@ -434,15 +513,15 @@ def test_update_multiproduct_multi_model_statistics_percentile(): for attr in common_attributes: assert attr in product.attributes assert product.attributes[attr] == common_attributes[attr] - assert "alias" in product.attributes - assert "dataset" in product.attributes - assert "multi_model_statistics" in product.attributes - assert "timerange" in product.attributes - assert product.attributes["timerange"] == "2002/2004" - assert "start_year" in product.attributes - assert product.attributes["start_year"] == 2002 - assert "end_year" in product.attributes - assert product.attributes["end_year"] == 2004 + assert "alias" in product.attributes + assert "dataset" in product.attributes + assert "multi_model_statistics" in product.attributes + assert "timerange" in product.attributes + assert product.attributes["timerange"] == "2002/2004" + assert "start_year" in product.attributes + assert product.attributes["start_year"] == 2002 + assert "end_year" in product.attributes + assert product.attributes["end_year"] == 2004 if "MultiModelPercentile5-0" in str(product.filename): assert product.attributes["alias"] == "MultiModelPercentile5-0" assert product.attributes["dataset"] == "MultiModelPercentile5-0" @@ -513,16 +592,16 @@ def test_update_multiproduct_ensemble_statistics(): for attr in common_attributes: assert attr in product.attributes assert product.attributes[attr] == common_attributes[attr] - assert "alias" in product.attributes - assert product.attributes["alias"] == "EnsembleMedian" - assert "dataset" in product.attributes - assert product.attributes["dataset"] == "CanESM2" - assert "ensemble_statistics" in product.attributes - assert product.attributes["ensemble_statistics"] == "EnsembleMedian" - assert "start_year" in product.attributes - assert product.attributes["start_year"] == 2000 - assert "end_year" in product.attributes - assert product.attributes["end_year"] == 2000 + assert "alias" in product.attributes + assert product.attributes["alias"] == "EnsembleMedian" + assert "dataset" in product.attributes + assert product.attributes["dataset"] == "CanESM2" + assert "ensemble_statistics" in product.attributes + assert product.attributes["ensemble_statistics"] == "EnsembleMedian" + assert "start_year" in product.attributes + assert product.attributes["start_year"] == 2000 + assert "end_year" in product.attributes + assert product.attributes["end_year"] == 2000 assert len(settings) == 1 output_products = settings["output_products"] @@ -585,18 +664,16 @@ def test_update_multiproduct_ensemble_statistics_percentile(): for attr in common_attributes: assert attr in product.attributes assert product.attributes[attr] == common_attributes[attr] - assert "alias" in product.attributes - assert product.attributes["alias"] == "EnsemblePercentile5" - assert "dataset" in product.attributes - assert product.attributes["dataset"] == "CanESM2" - assert "ensemble_statistics" in product.attributes - assert product.attributes["ensemble_statistics"] == ( - "EnsemblePercentile5" - ) - assert "start_year" in product.attributes - assert product.attributes["start_year"] == 2000 - assert "end_year" in product.attributes - assert product.attributes["end_year"] == 2000 + assert "alias" in product.attributes + assert product.attributes["alias"] == "EnsemblePercentile5" + assert "dataset" in product.attributes + assert product.attributes["dataset"] == "CanESM2" + assert "ensemble_statistics" in product.attributes + assert product.attributes["ensemble_statistics"] == ("EnsemblePercentile5") + assert "start_year" in product.attributes + assert product.attributes["start_year"] == 2000 + assert "end_year" in product.attributes + assert product.attributes["end_year"] == 2000 assert len(settings) == 1 output_products = settings["output_products"]