Skip to content

Commit

Permalink
Allow using multi model statistics preprocessor on datasets without `timerange` (#2644)
Browse files Browse the repository at this point in the history
  • Loading branch information
schlunma authored Feb 4, 2025
1 parent 11c8eea commit e3eb9ff
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 51 deletions.
20 changes: 12 additions & 8 deletions esmvalcore/_recipe/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,12 +367,14 @@ def _get_common_attributes(products, settings):
if all(p.attributes.get(key, object()) == value for p in products):
attributes[key] = value

# Ensure that attribute timerange is always available. This depends on the
# "span" setting: if "span=overlap", the intersection of all periods is
# used; if "span=full", the union is used. The default value for "span" is
# "overlap".
# Ensure that attribute timerange is always available if at least one of
# the input datasets defines it. This depends on the "span" setting: if
# "span=overlap", the intersection of all periods is used; if "span=full",
# the union is used. The default value for "span" is "overlap".
span = settings.get("span", "overlap")
for product in products:
if "timerange" not in product.attributes:
continue
timerange = product.attributes["timerange"]
start, end = _parse_period(timerange)
if "timerange" not in attributes:
Expand All @@ -397,10 +399,12 @@ def _get_common_attributes(products, settings):

attributes["timerange"] = _dates_to_timerange(start_date, end_date)

# Ensure that attributes start_year and end_year are always available
start_year, end_year = _parse_period(attributes["timerange"])
attributes["start_year"] = int(str(start_year[0:4]))
attributes["end_year"] = int(str(end_year[0:4]))
# Ensure that attributes start_year and end_year are always available if at
# least one of the input datasets defines a timerange
if "timerange" in attributes:
start_year, end_year = _parse_period(attributes["timerange"])
attributes["start_year"] = int(str(start_year[0:4]))
attributes["end_year"] = int(str(end_year[0:4]))

return attributes

Expand Down
10 changes: 7 additions & 3 deletions esmvalcore/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,14 +539,18 @@ def _get_multiproduct_filename(attributes: dict, preproc_dir: Path) -> Path:
# Remove duplicate segments:
filename_segments = list(dict.fromkeys(filename_segments))

# Add period and extension
filename_segments.append(f"{attributes['timerange'].replace('/', '-')}.nc")
# Add time period if possible
if "timerange" in attributes:
filename_segments.append(
f"{attributes['timerange'].replace('/', '-')}"
)

filename = f"{'_'.join(filename_segments)}.nc"
outfile = Path(
preproc_dir,
attributes["diagnostic"],
attributes["variable_group"],
"_".join(filename_segments),
filename,
)

return outfile
Expand Down
157 changes: 117 additions & 40 deletions tests/unit/recipe/test_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,19 @@ def test_multi_model_filename_full():
assert attributes["end_year"] == 1992


@pytest.mark.parametrize("span", ["full", "overlap"])
def test_multi_model_filename_no_timerange(span):
    """Test that no time attributes are derived without input timeranges.

    Neither input product defines ``timerange``, so for both supported
    ``span`` settings the common attributes must contain no ``timerange``
    and none of the ``start_year``/``end_year`` values derived from it.
    """
    cube = iris.cube.Cube(np.array([1]))
    # Both products carry empty attribute dicts, i.e., no ``timerange``.
    products = [
        PreprocessorFile(cube, "A", {}),
        PreprocessorFile(cube, "B", {}),
    ]
    settings = {"span": span}
    attributes = _recipe._get_common_attributes(products, settings)
    assert "timerange" not in attributes
    # start_year/end_year are only computed from an existing timerange.
    assert "start_year" not in attributes
    assert "end_year" not in attributes


def test_update_multiproduct_multi_model_statistics():
"""Test ``_update_multiproduct``."""
settings = {
Expand Down Expand Up @@ -317,15 +330,15 @@ def test_update_multiproduct_multi_model_statistics():
for attr in common_attributes:
assert attr in product.attributes
assert product.attributes[attr] == common_attributes[attr]
assert "alias" in product.attributes
assert "dataset" in product.attributes
assert "multi_model_statistics" in product.attributes
assert "timerange" in product.attributes
assert product.attributes["timerange"] == "2002/2004"
assert "start_year" in product.attributes
assert product.attributes["start_year"] == 2002
assert "end_year" in product.attributes
assert product.attributes["end_year"] == 2004
assert "alias" in product.attributes
assert "dataset" in product.attributes
assert "multi_model_statistics" in product.attributes
assert "timerange" in product.attributes
assert product.attributes["timerange"] == "2002/2004"
assert "start_year" in product.attributes
assert product.attributes["start_year"] == 2002
assert "end_year" in product.attributes
assert product.attributes["end_year"] == 2004
if "MultiModelStd_Dev" in str(product.filename):
assert product.attributes["alias"] == "MultiModelStd_Dev"
assert product.attributes["dataset"] == "MultiModelStd_Dev"
Expand All @@ -352,6 +365,72 @@ def test_update_multiproduct_multi_model_statistics():
assert "MultiModelStd_Dev" in str(stats["std_dev"].filename)


def test_update_multiproduct_no_timerange():
    """Check ``_update_multiproduct`` for inputs without ``timerange``.

    The single multi-model mean product must get a filename without a time
    period segment and must not carry ``timerange``, ``start_year`` or
    ``end_year`` attributes.
    """
    settings = {
        "multi_model_statistics": {"statistics": ["mean"]},
        "save": {"compute": False},
    }
    common_attributes = {
        "project": "CMIP6",
        "diagnostic": "d",
        "variable_group": "var",
    }
    cube = iris.cube.Cube(np.array([1]))
    # Two inputs that differ only in ``dataset``; neither has a timerange.
    products = [
        PreprocessorFile(
            cube,
            label,
            attributes={"dataset": dataset, **common_attributes},
            settings=settings,
        )
        for label, dataset in (("A", "a"), ("B", "b"))
    ]

    output, updated_settings = _recipe._update_multiproduct(
        products,
        ("load", "multi_model_statistics", "save"),
        "/preproc",
        "multi_model_statistics",
    )

    # Exactly one statistics product is expected (only "mean" requested).
    assert len(output) == 1
    product = next(iter(output))
    assert product.filename == Path("/preproc/d/var/CMIP6_MultiModelMean.nc")

    attrs = product.attributes
    for key, expected in common_attributes.items():
        assert key in attrs
        assert attrs[key] == expected
    for key in ("alias", "dataset", "multi_model_statistics"):
        assert key in attrs
    for key in ("timerange", "start_year", "end_year"):
        assert key not in attrs
    for key in ("alias", "dataset", "multi_model_statistics"):
        assert attrs[key] == "MultiModelMean"

    assert len(updated_settings) == 1
    output_products = updated_settings["output_products"]
    assert len(output_products) == 1
    stats = output_products[""]
    assert len(stats) == 1
    assert "mean" in stats
    assert "MultiModelMean" in str(stats["mean"].filename)


def test_update_multiproduct_multi_model_statistics_percentile():
"""Test ``_update_multiproduct``."""
settings = {
Expand Down Expand Up @@ -434,15 +513,15 @@ def test_update_multiproduct_multi_model_statistics_percentile():
for attr in common_attributes:
assert attr in product.attributes
assert product.attributes[attr] == common_attributes[attr]
assert "alias" in product.attributes
assert "dataset" in product.attributes
assert "multi_model_statistics" in product.attributes
assert "timerange" in product.attributes
assert product.attributes["timerange"] == "2002/2004"
assert "start_year" in product.attributes
assert product.attributes["start_year"] == 2002
assert "end_year" in product.attributes
assert product.attributes["end_year"] == 2004
assert "alias" in product.attributes
assert "dataset" in product.attributes
assert "multi_model_statistics" in product.attributes
assert "timerange" in product.attributes
assert product.attributes["timerange"] == "2002/2004"
assert "start_year" in product.attributes
assert product.attributes["start_year"] == 2002
assert "end_year" in product.attributes
assert product.attributes["end_year"] == 2004
if "MultiModelPercentile5-0" in str(product.filename):
assert product.attributes["alias"] == "MultiModelPercentile5-0"
assert product.attributes["dataset"] == "MultiModelPercentile5-0"
Expand Down Expand Up @@ -513,16 +592,16 @@ def test_update_multiproduct_ensemble_statistics():
for attr in common_attributes:
assert attr in product.attributes
assert product.attributes[attr] == common_attributes[attr]
assert "alias" in product.attributes
assert product.attributes["alias"] == "EnsembleMedian"
assert "dataset" in product.attributes
assert product.attributes["dataset"] == "CanESM2"
assert "ensemble_statistics" in product.attributes
assert product.attributes["ensemble_statistics"] == "EnsembleMedian"
assert "start_year" in product.attributes
assert product.attributes["start_year"] == 2000
assert "end_year" in product.attributes
assert product.attributes["end_year"] == 2000
assert "alias" in product.attributes
assert product.attributes["alias"] == "EnsembleMedian"
assert "dataset" in product.attributes
assert product.attributes["dataset"] == "CanESM2"
assert "ensemble_statistics" in product.attributes
assert product.attributes["ensemble_statistics"] == "EnsembleMedian"
assert "start_year" in product.attributes
assert product.attributes["start_year"] == 2000
assert "end_year" in product.attributes
assert product.attributes["end_year"] == 2000

assert len(settings) == 1
output_products = settings["output_products"]
Expand Down Expand Up @@ -585,18 +664,16 @@ def test_update_multiproduct_ensemble_statistics_percentile():
for attr in common_attributes:
assert attr in product.attributes
assert product.attributes[attr] == common_attributes[attr]
assert "alias" in product.attributes
assert product.attributes["alias"] == "EnsemblePercentile5"
assert "dataset" in product.attributes
assert product.attributes["dataset"] == "CanESM2"
assert "ensemble_statistics" in product.attributes
assert product.attributes["ensemble_statistics"] == (
"EnsemblePercentile5"
)
assert "start_year" in product.attributes
assert product.attributes["start_year"] == 2000
assert "end_year" in product.attributes
assert product.attributes["end_year"] == 2000
assert "alias" in product.attributes
assert product.attributes["alias"] == "EnsemblePercentile5"
assert "dataset" in product.attributes
assert product.attributes["dataset"] == "CanESM2"
assert "ensemble_statistics" in product.attributes
assert product.attributes["ensemble_statistics"] == ("EnsemblePercentile5")
assert "start_year" in product.attributes
assert product.attributes["start_year"] == 2000
assert "end_year" in product.attributes
assert product.attributes["end_year"] == 2000

assert len(settings) == 1
output_products = settings["output_products"]
Expand Down

0 comments on commit e3eb9ff

Please sign in to comment.