From 1ab7c6acab6476579bd70d87489ea370444311e3 Mon Sep 17 00:00:00 2001 From: abel Date: Tue, 3 May 2022 18:00:20 +0200 Subject: [PATCH 01/10] WIP: - test with real data - add unit tests - Add filtering of missing values (start and end of data sampled period) when having DJF or similar season (between years) We can have a workaround in icclim until it is done in xclim - fix todos - add documentation on slice_mode doc --- doc/source/references/icclim_index_api.rst | 10 +- icclim/main.py | 4 +- icclim/models/frequency.py | 143 +++++++++++++++------ icclim/models/user_index_config.py | 2 +- icclim/tests/test_frequency.py | 19 +++ icclim/tests/test_main.py | 4 +- icclim/tests/test_utils.py | 9 +- setup.py | 3 +- 8 files changed, 143 insertions(+), 51 deletions(-) diff --git a/doc/source/references/icclim_index_api.rst b/doc/source/references/icclim_index_api.rst index 1f9d0599..71cc2f97 100644 --- a/doc/source/references/icclim_index_api.rst +++ b/doc/source/references/icclim_index_api.rst @@ -68,15 +68,15 @@ summer, autumn and monthly frequency: | The winter season (``DJF``) of 2000 is composed of December 2000, January 2001 and February 2001. | Likewise, the winter half-year (``ONDJFM``) of 2000 includes October 2000, November 2000, December 2000, January 2001, February 2001 and March 2001. -Monthly time series with months selected by user: - >>> slice_mode = ['month',[4,5,11]] # index will be computed only for April, May and November +Monthly time series with months selected by user (the keyword can be either "month" or "months"): + >>> slice_mode = ['month', [4,5,11]] # index will be computed only for April, May and November or - >>> slice_mode = ['month',[4]] # index will be computed only for April + >>> slice_mode = ['month', [4]] # index will be computed only for April User defined seasons: - >>> slice_mode = ['season',[4,5,6,7]] + >>> slice_mode = ['season', [4,5,6,7]] or - >>> slice_mode = ['season',([11, 12, 1])] + >>> slice_mode = ['season', ([11, 12, 1])] ``threshold`` ~~~~~~~~~~~~~ diff --git a/icclim/main.py b/icclim/main.py index 9b99d699..0667fe5f 100644 --- a/icclim/main.py +++ b/icclim/main.py @@ -113,13 +113,13 @@ def index( index_name: str | None = None, # optional when computing user_indices var_name: str | list[str] | None = None, slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] = None, + time_range: list[datetime] = None, # TODO: use dateparser to accept strings out_file: str | None = None, threshold: float | list[float] | None = None, callback: Callable[[int], None] = log.callback, callback_percentage_start_value: int = 0, callback_percentage_total: int = 100, - base_period_time_range: list[datetime] | None = None, + base_period_time_range: list[datetime] | None = None, # TODO: use dateparser to accept strings window_width: int = 5, only_leap_years: bool = False, ignore_Feb29th: bool = False, diff --git a/icclim/models/frequency.py b/icclim/models/frequency.py index 6be3331d..aa5c75a6 100644 --- a/icclim/models/frequency.py +++ b/icclim/models/frequency.py @@ -5,11 +5,13 @@ """ from __future__ import annotations -import datetime +from datetime import datetime, timedelta from enum import Enum from typing import Callable, List, Tuple, Union import cftime + +import dateparser import numpy as np import pandas as pd import xarray as xr @@ -36,8 +38,10 @@ def filter_months(da: DataArray, month_list: list[int]) -> DataArray: def get_seasonal_time_updater( - start_month: int, - end_month: int, + start_month: int, + end_month: int, + start_day: int = 1, + end_day: int = None ) -> Callable[[DataArray], tuple[DataArray, DataArray]]: """Seasonal time updater and time bounds creator method generator. Returns a callable of DataArray which will rewrite the time dimension to @@ -68,15 +72,29 @@ def add_time_bounds(da: DataArray) -> tuple[DataArray, DataArray]: year_of_season_end = year if isinstance(first_time, cftime.datetime): start = cftime.datetime( - year, start_month, 1, calendar=first_time.calendar - ) - end = cftime.datetime( - year_of_season_end, end_month + 1, 1, calendar=first_time.calendar + year, start_month, start_day, calendar=first_time.calendar ) + if end_day is None: + end = cftime.datetime( + year_of_season_end, + end_month + 1, + 1, + calendar=first_time.calendar + ) + else: + end = cftime.datetime( + year_of_season_end, + end_month, + end_day, + calendar=first_time.calendar + ) else: - start = pd.to_datetime(f"{year}-{start_month}") - end = pd.to_datetime(f"{year_of_season_end}-{end_month + 1}") - end = end - datetime.timedelta(days=1) + start = pd.to_datetime(f"{year}-{start_month}-{start_day}") + if end_day is None: + end = pd.to_datetime(f"{year_of_season_end}-{end_month + 1}") + end = end - timedelta(days=1) + else: + end = pd.to_datetime(f"{year_of_season_end}-{end_month}-{end_day}") new_time_axis.append(start + (end - start) / 2) time_bounds.append([start, end]) da.coords["time"] = ("time", new_time_axis) @@ -112,7 +130,7 @@ def add_time_bounds(da: DataArray) -> tuple[DataArray, DataArray]: ] ) ends = starts + offset - ends = ends - datetime.timedelta(days=1) + ends = ends - timedelta(days=1) else: offset = pd.tseries.frequencies.to_offset(freq) starts = pd.to_datetime(da.time.dt.floor("D")) @@ -312,43 +330,57 @@ def _is_season_valid(months): def _get_frequency_from_list(slice_mode_list: list) -> Frequency: if len(slice_mode_list) < 2: raise InvalidIcclimArgumentError( - f"The given slice list {slice_mode_list}" - f" has a length of {len(slice_mode_list)}." - f" The maximum length here is 2." + f"Invalid slice_mode format." + f" When slice_mode is a list, its first element must be a keyword and" + f" its second a list (e.g `slice_mode=['season', [1,2,3]]` )." ) sampling_freq = slice_mode_list[0] custom_freq = Frequency.CUSTOM if sampling_freq in ["month", "months"]: - months = slice_mode_list[1] - - def month_list_post_processing(da): - res, bounds = _get_time_bounds_updater("MS")(da) - res = get_month_filter(months)(res) - return res, bounds - + season_bounds = slice_mode_list[1] custom_freq._freq = _Freq( - pre_processing=get_month_filter(months), - post_processing=month_list_post_processing, + pre_processing=get_month_filter(season_bounds), + post_processing=_get_filterer_and_time_updater(season_bounds), panda_freq="MS", - description=f"monthly time series (months: {months})", + description=f"monthly time series (months: {season_bounds})", accepted_values=[], ) elif sampling_freq == "season": - months = slice_mode_list[1] - if isinstance(months, Tuple): - months = months[0] + months[1] # concat in case of ([12], [1, 2]) - if not _is_season_valid(months): - raise InvalidIcclimArgumentError( - f"A season created using `slice_mode` must be made of consecutive" - f" months. It was {months}." + season_bounds = slice_mode_list[1] + if isinstance(season_bounds, Tuple): + # concat in case of ([12], [1, 2]) + season_bounds = season_bounds[0] + season_bounds[1] + err_msg = f"A season created using `slice_mode` must be made of either" \ + f" consecutive integer for months such as [1,2,3] or two string for" \ + f" dates such as ['19 july', '14 august']." + if isinstance(season_bounds[0], str): + # season between two dates + if len(season_bounds) != 2: + raise InvalidIcclimArgumentError(err_msg) + begin_date, end_date, delta = read_dates(*season_bounds) + custom_freq._freq = _Freq( + pre_processing=_get_between_dates_filter(begin_date, end_date), + post_processing=get_seasonal_time_updater(begin_date.month, + end_date.month, + begin_date.day, + end_date.day), + # todo ideally we should offset panda_freq by {begin_date.day} days (tbd in xclim) on resample + panda_freq=f"AS-{MONTHS_MAP[begin_date.month]}", + description=f"seasonal time series (season: from {begin_date} to {end_date})", + accepted_values=[], + ) + elif isinstance(season_bounds[0], int): + # season made of consecutive months + if not _is_season_valid(season_bounds): + raise InvalidIcclimArgumentError(err_msg) + custom_freq._freq = _Freq( + pre_processing=get_month_filter(season_bounds), + post_processing=get_seasonal_time_updater(season_bounds[0], + season_bounds[-1]), + panda_freq=f"AS-{MONTHS_MAP[season_bounds[0]]}", + description=f"seasonal time series (season: {season_bounds})", + accepted_values=[], ) - custom_freq._freq = _Freq( - pre_processing=get_month_filter(months), - post_processing=get_seasonal_time_updater(months[0], months[-1]), - panda_freq=f"AS-{MONTHS_MAP[months[0]]}", - description=f"seasonal time series (season: {months})", - accepted_values=[], - ) else: raise InvalidIcclimArgumentError( f"Unknown frequency {slice_mode_list}." @@ -357,4 +389,39 @@ def month_list_post_processing(da): return custom_freq +def _get_between_dates_filter(begin_date: datetime, end_date: datetime): + return lambda da: filter_between_dates(da, begin_date, end_date) + + +def filter_between_dates(da: DataArray, d1: datetime, d2: datetime): + between_dates_range = pd.date_range(d1, d2, freq="D").dayofyear + between_dates_mask = np.logical_and( + da.time.dt.dayofyear >= between_dates_range.min(), + da.time.dt.dayofyear <= between_dates_range.max()) + return da[between_dates_mask] + + +def read_dates(d1: str, d2: str) -> tuple[datetime, datetime, int]: + return date1 := read_date(d1), date2 := read_date(d2), (date2 - date1).days + + +def read_date(date_string: str) -> datetime: + error_msg = ( + "The date {} does not have a valid format." + " You can use various formats such as '2 december' or '02-12'." + ) + if (date := dateparser.parse(date_string)) == None: + raise InvalidIcclimArgumentError(error_msg.format(date_string)) + return date + + +def _get_filterer_and_time_updater(season_bounds): + def filter_and_update_time(da): + res, bounds = _get_time_bounds_updater("MS")(da) + res = get_month_filter(season_bounds)(res) + return res, bounds + + return filter_and_update_time + + SliceMode = Union[Frequency, str, List[Union[str, Tuple, int]]] diff --git a/icclim/models/user_index_config.py b/icclim/models/user_index_config.py index 109fabf9..4eb5df5a 100644 --- a/icclim/models/user_index_config.py +++ b/icclim/models/user_index_config.py @@ -133,7 +133,7 @@ def __init__( var_type=None, is_percent=False, save_percentile=False, - ref_time_range: list[str] = None, + ref_time_range: list[str] = None, # TODO: use dateparser to accept strings ) -> None: self.index_name = index_name self.calc_operation = calc_operation diff --git a/icclim/tests/test_frequency.py b/icclim/tests/test_frequency.py index d1458335..2e24fb78 100644 --- a/icclim/tests/test_frequency.py +++ b/icclim/tests/test_frequency.py @@ -64,6 +64,13 @@ def test_winter(self): assert freq.accepted_values == [] assert freq.post_processing is not None + def test_lookup_season_between_dates(self): + freq = Frequency.lookup(["season",["07-19","08-14"]]) + assert freq == Frequency.CUSTOM + assert freq.panda_freq == "AS-JUL" + assert freq.accepted_values == [] + assert freq.post_processing is not None + class Test_filter_months: def test_simple(self): @@ -106,3 +113,15 @@ def test_winter(self): time_bds_res[1].data[1] == pd.to_datetime("2043-02") - pd.tseries.offsets.Day() ) + + @pytest.mark.parametrize("use_cf", [True, False]) + def test_between_dates(self, use_cf): + # WHEN + test_da = filter_months(stub_tas(use_cftime=use_cf), [11, 12, 1])\ + .resample(time="AS-NOV")\ + .mean() + da_res, time_bds_res = get_seasonal_time_updater(11, 1, 2, 30)(test_da) + # THEN + np.testing.assert_array_equal(1, da_res) # data must be unchanged + assert time_bds_res[0].data[0] == pd.to_datetime("2041-11-02") + assert time_bds_res[0].data[1] == pd.to_datetime("2042-01-30") diff --git a/icclim/tests/test_main.py b/icclim/tests/test_main.py index 9af365f2..1fa9731c 100644 --- a/icclim/tests/test_main.py +++ b/icclim/tests/test_main.py @@ -42,8 +42,8 @@ class Test_Integration: """ OUTPUT_FILE = "out.nc" - CF_TIME_RANGE = pd.date_range(start="2042-01-01", end="2045-12-31", freq="D") - TIME_RANGE = xr.cftime_range("2042-01-01", end="2045-12-31", freq="D") + TIME_RANGE = pd.date_range(start="2042-01-01", end="2045-12-31", freq="D") + CF_TIME_RANGE = xr.cftime_range("2042-01-01", end="2045-12-31", freq="D") data = xr.DataArray( data=(np.full(len(TIME_RANGE), 20).reshape((len(TIME_RANGE), 1, 1))), dims=["time", "lat", "lon"], diff --git a/icclim/tests/test_utils.py b/icclim/tests/test_utils.py index b961c05b..31001561 100644 --- a/icclim/tests/test_utils.py +++ b/icclim/tests/test_utils.py @@ -8,7 +8,9 @@ from icclim.models.index_config import CfVariable from icclim.models.user_index_config import UserIndexConfig -VALUE_COUNT = 365 * 5 + 1 +import xarray as xr + +VALUE_COUNT = 365 * 5 + 1 # 5 years of data (with 1 leap year) COORDS = dict( lat=[42], lon=[42], @@ -16,6 +18,7 @@ ) K2C = 273.15 +CF_TIME_RANGE = xr.cftime_range("2042-01-01", periods=VALUE_COUNT, freq="D") def stub_user_index(cf_vars: List[CfVariable]): return UserIndexConfig( @@ -23,13 +26,15 @@ def stub_user_index(cf_vars: List[CfVariable]): ) -def stub_tas(tas_value: float = 1.0, use_dask=False): +def stub_tas(tas_value: float = 1.0, use_dask=False, use_cftime=False): da = xarray.DataArray( data=(np.full(VALUE_COUNT, tas_value).reshape((VALUE_COUNT, 1, 1))), dims=["time", "lat", "lon"], coords=COORDS, attrs={"units": "K"}, ) + if use_cftime: + da["time"] = CF_TIME_RANGE if use_dask: da.chunk() return da diff --git a/setup.py b/setup.py index 1a882472..275b1672 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from icclim.models.constants import ICCLIM_VERSION MINIMAL_REQUIREMENTS = [ - # todo: Unpin numpy 1.22 once numba works with it (numba comes with xclim) + # todo: Unpin numpy 1.22 once numba work with it (numba comes with xclim) # https://github.com/numba/numba/issues/7754 "numpy>=1.16,<1.22", "xarray>=0.17", @@ -19,6 +19,7 @@ "zarr", "rechunker>=0.3, !=0.4", "fsspec", + "dateparser", ] setup( From 3f8f36828e1186a450b53e9cfba8d2e8ee6b89c4 Mon Sep 17 00:00:00 2001 From: abel Date: Fri, 6 May 2022 05:43:55 +0200 Subject: [PATCH 02/10] MAINT: Cleanup --- icclim/models/constants.py | 6 +++--- icclim/models/frequency.py | 13 ++++++------- icclim/tests/test_generated_api.py | 22 +++++++++++----------- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/icclim/models/constants.py b/icclim/models/constants.py index cecefa7e..9c3346cd 100644 --- a/icclim/models/constants.py +++ b/icclim/models/constants.py @@ -3,7 +3,7 @@ ICCLIM_VERSION = "5.2.2-dev" -# placeholder for user_indices +# placeholder for user_index PERCENTILE_THRESHOLD_STAMP = "p" WET_DAY_THRESHOLD = 1 # 1mm PRECIPITATION = "p" @@ -20,10 +20,10 @@ TAS_MAX = ["tasmax", "tasmaxadjust", "tmax", "tx", "maxt", "TMAX", "Tmax", "TX", "MAXT", "maxT"] TAS_MIN = ["tasmin", "tasminadjust", "tmin", "tn", "mint", "TMIN", "Tmin", "TN", "MINT", "minT"] -# Source of index definition +# Source of ECA&D indices definition ECAD_ATBD = "ECA&D, Algorithm Theoretical Basis Document (ATBD) v11" -# Index qualifiers +# Index qualifiers (needed to generate the API) QUANTILE_BASED = "QUANTILE_BASED" # fields: QUANTILE_INDEX_FIELDS MODIFIABLE_UNIT = "MODIFIABLE_UNIT" # fields: out_unit MODIFIABLE_THRESHOLD = "MODIFIABLE_THRESHOLD" # fields: threshold diff --git a/icclim/models/frequency.py b/icclim/models/frequency.py index aa5c75a6..600740fa 100644 --- a/icclim/models/frequency.py +++ b/icclim/models/frequency.py @@ -1,6 +1,6 @@ """ `icclim.models.frequency` wraps the concept of pandas frequency in order to resample - time series. `slice_mode` paramater of `icclim.index` is always converted to a + time series. `slice_mode` parameter of `icclim.index` is always converted to a `Frequency`. """ from __future__ import annotations @@ -79,20 +79,19 @@ def add_time_bounds(da: DataArray) -> tuple[DataArray, DataArray]: year_of_season_end, end_month + 1, 1, - calendar=first_time.calendar - ) + calendar=first_time.calendar, + )- timedelta(days=1) else: end = cftime.datetime( year_of_season_end, end_month, end_day, - calendar=first_time.calendar + calendar=first_time.calendar, ) else: start = pd.to_datetime(f"{year}-{start_month}-{start_day}") if end_day is None: - end = pd.to_datetime(f"{year_of_season_end}-{end_month + 1}") - end = end - timedelta(days=1) + end = pd.to_datetime(f"{year_of_season_end}-{end_month + 1}") - timedelta(days=1) else: end = pd.to_datetime(f"{year_of_season_end}-{end_month}-{end_day}") new_time_axis.append(start + (end - start) / 2) @@ -316,7 +315,7 @@ def _get_frequency_from_string(slice_mode: str) -> Frequency: raise InvalidIcclimArgumentError(f"Unknown frequency {slice_mode}.") -def _is_season_valid(months): +def _is_season_valid(months: list[int]) -> bool: is_valid = True for i in range(0, len(months) - 1): is_valid = is_valid and months[i] > 0 and months[i] < 13 diff --git a/icclim/tests/test_generated_api.py b/icclim/tests/test_generated_api.py index fd8d83dc..49c5fce0 100644 --- a/icclim/tests/test_generated_api.py +++ b/icclim/tests/test_generated_api.py @@ -121,7 +121,7 @@ def test_txx__months_slice_mode(): # integration test @pytest.mark.parametrize( - "operator, exp_y1, exp_y2", + "operator, expectation_year_1, expectation_year_2", [ (CalcOperation.MIN, 303.15, 280.15), (CalcOperation.MAX, 303.15, 280.15), @@ -131,7 +131,7 @@ def test_txx__months_slice_mode(): (CalcOperation.MAX_NUMBER_OF_CONSECUTIVE_EVENTS, 1, 1), ], ) -def test_custom_index__season_slice_mode(operator, exp_y1, exp_y2): +def test_custom_index__season_slice_mode(operator, expectation_year_1, expectation_year_2): tas = stub_tas(2.0) tas.loc[{"time": "2042-01-01"}] = 303.15 tas.loc[{"time": "2042-12-01"}] = 280.15 @@ -145,20 +145,20 @@ def test_custom_index__season_slice_mode(operator, exp_y1, exp_y2): "logical_operation": "gt", "thresh": 275, }, - ).compute() - np.testing.assert_almost_equal(res.pouet.isel(time=0), exp_y1) - np.testing.assert_almost_equal(res.pouet.isel(time=1), exp_y2) + ) + np.testing.assert_almost_equal(res.pouet.isel(time=0), expectation_year_1) + np.testing.assert_almost_equal(res.pouet.isel(time=1), expectation_year_2) # integration test @pytest.mark.parametrize( - "operator, exp_y1, exp_y2", + "operator, expectation_year_1, expectation_year_2", [ (CalcOperation.RUN_MEAN, 2, 2), (CalcOperation.RUN_SUM, 14, 14), ], ) -def test_custom_index_run_algos__season_slice_mode(operator, exp_y1, exp_y2): +def test_custom_index_run_algos__season_slice_mode(operator, expectation_year_1, expectation_year_2): tas = stub_tas(2.0) res = icclim.custom_index( in_files=tas, @@ -170,9 +170,9 @@ def test_custom_index_run_algos__season_slice_mode(operator, exp_y1, exp_y2): "extreme_mode": "max", "window_width": 7, }, - ).compute() - np.testing.assert_almost_equal(res.pouet.isel(time=0), exp_y1) - np.testing.assert_almost_equal(res.pouet.isel(time=1), exp_y2) + ) + np.testing.assert_almost_equal(res.pouet.isel(time=0), expectation_year_1) + np.testing.assert_almost_equal(res.pouet.isel(time=1), expectation_year_2) def test_custom_index_anomaly__season_slice_mode(): @@ -187,5 +187,5 @@ def test_custom_index_anomaly__season_slice_mode(): "calc_operation": CalcOperation.ANOMALY, "ref_time_range": [datetime(2042, 1, 1), datetime(2044, 12, 31)], }, - ).compute() + ) np.testing.assert_almost_equal(res.anomaly, 0.96129032) From 999bb4ecd006660b2500f0a6e1c4d7570a608ac6 Mon Sep 17 00:00:00 2001 From: abel Date: Fri, 6 May 2022 05:45:58 +0200 Subject: [PATCH 03/10] MAINT: Use xclim `indexer` --- icclim/ecad_functions.py | 187 ++++++++++------------ icclim/main.py | 3 - icclim/models/frequency.py | 219 ++++++++++++-------------- icclim/models/index_config.py | 4 - icclim/models/user_index_config.py | 6 +- icclim/tests/test_frequency.py | 30 ++-- icclim/tests/test_generated_api.py | 14 +- icclim/tests/test_main.py | 33 +++- icclim/user_indices/calc_operation.py | 15 +- 9 files changed, 238 insertions(+), 273 deletions(-) diff --git a/icclim/ecad_functions.py b/icclim/ecad_functions.py index ed79a0cd..9bfbba9c 100644 --- a/icclim/ecad_functions.py +++ b/icclim/ecad_functions.py @@ -3,7 +3,7 @@ metadata to it. """ import re -from typing import Callable, Optional, Tuple +from typing import Callable, Optional, Tuple, Any from warnings import warn import numpy as np @@ -25,7 +25,7 @@ def gd4(config: IndexConfig) -> DataArray: return _compute_threshold_index( da=config.tas.study_da, threshold=4.0 if config.threshold is None else config.threshold, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), xclim_index_fun=atmos.growing_degree_days, ) @@ -34,7 +34,7 @@ def cfd(config: IndexConfig) -> DataArray: return _compute_threshold_index( da=config.tasmin.study_da, threshold=0.0 if config.threshold is None else config.threshold, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), xclim_index_fun=atmos.consecutive_frost_days, ) @@ -43,7 +43,7 @@ def fd(config: IndexConfig) -> DataArray: return _compute_threshold_index( da=config.tasmin.study_da, threshold=0.0 if config.threshold is None else config.threshold, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), xclim_index_fun=atmos.frost_days, ) @@ -52,7 +52,7 @@ def hd17(config: IndexConfig) -> DataArray: return _compute_threshold_index( da=config.tas.study_da, threshold=17.0 if config.threshold is None else config.threshold, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), xclim_index_fun=atmos.heating_degree_days, ) @@ -61,7 +61,7 @@ def id(config: IndexConfig) -> DataArray: return _compute_threshold_index( da=config.tasmax.study_da, threshold=0.0 if config.threshold is None else config.threshold, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), xclim_index_fun=atmos.ice_days, ) @@ -70,7 +70,7 @@ def csdi(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: thresh = 10 if config.threshold is None else config.threshold return _compute_spell_duration( cf_var=config.tasmin, - freq=config.freq.panda_freq, + freq_config=_build_frequency_kwargs(config), per_thresh=thresh, per_window=config.window, per_interpolation=config.interpolation, @@ -84,7 +84,7 @@ def csdi(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def tg10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_temperature_percentile_index( cf_var=config.tas, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), tas_per_thresh=10, per_window=config.window, per_interpolation=config.interpolation, @@ -98,7 +98,7 @@ def tg10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def tn10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_temperature_percentile_index( cf_var=config.tasmin, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), tas_per_thresh=10, per_window=config.window, per_interpolation=config.interpolation, @@ -112,7 +112,7 @@ def tn10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def tx10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_temperature_percentile_index( cf_var=config.tasmax, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), tas_per_thresh=10, per_window=config.window, per_interpolation=config.interpolation, @@ -124,20 +124,20 @@ def tx10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def txn(config: IndexConfig) -> DataArray: - result = atmos.tx_min(config.tasmax.study_da, freq=config.freq.panda_freq) + result = atmos.tx_min(config.tasmax.study_da, **_build_frequency_kwargs(config)) result = convert_units_to(result, "°C") return result def tnn(config: IndexConfig) -> DataArray: - result = atmos.tn_min(config.tasmin.study_da, freq=config.freq.panda_freq) + result = atmos.tn_min(config.tasmin.study_da, **_build_frequency_kwargs(config)) result = convert_units_to(result, "°C") return result def cdd(config: IndexConfig) -> DataArray: result = atmos.maximum_consecutive_dry_days( - config.pr.study_da, thresh="1.0 mm/day", freq=config.freq.panda_freq + config.pr.study_da, thresh="1.0 mm/day", **_build_frequency_kwargs(config) ) return result @@ -146,7 +146,7 @@ def su(config: IndexConfig) -> DataArray: return _compute_threshold_index( da=config.tasmax.study_da, threshold=25.0 if config.threshold is None else config.threshold, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), xclim_index_fun=atmos.tx_days_above, ) @@ -155,7 +155,7 @@ def tr(config: IndexConfig) -> DataArray: return _compute_threshold_index( da=config.tasmin.study_da, threshold=20.0 if config.threshold is None else config.threshold, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), xclim_index_fun=atmos.tropical_nights, ) @@ -164,7 +164,7 @@ def wsdi(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: thresh = 90 if config.threshold is None else config.threshold return _compute_spell_duration( cf_var=config.tasmax, - freq=config.freq.panda_freq, + freq_config=_build_frequency_kwargs(config), per_thresh=thresh, per_window=config.window, per_interpolation=config.interpolation, @@ -178,7 +178,7 @@ def wsdi(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def tg90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_temperature_percentile_index( cf_var=config.tas, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), tas_per_thresh=90, per_window=config.window, per_interpolation=config.interpolation, @@ -192,7 +192,7 @@ def tg90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def tn90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_temperature_percentile_index( cf_var=config.tasmin, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), tas_per_thresh=90, per_window=config.window, per_interpolation=config.interpolation, @@ -206,7 +206,7 @@ def tn90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def tx90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_temperature_percentile_index( cf_var=config.tasmax, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), tas_per_thresh=90, per_window=config.window, per_interpolation=config.interpolation, @@ -218,13 +218,13 @@ def tx90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def txx(config: IndexConfig) -> DataArray: - result = atmos.tx_max(config.tasmax.study_da, freq=config.freq.panda_freq) + result = atmos.tx_max(config.tasmax.study_da, **_build_frequency_kwargs(config)) result = convert_units_to(result, "°C") return result def tnx(config: IndexConfig) -> DataArray: - result = atmos.tn_max(config.tasmin.study_da, freq=config.freq.panda_freq) + result = atmos.tn_max(config.tasmin.study_da, **_build_frequency_kwargs(config)) result = convert_units_to(result, "°C") return result @@ -233,7 +233,7 @@ def csu(config: IndexConfig) -> DataArray: return _compute_threshold_index( da=config.tasmax.study_da, threshold=25.0 if config.threshold is None else config.threshold, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), xclim_index_fun=atmos.maximum_consecutive_warm_days, ) @@ -241,56 +241,56 @@ def csu(config: IndexConfig) -> DataArray: def prcptot(config: IndexConfig) -> DataArray: result = atmos.precip_accumulation( _filter_in_wet_days(config.pr.study_da, dry_day_value=0), - freq=config.freq.panda_freq, + **_build_frequency_kwargs(config), ) return result def rr1(config: IndexConfig) -> DataArray: result = atmos.wetdays( - config.pr.study_da, thresh="1.0 mm/day", freq=config.freq.panda_freq + config.pr.study_da, thresh="1.0 mm/day", **_build_frequency_kwargs(config) ) return result def sdii(config: IndexConfig) -> DataArray: result = atmos.daily_pr_intensity( - config.pr.study_da, thresh="1.0 mm/day", freq=config.freq.panda_freq + config.pr.study_da, thresh="1.0 mm/day", **_build_frequency_kwargs(config) ) return result def cwd(config: IndexConfig) -> DataArray: result = atmos.maximum_consecutive_wet_days( - config.pr.study_da, thresh="1.0 mm/day", freq=config.freq.panda_freq + config.pr.study_da, thresh="1.0 mm/day", **_build_frequency_kwargs(config) ) return result def r10mm(config: IndexConfig) -> DataArray: result = atmos.wetdays( - config.pr.study_da, thresh="10 mm/day", freq=config.freq.panda_freq + config.pr.study_da, thresh="10 mm/day", **_build_frequency_kwargs(config) ) return result def r20mm(config: IndexConfig) -> DataArray: result = atmos.wetdays( - config.pr.study_da, thresh="20 mm/day", freq=config.freq.panda_freq + config.pr.study_da, thresh="20 mm/day", **_build_frequency_kwargs(config) ) return result def rx1day(config: IndexConfig) -> DataArray: result = atmos.max_1day_precipitation_amount( - config.pr.study_da, freq=config.freq.panda_freq + config.pr.study_da, **_build_frequency_kwargs(config) ) return result def rx5day(config: IndexConfig) -> DataArray: result = atmos.max_n_day_precipitation_amount( - config.pr.study_da, window=5, freq=config.freq.panda_freq + config.pr.study_da, window=5, **_build_frequency_kwargs(config) ) return result @@ -298,7 +298,7 @@ def rx5day(config: IndexConfig) -> DataArray: def r75p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_rxxp( pr=config.pr, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), pr_per_thresh=75.0, per_interpolation=config.interpolation, save_percentile=config.save_percentile, @@ -309,7 +309,7 @@ def r75p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def r75ptot(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_rxxptot( pr=config.pr, - freq=config.freq.panda_freq, + freq_config=_build_frequency_kwargs(config), pr_per_thresh=75.0, per_interpolation=config.interpolation, save_percentile=config.save_percentile, @@ -319,7 +319,7 @@ def r75ptot(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def r95p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_rxxp( pr=config.pr, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), pr_per_thresh=95.0, per_interpolation=config.interpolation, save_percentile=config.save_percentile, @@ -330,7 +330,7 @@ def r95p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def r95ptot(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_rxxptot( pr=config.pr, - freq=config.freq.panda_freq, + freq_config=_build_frequency_kwargs(config), pr_per_thresh=95.0, per_interpolation=config.interpolation, save_percentile=config.save_percentile, @@ -340,7 +340,7 @@ def r95ptot(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def r99p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_rxxp( pr=config.pr, - freq=config.freq, + freq_config=_build_frequency_kwargs(config), pr_per_thresh=99.0, per_interpolation=config.interpolation, save_percentile=config.save_percentile, @@ -351,7 +351,7 @@ def r99p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: def r99ptot(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: return _compute_rxxptot( pr=config.pr, - freq=config.freq.panda_freq, + freq_config=_build_frequency_kwargs(config), pr_per_thresh=99.0, per_interpolation=config.interpolation, save_percentile=config.save_percentile, @@ -377,7 +377,7 @@ def sd(config: IndexConfig) -> DataArray: ------- returns DataArray of the resulting index """ - result = land.snow_depth(config.pr.study_da, freq=config.freq.panda_freq) + result = land.snow_depth(config.pr.study_da, **_build_frequency_kwargs(config)) return result @@ -401,7 +401,7 @@ def sd1(config: IndexConfig) -> DataArray: returns DataArray of the resulting index """ result = land.snow_cover_duration( - config.pr.study_da, thresh="1 cm", freq=config.freq.panda_freq + config.pr.study_da, thresh="1 cm", **_build_frequency_kwargs(config) ) return result @@ -426,7 +426,7 @@ def sd5cm(config: IndexConfig) -> DataArray: returns DataArray of the resulting index """ result = land.snow_cover_duration( - config.pr.study_da, thresh="5 cm", freq=config.freq.panda_freq + config.pr.study_da, thresh="5 cm", **_build_frequency_kwargs(config) ) return result @@ -451,7 +451,7 @@ def sd50cm(config: IndexConfig) -> DataArray: returns DataArray of the resulting index """ result = land.snow_cover_duration( - config.pr.study_da, thresh="50 cm", freq=config.freq.panda_freq + config.pr.study_da, thresh="50 cm", **_build_frequency_kwargs(config) ) return result @@ -475,7 +475,7 @@ def tg(config: IndexConfig) -> DataArray: ------- returns DataArray of the resulting index """ - result = atmos.tg_mean(config.tas.study_da, freq=config.freq.panda_freq) + result = atmos.tg_mean(config.tas.study_da, **_build_frequency_kwargs(config)) result = convert_units_to(result, "°C") return result @@ -499,7 +499,7 @@ def tn(config: IndexConfig) -> DataArray: ------- returns DataArray of the resulting index """ - result = atmos.tn_mean(config.tasmin.study_da, freq=config.freq.panda_freq) + result = atmos.tn_mean(config.tasmin.study_da, **_build_frequency_kwargs(config)) result = convert_units_to(result, "°C") return result @@ -523,7 +523,7 @@ def tx(config: IndexConfig) -> DataArray: ------- returns DataArray of the resulting index """ - result = atmos.tx_mean(config.tasmax.study_da, freq=config.freq.panda_freq) + result = atmos.tx_mean(config.tasmax.study_da, **_build_frequency_kwargs(config)) result = convert_units_to(result, "°C") return result @@ -551,7 +551,7 @@ def dtr(config: IndexConfig) -> DataArray: result = atmos.daily_temperature_range( tasmax=config.tasmax.study_da, tasmin=config.tasmin.study_da, - freq=config.freq.panda_freq, + **_build_frequency_kwargs(config), ) result.attrs["units"] = "°C" return result @@ -580,7 +580,7 @@ def etr(config: IndexConfig) -> DataArray: result = atmos.extreme_temperature_range( tasmax=config.tasmax.study_da, tasmin=config.tasmin.study_da, - freq=config.freq.panda_freq, + **_build_frequency_kwargs(config), ) result.attrs["units"] = "°C" return result @@ -610,7 +610,7 @@ def vdtr(config: IndexConfig) -> DataArray: result = atmos.daily_temperature_range_variability( tasmax=config.tasmax.study_da, tasmin=config.tasmin.study_da, - freq=config.freq.panda_freq, + **_build_frequency_kwargs(config), ) result.attrs["units"] = "°C" return result @@ -640,7 +640,7 @@ def cd(config: IndexConfig) -> DataArray: return compute_compound_index( tas=config.tas, pr=config.pr, - freq=config.freq.panda_freq, + freq_config=_build_frequency_kwargs(config), tas_per_thresh=25, pr_per_thresh=25, per_window=config.window, @@ -675,7 +675,7 @@ def cw(config: IndexConfig) -> DataArray: return compute_compound_index( tas=config.tas, pr=config.pr, - freq=config.freq.panda_freq, + freq_config=_build_frequency_kwargs(config), tas_per_thresh=25, pr_per_thresh=75, per_window=config.window, @@ -710,7 +710,7 @@ def wd(config: IndexConfig) -> DataArray: return compute_compound_index( tas=config.tas, pr=config.pr, - freq=config.freq.panda_freq, + freq_config=_build_frequency_kwargs(config), tas_per_thresh=75, pr_per_thresh=25, per_window=config.window, @@ -733,7 +733,7 @@ def ww(config: IndexConfig) -> DataArray: return compute_compound_index( tas=config.tas, pr=config.pr, - freq=config.freq.panda_freq, + freq_config=_build_frequency_kwargs(config), tas_per_thresh=75, pr_per_thresh=75, per_window=config.window, @@ -823,11 +823,7 @@ def _compute_percentile_doy( callback: Callable = None, ) -> DataArray: per = percentile_doy( - da, - window, - percentile, - alpha=interpolation.alpha, - beta=interpolation.beta, + da, window, percentile, alpha=interpolation.alpha, beta=interpolation.beta, ) if callback is not None: callback(50) @@ -863,18 +859,15 @@ def _filter_in_wet_days(da: DataArray, dry_day_value: float): def _compute_threshold_index( - da: DataArray, - threshold: float, - freq: Frequency, - xclim_index_fun: Callable, + da: DataArray, threshold: float, freq_config: dict, xclim_index_fun: Callable, ) -> DataArray: - result = xclim_index_fun(da, thresh=f"{threshold} °C", freq=freq.panda_freq) + result = xclim_index_fun(da, thresh=f"{threshold} °C", **freq_config) return result def _compute_spell_duration( cf_var: CfVariable, - freq: str, + freq_config: dict, per_window: int, per_thresh: float, per_interpolation: QuantileInterpolation, @@ -884,18 +877,14 @@ def _compute_spell_duration( xclim_index_fun: Callable, ) -> Tuple[DataArray, Optional[DataArray]]: per = _compute_percentile_doy( - cf_var.reference_da, - per_thresh, - per_window, - per_interpolation, - callback, + cf_var.reference_da, per_thresh, per_window, per_interpolation, callback, ) run_bootstrap = _can_run_bootstrap(cf_var) result = xclim_index_fun( cf_var.study_da, per, window=min_spell_duration, - freq=freq, + **freq_config, bootstrap=run_bootstrap, ) result = result.squeeze(PERCENTILES_COORD, drop=True) @@ -914,7 +903,7 @@ def _compute_spell_duration( def compute_compound_index( tas: CfVariable, pr: CfVariable, - freq: str, + freq_config: dict, tas_per_thresh: int, pr_per_thresh: int, per_window: int, @@ -942,10 +931,9 @@ def compute_compound_index( window : int window in days used to computed each percentile. default is 5. - freq : str - pandas like frequency. - Used to determine the time slices of the output. - Default is "YS" as in Year Start. + freq_config : dict + pandas like frequency in "freq" key and either seasonal filtering in + "date_bounds" or month filtering in "month" save_percentile : bool Flag to include coordinate variable including the computed percentiles. Does not contain the bootstrapped percentiles. @@ -962,24 +950,16 @@ def compute_compound_index( Otherwise, returns the index_result """ tas_per = _compute_percentile_doy( - tas.reference_da, - tas_per_thresh, - per_window, - per_interpolation, - callback, + tas.reference_da, tas_per_thresh, per_window, per_interpolation, callback, ) tas_per = tas_per.squeeze(PERCENTILES_COORD, drop=True) pr_in_base = _filter_in_wet_days(pr.reference_da, dry_day_value=np.NAN) pr_out_of_base = _filter_in_wet_days(pr.study_da, dry_day_value=0) pr_per = _compute_percentile_doy( - pr_in_base, - pr_per_thresh, - per_window, - per_interpolation, - callback, + pr_in_base, pr_per_thresh, per_window, per_interpolation, callback, ) pr_per = pr_per.squeeze(PERCENTILES_COORD, drop=True) - result = xclim_index_fun(tas.study_da, tas_per, pr_out_of_base, pr_per, freq=freq) + result = xclim_index_fun(tas.study_da, tas_per, pr_out_of_base, pr_per, **freq_config) if save_percentile: # FIXME, not consistent with other percentile based indices # We should probably return a Tuple (res, [tas_per, pr_per]) @@ -992,7 +972,7 @@ def compute_compound_index( def _compute_rxxptot( pr: CfVariable, - freq: str, + freq_config: dict, pr_per_thresh: float, per_interpolation: QuantileInterpolation, save_percentile: bool, @@ -1002,11 +982,7 @@ def _compute_rxxptot( base_wet_days, per_interpolation, pr_per_thresh ) result = atmos.fraction_over_precip_thresh( - pr.study_da, - per, - thresh="1 mm/day", - freq=freq, - bootstrap=False, + pr.study_da, per, thresh="1 mm/day", **freq_config, bootstrap=False, ).squeeze(PERCENTILES_COORD, drop=True) result = result * 100 result.attrs["units"] = "%" @@ -1017,7 +993,7 @@ def _compute_rxxptot( def _compute_rxxp( pr: CfVariable, - freq: Frequency, + freq_config: dict, pr_per_thresh: float, per_interpolation: QuantileInterpolation, save_percentile: bool, @@ -1028,15 +1004,11 @@ def _compute_rxxp( base_wet_days, per_interpolation, pr_per_thresh ) result = atmos.days_over_precip_thresh( - pr.study_da, - per, - thresh="1 mm/day", - freq=freq.panda_freq, - bootstrap=False, + pr.study_da, per, thresh="1 mm/day", **freq_config, bootstrap=False, ) result = result.squeeze(PERCENTILES_COORD, drop=True) if is_percent: - result = _to_percent(result, freq) + result = _to_percent(result, freq_config["freq"]) if save_percentile: return result, per return result, None @@ -1044,7 +1016,7 @@ def _compute_rxxp( def _compute_temperature_percentile_index( cf_var: CfVariable, - freq: Frequency, + freq_config: dict, tas_per_thresh: int, per_window: int, per_interpolation: QuantileInterpolation, @@ -1055,22 +1027,23 @@ def _compute_temperature_percentile_index( ) -> Tuple[DataArray, Optional[DataArray]]: run_bootstrap = _can_run_bootstrap(cf_var) per = _compute_percentile_doy( - cf_var.reference_da, - tas_per_thresh, - per_window, - per_interpolation, - callback, + cf_var.reference_da, tas_per_thresh, per_window, per_interpolation, callback, ).compute() result = xclim_index_fun( - cf_var.study_da, - per, - freq=freq.panda_freq, - bootstrap=run_bootstrap, + cf_var.study_da, per, **freq_config, bootstrap=run_bootstrap, ).squeeze(PERCENTILES_COORD, drop=True) if run_bootstrap: result = _add_bootstrap_meta(result, per) if is_percent: - result = _to_percent(result, freq) + result = _to_percent(result, freq_config["freq"]) if save_percentile: return result, per return result, None + + +def _build_frequency_kwargs(config) -> dict[str, Any]: + """Build kwargs with possible keys in {"freq", "month", "date_bounds"}""" + kwargs = dict(freq=config.freq.pandas_freq) + if config.freq.indexer is not None: + kwargs.update(config.freq.indexer) + return kwargs diff --git a/icclim/main.py b/icclim/main.py index 0667fe5f..93e8b6f3 100644 --- a/icclim/main.py +++ b/icclim/main.py @@ -318,9 +318,6 @@ def _setup(callback, callback_start_value, logs_verbosity, slice_mode): # TODO: it might be safer to feed a context manager which will setup # and teardown these confs xclim.set_options(data_validation="warn") - if Frequency.is_seasonal(slice_mode): - # for now seasonal slice_modes missing values cannot be checked - xclim.set_options(check_missing="skip") # keep attributes through xarray operations xr.set_options(keep_attrs=True) log.set_verbosity(logs_verbosity) diff --git a/icclim/models/frequency.py b/icclim/models/frequency.py index 600740fa..726702d5 100644 --- a/icclim/models/frequency.py +++ b/icclim/models/frequency.py @@ -7,7 +7,7 @@ from datetime import datetime, timedelta from enum import Enum -from typing import Callable, List, Tuple, Union +from typing import Callable, List, Tuple, Union, Dict import cftime @@ -28,20 +28,15 @@ SON_MONTHS, ) - -def get_month_filter(month_list: list[int]) -> Callable: - return lambda da: filter_months(da, month_list) - - -def filter_months(da: DataArray, month_list: list[int]) -> DataArray: - return da.sel(time=da.time.dt.month.isin(month_list)) +SEASON_ERR_MSG = ( + f"A season created using `slice_mode` must be made of either" + f" consecutive integer for months such as [1,2,3] or two string for" + f" dates such as ['19 july', '14 august']." +) def get_seasonal_time_updater( - start_month: int, - end_month: int, - start_day: int = 1, - end_day: int = None + start_month: int, end_month: int, start_day: int = 1, end_day: int = None ) -> Callable[[DataArray], tuple[DataArray, DataArray]]: """Seasonal time updater and time bounds creator method generator. Returns a callable of DataArray which will rewrite the time dimension to @@ -155,112 +150,112 @@ class _Freq: def __init__( self, - panda_freq: str, + pandas_freq: str, accepted_values: list[str], description: str, post_processing: Callable[[DataArray], tuple[DataArray, DataArray]], - pre_processing: Callable[[DataArray], DataArray], + indexer: Indexer | None, ): - self.panda_freq: str = panda_freq + self.pandas_freq: str = pandas_freq self.accepted_values: list[str] = accepted_values self.description = description self.post_processing = post_processing - self.pre_processing = pre_processing + self.indexer = indexer class Frequency(Enum): """The sampling frequency of the resulting dataset.""" MONTH = _Freq( - panda_freq="MS", + pandas_freq="MS", accepted_values=["month", "MS"], description="monthly time series", + indexer=None, post_processing=_get_time_bounds_updater("MS"), - pre_processing=lambda x: x, ) - """ Resample to monthly values""" + """Resample to monthly values""" + + YEAR = _Freq( + pandas_freq="YS", + accepted_values=["year", "YS"], + description="annual time series", + indexer=None, + post_processing=_get_time_bounds_updater("YS"), + ) + """Resample to yearly values.""" AMJJAS = _Freq( - panda_freq="AS-APR", + pandas_freq="AS-APR", accepted_values=["AMJJAS"], description="summer half-year time series", + indexer=dict(month=AMJJAS_MONTHS), post_processing=get_seasonal_time_updater(AMJJAS_MONTHS[0], AMJJAS_MONTHS[-1]), - pre_processing=get_month_filter(AMJJAS_MONTHS), ) - """ Resample to summer half-year, from April to September included.""" + """Resample to summer half-year, from April to September included.""" ONDJFM = _Freq( - panda_freq="AS-OCT", + pandas_freq="AS-OCT", accepted_values=["ONDJFM"], description="winter half-year time series", + indexer=dict(month=ONDJFM_MONTHS), post_processing=get_seasonal_time_updater(ONDJFM_MONTHS[0], ONDJFM_MONTHS[-1]), - pre_processing=get_month_filter(ONDJFM_MONTHS), ) - """ Resample to winter half-year, from October to March included.""" + """Resample to winter half-year, from October to March included.""" DJF = _Freq( - panda_freq="AS-DEC", + pandas_freq="AS-DEC", accepted_values=["DJF"], description="winter time series", + indexer=dict(month=DJF_MONTHS), post_processing=get_seasonal_time_updater(DJF_MONTHS[0], DJF_MONTHS[-1]), - pre_processing=get_month_filter(DJF_MONTHS), ) - """ Resample to winter season, from December to February included.""" + """Resample to winter season, from December to February included.""" MAM = _Freq( - panda_freq="AS-MAR", + pandas_freq="AS-MAR", accepted_values=["MAM"], description="spring time series", + indexer=dict(month=MAM_MONTHS), post_processing=get_seasonal_time_updater(MAM_MONTHS[0], MAM_MONTHS[-1]), - pre_processing=get_month_filter(MAM_MONTHS), ) - """ Resample to spring season, from March to May included.""" + """Resample to spring season, from March to May included.""" JJA = _Freq( - panda_freq="AS-JUN", + pandas_freq="AS-JUN", accepted_values=["JJA"], description="summer time series", + indexer=dict(month=JJA_MONTHS), post_processing=get_seasonal_time_updater(JJA_MONTHS[0], JJA_MONTHS[-1]), - pre_processing=get_month_filter(JJA_MONTHS), ) - """ Resample to summer season, from June to Agust included.""" + """Resample to summer season, from June to Agust included.""" SON = _Freq( - panda_freq="AS-SEP", + pandas_freq="AS-SEP", accepted_values=["SON"], description="autumn time series", + indexer=dict(month=SON_MONTHS), post_processing=get_seasonal_time_updater(SON_MONTHS[0], SON_MONTHS[-1]), - pre_processing=get_month_filter(SON_MONTHS), ) - """ Resample to fall season, from September to November included.""" + """Resample to fall season, from September to November included.""" CUSTOM = _Freq( - panda_freq="MS", + pandas_freq="MS", accepted_values=[], description="", + indexer=None, post_processing=lambda x: x, - pre_processing=lambda x: x, ) - """ Placeholder instance for custom sampling frequencies. - Do not use as is, use `slice_mode` with "month", "season" or "dates" keywords - instead. + """Placeholder instance for custom sampling frequencies. + Do not use as is, use `slice_mode` with "month", "season" keywords instead. """ - YEAR = _Freq( - panda_freq="YS", - accepted_values=["year", "YS"], - description="annual time series", - post_processing=_get_time_bounds_updater("YS"), - pre_processing=lambda x: x, - ) - """ Resample to yearly values.""" def __init__(self, freq: _Freq): self._freq = freq @property - def panda_freq(self): - return self._freq.panda_freq + def pandas_freq(self): + return self._freq.pandas_freq @property def accepted_values(self): @@ -275,8 +270,8 @@ def post_processing(self): return self._freq.post_processing @property - def pre_processing(self): - return self._freq.pre_processing + def indexer(self): + return self._freq.indexer @staticmethod def lookup(slice_mode: SliceMode) -> Frequency: @@ -333,53 +328,16 @@ def _get_frequency_from_list(slice_mode_list: list) -> Frequency: f" When slice_mode is a list, its first element must be a keyword and" f" its second a list (e.g `slice_mode=['season', [1,2,3]]` )." ) - sampling_freq = slice_mode_list[0] + freq_keyword = slice_mode_list[0] custom_freq = Frequency.CUSTOM - if sampling_freq in ["month", "months"]: - season_bounds = slice_mode_list[1] - custom_freq._freq = _Freq( - pre_processing=get_month_filter(season_bounds), - post_processing=_get_filterer_and_time_updater(season_bounds), - panda_freq="MS", - description=f"monthly time series (months: {season_bounds})", - accepted_values=[], - ) - elif sampling_freq == "season": - season_bounds = slice_mode_list[1] - if isinstance(season_bounds, Tuple): - # concat in case of ([12], [1, 2]) - season_bounds = season_bounds[0] + season_bounds[1] - err_msg = f"A season created using `slice_mode` must be made of either" \ - f" consecutive integer for months such as [1,2,3] or two string for" \ - f" dates such as ['19 july', '14 august']." - if isinstance(season_bounds[0], str): - # season between two dates - if len(season_bounds) != 2: - raise InvalidIcclimArgumentError(err_msg) - begin_date, end_date, delta = read_dates(*season_bounds) - custom_freq._freq = _Freq( - pre_processing=_get_between_dates_filter(begin_date, end_date), - post_processing=get_seasonal_time_updater(begin_date.month, - end_date.month, - begin_date.day, - end_date.day), - # todo ideally we should offset panda_freq by {begin_date.day} days (tbd in xclim) on resample - panda_freq=f"AS-{MONTHS_MAP[begin_date.month]}", - description=f"seasonal time series (season: from {begin_date} to {end_date})", - accepted_values=[], - ) - elif isinstance(season_bounds[0], int): - # season made of consecutive months - if not _is_season_valid(season_bounds): - raise InvalidIcclimArgumentError(err_msg) - custom_freq._freq = _Freq( - pre_processing=get_month_filter(season_bounds), - post_processing=get_seasonal_time_updater(season_bounds[0], - season_bounds[-1]), - panda_freq=f"AS-{MONTHS_MAP[season_bounds[0]]}", - description=f"seasonal time series (season: {season_bounds})", - accepted_values=[], - ) + if freq_keyword in ["month", "months"]: + custom_freq._freq = _build_frequency_filtered_by_month(slice_mode_list[1]) + elif freq_keyword == "season": + season = slice_mode_list[1] + if isinstance(season[0], str): + custom_freq._freq = _build_seasonal_frequency_between_dates(season) + elif isinstance(season, Tuple) or isinstance(season[0], int): + custom_freq._freq = _build_seasonal_frequency_for_months(season) else: raise InvalidIcclimArgumentError( f"Unknown frequency {slice_mode_list}." @@ -388,23 +346,49 @@ def _get_frequency_from_list(slice_mode_list: list) -> Frequency: return custom_freq -def _get_between_dates_filter(begin_date: datetime, end_date: datetime): - return lambda da: filter_between_dates(da, begin_date, end_date) +def _build_frequency_filtered_by_month(months: List[int]): + return _Freq( + indexer=dict(month=months), + post_processing=_get_time_bounds_updater("MS"), + pandas_freq="MS", + description=f"monthly time series (months: {months})", + accepted_values=[], + ) -def filter_between_dates(da: DataArray, d1: datetime, d2: datetime): - between_dates_range = pd.date_range(d1, d2, freq="D").dayofyear - between_dates_mask = np.logical_and( - da.time.dt.dayofyear >= between_dates_range.min(), - da.time.dt.dayofyear <= between_dates_range.max()) - return da[between_dates_mask] +def _build_seasonal_frequency_between_dates(season: list[str]): + if len(season) != 2: + raise InvalidIcclimArgumentError(SEASON_ERR_MSG) + begin_date, end_date = _read_date(season[0]), _read_date(season[1]) + return _Freq( + indexer=dict( + date_bounds=(begin_date.strftime("%m-%d"), end_date.strftime("%m-%d")) + ), + post_processing=get_seasonal_time_updater( + begin_date.month, end_date.month, begin_date.day, end_date.day + ), + pandas_freq=f"AS-{MONTHS_MAP[begin_date.month]}", + description=f"seasonal time series (season: from {begin_date} to {end_date})", + accepted_values=[], + ) -def read_dates(d1: str, d2: str) -> tuple[datetime, datetime, int]: - return date1 := read_date(d1), date2 := read_date(d2), (date2 - date1).days +def _build_seasonal_frequency_for_months(season): + if isinstance(season, Tuple): + # concat in case of ([12], [1, 2]) + season = season[0] + season[1] + if not _is_season_valid(season): + raise InvalidIcclimArgumentError(SEASON_ERR_MSG) + return _Freq( + indexer=dict(month=season), + post_processing=get_seasonal_time_updater(season[0], season[-1]), + pandas_freq=f"AS-{MONTHS_MAP[season[0]]}", + description=f"seasonal time series (season: {season})", + accepted_values=[], + ) -def read_date(date_string: str) -> datetime: +def _read_date(date_string: str) -> datetime: error_msg = ( "The date {} does not have a valid format." " You can use various formats such as '2 december' or '02-12'." @@ -414,13 +398,8 @@ def read_date(date_string: str) -> datetime: return date -def _get_filterer_and_time_updater(season_bounds): - def filter_and_update_time(da): - res, bounds = _get_time_bounds_updater("MS")(da) - res = get_month_filter(season_bounds)(res) - return res, bounds - - return filter_and_update_time - - SliceMode = Union[Frequency, str, List[Union[str, Tuple, int]]] + +MonthsIndexer = Dict["month", List[int]] # format [12,1,2,3] +DatesIndexer = Dict["date_bounds", Tuple[str, str]] # format ("01-25", "02-28") +Indexer = Union[MonthsIndexer, DatesIndexer] diff --git a/icclim/models/index_config.py b/icclim/models/index_config.py index 7c976abc..0dd1c423 100644 --- a/icclim/models/index_config.py +++ b/icclim/models/index_config.py @@ -117,7 +117,6 @@ def __init__( base_period_time_range=base_period_time_range, only_leap_years=only_leap_years, chunk_it=chunk_it, - pre_processing=self.freq.pre_processing, ) for var_name in var_names ] @@ -181,15 +180,12 @@ def _build_cf_variable( base_period_time_range: list[str] | None, only_leap_years: bool, chunk_it: bool, - pre_processing: Callable, ) -> CfVariable: if chunk_it: da = da.chunk("auto") # noqa - typing fixed in futur xarray version study_da = _build_study_da(da, time_range, ignore_Feb29th) - study_da = pre_processing(study_da) if base_period_time_range is not None: reference_da = _build_reference_da(da, base_period_time_range, only_leap_years) - reference_da = pre_processing(reference_da) else: reference_da = study_da # TODO: all these operations should probably be added in history metadata diff --git a/icclim/models/user_index_config.py b/icclim/models/user_index_config.py index 4eb5df5a..da072dd5 100644 --- a/icclim/models/user_index_config.py +++ b/icclim/models/user_index_config.py @@ -4,6 +4,7 @@ from enum import Enum from typing import Any, Callable, Literal +from xclim.core.calendar import select_time from xarray.core.dataarray import DataArray from icclim.icclim_exceptions import InvalidIcclimArgumentError @@ -148,7 +149,10 @@ def __init__( self.date_event = date_event self.var_type = var_type self.is_percent = is_percent - self.da_ref = cf_vars[0].reference_da + if freq.indexer is not None: + for cf_var in cf_vars: + cf_var.study_da = select_time(cf_var.study_da, **freq.indexer) + cf_var.reference_da = select_time(cf_var.reference_da, **freq.indexer) self.cf_vars = cf_vars if thresh is not None and logical_operation is not None: self.nb_event_config = get_nb_event_conf( diff --git a/icclim/tests/test_frequency.py b/icclim/tests/test_frequency.py index 2e24fb78..34834983 100644 --- a/icclim/tests/test_frequency.py +++ b/icclim/tests/test_frequency.py @@ -3,7 +3,7 @@ import pytest from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.frequency import Frequency, filter_months, get_seasonal_time_updater +from icclim.models.frequency import Frequency, get_seasonal_time_updater from icclim.tests.test_utils import stub_tas @@ -31,21 +31,21 @@ def test_error(self): def test_month(self): freq = Frequency.lookup(["month", [1, 4, 3]]) assert freq == Frequency.CUSTOM - assert freq.panda_freq == "MS" + assert freq.pandas_freq == "MS" assert freq.accepted_values == [] assert freq.post_processing is not None def test_season(self): freq = Frequency.lookup(["season", [1, 2, 3, 4]]) assert freq == Frequency.CUSTOM - assert freq.panda_freq == "AS-JAN" + assert freq.pandas_freq == "AS-JAN" assert freq.accepted_values == [] assert freq.post_processing is not None def test_winter__deprecated_tuple(self): freq = Frequency.lookup(["season", ([11, 12], [1, 2, 3, 4])]) assert freq == Frequency.CUSTOM - assert freq.panda_freq == "AS-NOV" + assert freq.pandas_freq == "AS-NOV" assert freq.accepted_values == [] assert freq.post_processing is not None @@ -60,30 +60,17 @@ def test_error__weird_months(self): def test_winter(self): freq = Frequency.lookup(["season", [11, 12, 1, 2]]) assert freq == Frequency.CUSTOM - assert freq.panda_freq == "AS-NOV" + assert freq.pandas_freq == "AS-NOV" assert freq.accepted_values == [] assert freq.post_processing is not None def test_lookup_season_between_dates(self): freq = Frequency.lookup(["season",["07-19","08-14"]]) assert freq == Frequency.CUSTOM - assert freq.panda_freq == "AS-JUL" + assert freq.pandas_freq == "AS-JUL" assert freq.accepted_values == [] assert freq.post_processing is not None - -class Test_filter_months: - def test_simple(self): - # WHEN - da = filter_months(stub_tas(), [1, 2, 7]) - # THEN - months = np.unique(da.time.dt.month) - assert len(months) == 3 - assert months[0] == 1 - assert months[1] == 2 - assert months[2] == 7 - - class Test_seasons_resampler: def test_simple(self): # WHEN @@ -125,3 +112,8 @@ def test_between_dates(self, use_cf): np.testing.assert_array_equal(1, da_res) # data must be unchanged assert time_bds_res[0].data[0] == pd.to_datetime("2041-11-02") assert time_bds_res[0].data[1] == pd.to_datetime("2042-01-30") + + + +def filter_months(da, month_list: list[int]): + return da.sel(time=da.time.dt.month.isin(month_list)) diff --git a/icclim/tests/test_generated_api.py b/icclim/tests/test_generated_api.py index 49c5fce0..d522c100 100644 --- a/icclim/tests/test_generated_api.py +++ b/icclim/tests/test_generated_api.py @@ -95,11 +95,16 @@ def test_custom_index(index_fun_mock: MagicMock): # integration test def test_txx__season_slice_mode(): + # GIVEN tas = stub_tas() - tas.loc[{"time": "2042-02-02"}] = 295 - tas.loc[{"time": "2042-01-01"}] = 303.15 # 30ºC 273.15 + tas.loc[{"time": "2043-02-02"}] = 295 + tas.loc[{"time": "2043-01-01"}] = 303.15 # 30ºC 273.15 + # WHEN res = icclim.txx(tas, slice_mode=["season", [11, 12, 1, 2]]).compute() - np.testing.assert_array_equal(res.TXx.isel(time=0), 30) + # THEN + # missing values for nov, dec of first period + np.testing.assert_array_equal(res.TXx.isel(time=0), np.NAN) + np.testing.assert_array_equal(res.TXx.isel(time=1), 30.) np.testing.assert_array_equal( res.time_bounds.isel(time=0), [np.datetime64("2041-11-01"), np.datetime64("2042-02-28")], @@ -112,7 +117,8 @@ def test_txx__months_slice_mode(): tas.loc[{"time": "2042-01-01"}] = 303.15 # 30ºC 273.15 res = icclim.txx(tas, slice_mode=["months", [11, 1]]).compute() np.testing.assert_array_equal(res.TXx.isel(time=0), 30) - np.testing.assert_almost_equal(res.TXx.isel(time=1), 21.85) + np.testing.assert_array_equal(res.TXx.isel(time=1), np.NAN) + np.testing.assert_almost_equal(res.TXx.sel(time="2042-11"), 21.85) np.testing.assert_array_equal( res.time_bounds.isel(time=0), [np.datetime64("2042-01-01"), np.datetime64("2042-01-31")], diff --git a/icclim/tests/test_main.py b/icclim/tests/test_main.py index 1fa9731c..60737100 100644 --- a/icclim/tests/test_main.py +++ b/icclim/tests/test_main.py @@ -99,12 +99,11 @@ def test_index_SU__monthy_sampled(self): def test_index_SU__monthy_sampled_cf_time(self): res = icclim.index( indice_name="SU", - in_files=self.data, + in_files=self.data_cf_time, out_file=self.OUTPUT_FILE, slice_mode=Frequency.MONTH, ) np.testing.assert_array_equal(0, res.SU) - res.time_bounds.isel(time=0) np.testing.assert_array_equal( len(np.unique(self.TIME_RANGE.year)) * 12, len(res.time) ) @@ -118,12 +117,13 @@ def test_index_SU__monthy_sampled_cf_time(self): def test_index_SU__DJF_cf_time(self): res = icclim.index( indice_name="SU", - in_files=self.data, + in_files=self.data_cf_time, out_file=self.OUTPUT_FILE, slice_mode=Frequency.DJF, ) - np.testing.assert_array_equal(0, res.SU) - # 1 more year as DJF sampling create a months withs nans before + np.testing.assert_array_equal(res.SU.isel(time=0), np.NAN) + np.testing.assert_array_equal(res.SU.isel(time=1), 0) + # "+ 1" because DJF sampling create a december month with nans before for first year np.testing.assert_array_equal( len(np.unique(self.TIME_RANGE.year)) + 1, len(res.time) ) @@ -141,6 +141,16 @@ def test_indices_from_DataArray(self): for i in HEAT_INDICES: assert res[i] is not None + def test_indices__snow_indices(self): + ds = self.data.to_dataset(name="tas") + ds["prec"] = self.data.copy(deep=True) + ds["prec"].attrs["units"] = "cm" + res = icclim.indices( + index_group=IndexGroup.SNOW, in_files=ds, out_file=self.OUTPUT_FILE + ) + for i in filter(lambda i: i.group == IndexGroup.SNOW, EcadIndex): + assert res[i] is not None + def test_indices_all_from_Dataset(self): ds = self.data.to_dataset(name="tas") ds["tasmax"] = self.data @@ -162,13 +172,20 @@ def test_indices_all_ignore_error(self): ds["pr"] = self.data.copy(deep=True) ds["pr"].attrs["units"] = "kg m-2 d-1" res: xr.Dataset = icclim.indices( - index_group="all", in_files=ds, out_file=self.OUTPUT_FILE, ignore_error=True - ) + index_group="all", + in_files=ds, + out_file=self.OUTPUT_FILE, + ignore_error=True, + slice_mode="DJF" + ).compute() for i in EcadIndex: - # No variable in input to compute for snow indices + # No variable in input to compute snow indices if i.group == IndexGroup.SNOW: assert res.data_vars.get(i.short_name, None) is None else: + # print("----------") + # print(i.short_name) + # print(res[i.short_name]) assert res[i.short_name] is not None def test_indices_all_error(self): diff --git a/icclim/user_indices/calc_operation.py b/icclim/user_indices/calc_operation.py index 67f141a4..cc0b7c28 100644 --- a/icclim/user_indices/calc_operation.py +++ b/icclim/user_indices/calc_operation.py @@ -3,6 +3,7 @@ from enum import Enum from typing import Callable, Literal +import xclim.core.calendar from xarray.core.dataarray import DataArray from icclim.icclim_exceptions import InvalidIcclimArgumentError, MissingIcclimInputError @@ -54,7 +55,7 @@ def run_sum(config: UserIndexConfig): extreme_mode=config.extreme_mode, window_width=config.window_width, coef=config.coef, - freq=config.freq.panda_freq, + freq=config.freq.pandas_freq, date_event=config.date_event, ) @@ -69,7 +70,7 @@ def run_mean(config: UserIndexConfig): extreme_mode=config.extreme_mode, window_width=config.window_width, coef=config.coef, - freq=config.freq.panda_freq, + freq=config.freq.pandas_freq, date_event=config.date_event, ) @@ -90,7 +91,7 @@ def max_consecutive_event_count(config: UserIndexConfig): logical_operation=config.logical_operation, threshold=config.thresh, coef=config.coef, - freq=config.freq.panda_freq, + freq=config.freq.pandas_freq, date_event=config.date_event, ) @@ -108,7 +109,7 @@ def count_events(config: UserIndexConfig): link_logical_operations=config.nb_event_config.link_logical_operations, thresholds=config.nb_event_config.thresholds, coef=config.coef, - freq=config.freq.panda_freq, + freq=config.freq.pandas_freq, date_event=config.date_event, ) @@ -120,7 +121,7 @@ def sum(config: UserIndexConfig): coef=config.coef, logical_operation=config.logical_operation, threshold=_check_and_get_simple_threshold(config.thresh), - freq=config.freq.panda_freq, + freq=config.freq.pandas_freq, ) @@ -131,7 +132,7 @@ def mean(config: UserIndexConfig): coef=config.coef, logical_operation=config.logical_operation, threshold=_check_and_get_simple_threshold(config.thresh), - freq=config.freq.panda_freq, + freq=config.freq.pandas_freq, ) @@ -150,7 +151,7 @@ def _simple_reducer(op: Callable, config: UserIndexConfig): coef=config.coef, logical_operation=config.logical_operation, threshold=_check_and_get_simple_threshold(config.thresh), - freq=config.freq.panda_freq, + freq=config.freq.pandas_freq, date_event=config.date_event, ) From 1b74eaf3abd7d4a8535a93a5378f8ede7bae1495 Mon Sep 17 00:00:00 2001 From: abel Date: Fri, 6 May 2022 08:41:59 +0200 Subject: [PATCH 04/10] MAINT: Add `dateparser` to dependencies --- environment.yml | 1 + requirements.txt | 1 + requirements_dev.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/environment.yml b/environment.yml index e10d070a..8491f734 100644 --- a/environment.yml +++ b/environment.yml @@ -17,6 +17,7 @@ dependencies: - psutil - zarr - fsspec + - dateparser # Dev dependencies - pre-commit - pydata-sphinx-theme diff --git a/requirements.txt b/requirements.txt index a9046d25..da05343c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ cftime dask[array] +dateparser distributed fsspec netCDF4~=1.5.7 diff --git a/requirements_dev.txt b/requirements_dev.txt index 9576427e..2add5a3d 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,6 +1,7 @@ black cftime~=1.5.0 dask +dateparser flake8 isort netCDF4~=1.5.7 From 11ce826f1f95245ad2c2a669cb9f7353e54908c7 Mon Sep 17 00:00:00 2001 From: abel Date: Fri, 6 May 2022 08:45:31 +0200 Subject: [PATCH 05/10] MAINT: Lint and clean --- icclim/ecad_functions.py | 58 ++++++++++++++++++++++----- icclim/main.py | 5 ++- icclim/models/frequency.py | 32 ++++++++------- icclim/models/user_index_config.py | 2 +- icclim/tests/test_frequency.py | 12 +++--- icclim/tests/test_generated_api.py | 10 +++-- icclim/tests/test_main.py | 4 +- icclim/tests/test_utils.py | 6 +-- icclim/user_indices/calc_operation.py | 1 - 9 files changed, 87 insertions(+), 43 deletions(-) diff --git a/icclim/ecad_functions.py b/icclim/ecad_functions.py index 9bfbba9c..e05120db 100644 --- a/icclim/ecad_functions.py +++ b/icclim/ecad_functions.py @@ -3,7 +3,7 @@ metadata to it. """ import re -from typing import Callable, Optional, Tuple, Any +from typing import Any, Callable, Optional, Tuple from warnings import warn import numpy as np @@ -823,7 +823,11 @@ def _compute_percentile_doy( callback: Callable = None, ) -> DataArray: per = percentile_doy( - da, window, percentile, alpha=interpolation.alpha, beta=interpolation.beta, + da, + window, + percentile, + alpha=interpolation.alpha, + beta=interpolation.beta, ) if callback is not None: callback(50) @@ -859,7 +863,10 @@ def _filter_in_wet_days(da: DataArray, dry_day_value: float): def _compute_threshold_index( - da: DataArray, threshold: float, freq_config: dict, xclim_index_fun: Callable, + da: DataArray, + threshold: float, + freq_config: dict, + xclim_index_fun: Callable, ) -> DataArray: result = xclim_index_fun(da, thresh=f"{threshold} °C", **freq_config) return result @@ -877,7 +884,11 @@ def _compute_spell_duration( xclim_index_fun: Callable, ) -> Tuple[DataArray, Optional[DataArray]]: per = _compute_percentile_doy( - cf_var.reference_da, per_thresh, per_window, per_interpolation, callback, + cf_var.reference_da, + per_thresh, + per_window, + per_interpolation, + callback, ) run_bootstrap = _can_run_bootstrap(cf_var) result = xclim_index_fun( @@ -950,16 +961,26 @@ def compute_compound_index( Otherwise, returns the index_result """ tas_per = _compute_percentile_doy( - tas.reference_da, tas_per_thresh, per_window, per_interpolation, callback, + tas.reference_da, + tas_per_thresh, + per_window, + per_interpolation, + callback, ) tas_per = tas_per.squeeze(PERCENTILES_COORD, drop=True) pr_in_base = _filter_in_wet_days(pr.reference_da, dry_day_value=np.NAN) pr_out_of_base = _filter_in_wet_days(pr.study_da, dry_day_value=0) pr_per = _compute_percentile_doy( - pr_in_base, pr_per_thresh, per_window, per_interpolation, callback, + pr_in_base, + pr_per_thresh, + per_window, + per_interpolation, + callback, ) pr_per = pr_per.squeeze(PERCENTILES_COORD, drop=True) - result = xclim_index_fun(tas.study_da, tas_per, pr_out_of_base, pr_per, **freq_config) + result = xclim_index_fun( + tas.study_da, tas_per, pr_out_of_base, pr_per, **freq_config + ) if save_percentile: # FIXME, not consistent with other percentile based indices # We should probably return a Tuple (res, [tas_per, pr_per]) @@ -982,7 +1003,11 @@ def _compute_rxxptot( base_wet_days, per_interpolation, pr_per_thresh ) result = atmos.fraction_over_precip_thresh( - pr.study_da, per, thresh="1 mm/day", **freq_config, bootstrap=False, + pr.study_da, + per, + thresh="1 mm/day", + **freq_config, + bootstrap=False, ).squeeze(PERCENTILES_COORD, drop=True) result = result * 100 result.attrs["units"] = "%" @@ -1004,7 +1029,11 @@ def _compute_rxxp( base_wet_days, per_interpolation, pr_per_thresh ) result = atmos.days_over_precip_thresh( - pr.study_da, per, thresh="1 mm/day", **freq_config, bootstrap=False, + pr.study_da, + per, + thresh="1 mm/day", + **freq_config, + bootstrap=False, ) result = result.squeeze(PERCENTILES_COORD, drop=True) if is_percent: @@ -1027,10 +1056,17 @@ def _compute_temperature_percentile_index( ) -> Tuple[DataArray, Optional[DataArray]]: run_bootstrap = _can_run_bootstrap(cf_var) per = _compute_percentile_doy( - cf_var.reference_da, tas_per_thresh, per_window, per_interpolation, callback, + cf_var.reference_da, + tas_per_thresh, + per_window, + per_interpolation, + callback, ).compute() result = xclim_index_fun( - cf_var.study_da, per, **freq_config, bootstrap=run_bootstrap, + cf_var.study_da, + per, + **freq_config, + bootstrap=run_bootstrap, ).squeeze(PERCENTILES_COORD, drop=True) if run_bootstrap: result = _add_bootstrap_meta(result, per) diff --git a/icclim/main.py b/icclim/main.py index 93e8b6f3..f9e869e5 100644 --- a/icclim/main.py +++ b/icclim/main.py @@ -113,13 +113,14 @@ def index( index_name: str | None = None, # optional when computing user_indices var_name: str | list[str] | None = None, slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] = None, # TODO: use dateparser to accept strings + time_range: list[datetime] = None, # TODO: use dateparser to accept strings out_file: str | None = None, threshold: float | list[float] | None = None, callback: Callable[[int], None] = log.callback, callback_percentage_start_value: int = 0, callback_percentage_total: int = 100, - base_period_time_range: list[datetime] | None = None, # TODO: use dateparser to accept strings + base_period_time_range: list[datetime] + | None = None, # TODO: use dateparser to accept strings window_width: int = 5, only_leap_years: bool = False, ignore_Feb29th: bool = False, diff --git a/icclim/models/frequency.py b/icclim/models/frequency.py index 726702d5..2f458602 100644 --- a/icclim/models/frequency.py +++ b/icclim/models/frequency.py @@ -7,10 +7,9 @@ from datetime import datetime, timedelta from enum import Enum -from typing import Callable, List, Tuple, Union, Dict +from typing import Callable, Dict, List, Literal, Tuple, Union import cftime - import dateparser import numpy as np import pandas as pd @@ -29,9 +28,9 @@ ) SEASON_ERR_MSG = ( - f"A season created using `slice_mode` must be made of either" - f" consecutive integer for months such as [1,2,3] or two string for" - f" dates such as ['19 july', '14 august']." + "A season created using `slice_mode` must be made of either" + " consecutive integer for months such as [1,2,3] or two string for" + " dates such as ['19 july', '14 august']." ) @@ -75,7 +74,7 @@ def add_time_bounds(da: DataArray) -> tuple[DataArray, DataArray]: end_month + 1, 1, calendar=first_time.calendar, - )- timedelta(days=1) + ) - timedelta(days=1) else: end = cftime.datetime( year_of_season_end, @@ -86,7 +85,9 @@ def add_time_bounds(da: DataArray) -> tuple[DataArray, DataArray]: else: start = pd.to_datetime(f"{year}-{start_month}-{start_day}") if end_day is None: - end = pd.to_datetime(f"{year_of_season_end}-{end_month + 1}") - timedelta(days=1) + end = pd.to_datetime( + f"{year_of_season_end}-{end_month + 1}" + ) - timedelta(days=1) else: end = pd.to_datetime(f"{year_of_season_end}-{end_month}-{end_day}") new_time_axis.append(start + (end - start) / 2) @@ -249,7 +250,6 @@ class Frequency(Enum): Do not use as is, use `slice_mode` with "month", "season" keywords instead. """ - def __init__(self, freq: _Freq): self._freq = freq @@ -324,9 +324,9 @@ def _is_season_valid(months: list[int]) -> bool: def _get_frequency_from_list(slice_mode_list: list) -> Frequency: if len(slice_mode_list) < 2: raise InvalidIcclimArgumentError( - f"Invalid slice_mode format." - f" When slice_mode is a list, its first element must be a keyword and" - f" its second a list (e.g `slice_mode=['season', [1,2,3]]` )." + "Invalid slice_mode format." + " When slice_mode is a list, its first element must be a keyword and" + " its second a list (e.g `slice_mode=['season', [1,2,3]]` )." ) freq_keyword = slice_mode_list[0] custom_freq = Frequency.CUSTOM @@ -346,7 +346,7 @@ def _get_frequency_from_list(slice_mode_list: list) -> Frequency: return custom_freq -def _build_frequency_filtered_by_month(months: List[int]): +def _build_frequency_filtered_by_month(months: list[int]): return _Freq( indexer=dict(month=months), post_processing=_get_time_bounds_updater("MS"), @@ -393,13 +393,15 @@ def _read_date(date_string: str) -> datetime: "The date {} does not have a valid format." " You can use various formats such as '2 december' or '02-12'." ) - if (date := dateparser.parse(date_string)) == None: + if (date := dateparser.parse(date_string)) is None: raise InvalidIcclimArgumentError(error_msg.format(date_string)) return date SliceMode = Union[Frequency, str, List[Union[str, Tuple, int]]] -MonthsIndexer = Dict["month", List[int]] # format [12,1,2,3] -DatesIndexer = Dict["date_bounds", Tuple[str, str]] # format ("01-25", "02-28") +MonthsIndexer = Dict[Literal["month"], List[int]] # format [12,1,2,3] +DatesIndexer = Dict[ + Literal["date_bounds"], Tuple[str, str] +] # format ("01-25", "02-28") Indexer = Union[MonthsIndexer, DatesIndexer] diff --git a/icclim/models/user_index_config.py b/icclim/models/user_index_config.py index da072dd5..16e3e78e 100644 --- a/icclim/models/user_index_config.py +++ b/icclim/models/user_index_config.py @@ -4,8 +4,8 @@ from enum import Enum from typing import Any, Callable, Literal -from xclim.core.calendar import select_time from xarray.core.dataarray import DataArray +from xclim.core.calendar import select_time from icclim.icclim_exceptions import InvalidIcclimArgumentError from icclim.models.frequency import Frequency diff --git a/icclim/tests/test_frequency.py b/icclim/tests/test_frequency.py index 34834983..83fd5461 100644 --- a/icclim/tests/test_frequency.py +++ b/icclim/tests/test_frequency.py @@ -65,12 +65,13 @@ def test_winter(self): assert freq.post_processing is not None def test_lookup_season_between_dates(self): - freq = Frequency.lookup(["season",["07-19","08-14"]]) + freq = Frequency.lookup(["season", ["07-19", "08-14"]]) assert freq == Frequency.CUSTOM assert freq.pandas_freq == "AS-JUL" assert freq.accepted_values == [] assert freq.post_processing is not None + class Test_seasons_resampler: def test_simple(self): # WHEN @@ -104,16 +105,17 @@ def test_winter(self): @pytest.mark.parametrize("use_cf", [True, False]) def test_between_dates(self, use_cf): # WHEN - test_da = filter_months(stub_tas(use_cftime=use_cf), [11, 12, 1])\ - .resample(time="AS-NOV")\ + test_da = ( + filter_months(stub_tas(use_cftime=use_cf), [11, 12, 1]) + .resample(time="AS-NOV") .mean() + ) da_res, time_bds_res = get_seasonal_time_updater(11, 1, 2, 30)(test_da) # THEN - np.testing.assert_array_equal(1, da_res) # data must be unchanged + np.testing.assert_array_equal(1, da_res) # data must be unchanged assert time_bds_res[0].data[0] == pd.to_datetime("2041-11-02") assert time_bds_res[0].data[1] == pd.to_datetime("2042-01-30") - def filter_months(da, month_list: list[int]): return da.sel(time=da.time.dt.month.isin(month_list)) diff --git a/icclim/tests/test_generated_api.py b/icclim/tests/test_generated_api.py index d522c100..7410a03a 100644 --- a/icclim/tests/test_generated_api.py +++ b/icclim/tests/test_generated_api.py @@ -104,7 +104,7 @@ def test_txx__season_slice_mode(): # THEN # missing values for nov, dec of first period np.testing.assert_array_equal(res.TXx.isel(time=0), np.NAN) - np.testing.assert_array_equal(res.TXx.isel(time=1), 30.) + np.testing.assert_array_equal(res.TXx.isel(time=1), 30.0) np.testing.assert_array_equal( res.time_bounds.isel(time=0), [np.datetime64("2041-11-01"), np.datetime64("2042-02-28")], @@ -137,7 +137,9 @@ def test_txx__months_slice_mode(): (CalcOperation.MAX_NUMBER_OF_CONSECUTIVE_EVENTS, 1, 1), ], ) -def test_custom_index__season_slice_mode(operator, expectation_year_1, expectation_year_2): +def test_custom_index__season_slice_mode( + operator, expectation_year_1, expectation_year_2 +): tas = stub_tas(2.0) tas.loc[{"time": "2042-01-01"}] = 303.15 tas.loc[{"time": "2042-12-01"}] = 280.15 @@ -164,7 +166,9 @@ def test_custom_index__season_slice_mode(operator, expectation_year_1, expectati (CalcOperation.RUN_SUM, 14, 14), ], ) -def test_custom_index_run_algos__season_slice_mode(operator, expectation_year_1, expectation_year_2): +def test_custom_index_run_algos__season_slice_mode( + operator, expectation_year_1, expectation_year_2 +): tas = stub_tas(2.0) res = icclim.custom_index( in_files=tas, diff --git a/icclim/tests/test_main.py b/icclim/tests/test_main.py index 60737100..c35e4e2f 100644 --- a/icclim/tests/test_main.py +++ b/icclim/tests/test_main.py @@ -123,7 +123,7 @@ def test_index_SU__DJF_cf_time(self): ) np.testing.assert_array_equal(res.SU.isel(time=0), np.NAN) np.testing.assert_array_equal(res.SU.isel(time=1), 0) - # "+ 1" because DJF sampling create a december month with nans before for first year + # "+ 1" because DJF sampling create a december month with nans before first year np.testing.assert_array_equal( len(np.unique(self.TIME_RANGE.year)) + 1, len(res.time) ) @@ -176,7 +176,7 @@ def test_indices_all_ignore_error(self): in_files=ds, out_file=self.OUTPUT_FILE, ignore_error=True, - slice_mode="DJF" + slice_mode="DJF", ).compute() for i in EcadIndex: # No variable in input to compute snow indices diff --git a/icclim/tests/test_utils.py b/icclim/tests/test_utils.py index 31001561..1924931b 100644 --- a/icclim/tests/test_utils.py +++ b/icclim/tests/test_utils.py @@ -3,14 +3,13 @@ import numpy as np import pandas as pd import xarray +import xarray as xr from icclim.models.frequency import Frequency from icclim.models.index_config import CfVariable from icclim.models.user_index_config import UserIndexConfig -import xarray as xr - -VALUE_COUNT = 365 * 5 + 1 # 5 years of data (with 1 leap year) +VALUE_COUNT = 365 * 5 + 1 # 5 years of data (with 1 leap year) COORDS = dict( lat=[42], lon=[42], @@ -20,6 +19,7 @@ CF_TIME_RANGE = xr.cftime_range("2042-01-01", periods=VALUE_COUNT, freq="D") + def stub_user_index(cf_vars: List[CfVariable]): return UserIndexConfig( index_name="Yolo", calc_operation="noop", freq=Frequency.MONTH, cf_vars=cf_vars diff --git a/icclim/user_indices/calc_operation.py b/icclim/user_indices/calc_operation.py index cc0b7c28..f0b229df 100644 --- a/icclim/user_indices/calc_operation.py +++ b/icclim/user_indices/calc_operation.py @@ -3,7 +3,6 @@ from enum import Enum from typing import Callable, Literal -import xclim.core.calendar from xarray.core.dataarray import DataArray from icclim.icclim_exceptions import InvalidIcclimArgumentError, MissingIcclimInputError From 3981c72114fba173705cd29421ff9d14236f626e Mon Sep 17 00:00:00 2001 From: abel Date: Fri, 6 May 2022 09:22:01 +0200 Subject: [PATCH 06/10] FIX: typing --- icclim/ecad_functions.py | 40 ++++++++++++++++++---------------- icclim/tests/test_frequency.py | 2 ++ 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/icclim/ecad_functions.py b/icclim/ecad_functions.py index e05120db..1d42cef3 100644 --- a/icclim/ecad_functions.py +++ b/icclim/ecad_functions.py @@ -2,8 +2,10 @@ All ECA&D functions. Each function wraps its xclim equivalent functions adding icclim metadata to it. """ +from __future__ import annotations + import re -from typing import Any, Callable, Optional, Tuple +from typing import Any, Callable from warnings import warn import numpy as np @@ -66,7 +68,7 @@ def id(config: IndexConfig) -> DataArray: ) -def csdi(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def csdi(config: IndexConfig) -> tuple[DataArray, DataArray | None]: thresh = 10 if config.threshold is None else config.threshold return _compute_spell_duration( cf_var=config.tasmin, @@ -81,7 +83,7 @@ def csdi(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def tg10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def tg10p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_temperature_percentile_index( cf_var=config.tas, freq_config=_build_frequency_kwargs(config), @@ -95,7 +97,7 @@ def tg10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def tn10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def tn10p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_temperature_percentile_index( cf_var=config.tasmin, freq_config=_build_frequency_kwargs(config), @@ -109,7 +111,7 @@ def tn10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def tx10p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def tx10p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_temperature_percentile_index( cf_var=config.tasmax, freq_config=_build_frequency_kwargs(config), @@ -160,7 +162,7 @@ def tr(config: IndexConfig) -> DataArray: ) -def wsdi(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def wsdi(config: IndexConfig) -> tuple[DataArray, DataArray | None]: thresh = 90 if config.threshold is None else config.threshold return _compute_spell_duration( cf_var=config.tasmax, @@ -175,7 +177,7 @@ def wsdi(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def tg90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def tg90p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_temperature_percentile_index( cf_var=config.tas, freq_config=_build_frequency_kwargs(config), @@ -189,7 +191,7 @@ def tg90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def tn90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def tn90p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_temperature_percentile_index( cf_var=config.tasmin, freq_config=_build_frequency_kwargs(config), @@ -203,7 +205,7 @@ def tn90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def tx90p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def tx90p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_temperature_percentile_index( cf_var=config.tasmax, freq_config=_build_frequency_kwargs(config), @@ -295,7 +297,7 @@ def rx5day(config: IndexConfig) -> DataArray: return result -def r75p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def r75p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_rxxp( pr=config.pr, freq_config=_build_frequency_kwargs(config), @@ -306,7 +308,7 @@ def r75p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def r75ptot(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def r75ptot(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_rxxptot( pr=config.pr, freq_config=_build_frequency_kwargs(config), @@ -316,7 +318,7 @@ def r75ptot(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def r95p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def r95p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_rxxp( pr=config.pr, freq_config=_build_frequency_kwargs(config), @@ -327,7 +329,7 @@ def r95p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def r95ptot(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def r95ptot(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_rxxptot( pr=config.pr, freq_config=_build_frequency_kwargs(config), @@ -337,7 +339,7 @@ def r95ptot(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def r99p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def r99p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_rxxp( pr=config.pr, freq_config=_build_frequency_kwargs(config), @@ -348,7 +350,7 @@ def r99p(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: ) -def r99ptot(config: IndexConfig) -> Tuple[DataArray, Optional[DataArray]]: +def r99ptot(config: IndexConfig) -> tuple[DataArray, DataArray | None]: return _compute_rxxptot( pr=config.pr, freq_config=_build_frequency_kwargs(config), @@ -882,7 +884,7 @@ def _compute_spell_duration( save_percentile: bool, callback: Callable, xclim_index_fun: Callable, -) -> Tuple[DataArray, Optional[DataArray]]: +) -> tuple[DataArray, DataArray | None]: per = _compute_percentile_doy( cf_var.reference_da, per_thresh, @@ -997,7 +999,7 @@ def _compute_rxxptot( pr_per_thresh: float, per_interpolation: QuantileInterpolation, save_percentile: bool, -) -> Tuple[DataArray, Optional[DataArray]]: +) -> tuple[DataArray, DataArray | None]: base_wet_days = _filter_in_wet_days(pr.reference_da, dry_day_value=np.nan) per = _compute_percentile_over_period( base_wet_days, per_interpolation, pr_per_thresh @@ -1023,7 +1025,7 @@ def _compute_rxxp( per_interpolation: QuantileInterpolation, save_percentile: bool, is_percent: bool, -) -> Tuple[DataArray, Optional[DataArray]]: +) -> tuple[DataArray, DataArray | None]: base_wet_days = _filter_in_wet_days(pr.reference_da, dry_day_value=np.nan) per = _compute_percentile_over_period( base_wet_days, per_interpolation, pr_per_thresh @@ -1053,7 +1055,7 @@ def _compute_temperature_percentile_index( is_percent: bool, callback: Callable, xclim_index_fun: Callable, -) -> Tuple[DataArray, Optional[DataArray]]: +) -> tuple[DataArray, DataArray | None]: run_bootstrap = _can_run_bootstrap(cf_var) per = _compute_percentile_doy( cf_var.reference_da, diff --git a/icclim/tests/test_frequency.py b/icclim/tests/test_frequency.py index 83fd5461..5de4796d 100644 --- a/icclim/tests/test_frequency.py +++ b/icclim/tests/test_frequency.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import numpy as np import pandas as pd import pytest From 387456191a45892c06c48b6675fb6e9849d18257 Mon Sep 17 00:00:00 2001 From: abel Date: Fri, 20 May 2022 12:03:02 +0200 Subject: [PATCH 07/10] ENH: Re-Add season clipped to bounds --- doc/source/references/icclim_index_api.rst | 26 ++++-- icclim/main.py | 22 ++++- icclim/models/ecad_indices.py | 16 ++++ icclim/models/frequency.py | 72 ++++++++++++--- icclim/models/index_config.py | 6 ++ icclim/tests/test_main.py | 100 +++++++++++++++++++-- setup.py | 9 ++ 7 files changed, 224 insertions(+), 27 deletions(-) diff --git a/doc/source/references/icclim_index_api.rst b/doc/source/references/icclim_index_api.rst index 71cc2f97..9c35ff56 100644 --- a/doc/source/references/icclim_index_api.rst +++ b/doc/source/references/icclim_index_api.rst @@ -68,15 +68,31 @@ summer, autumn and monthly frequency: | The winter season (``DJF``) of 2000 is composed of December 2000, January 2001 and February 2001. | Likewise, the winter half-year (``ONDJFM``) of 2000 includes October 2000, November 2000, December 2000, January 2001, February 2001 and March 2001. -Monthly time series with months selected by user (the keyword can be either "month" or "months"): +Monthly time series filter +++++++++++++++++++++++++++ +Monthly time series with months selected by user (the keyword can be either `month` or `months`): >>> slice_mode = ['month', [4,5,11]] # index will be computed only for April, May and November or >>> slice_mode = ['month', [4]] # index will be computed only for April -User defined seasons: - >>> slice_mode = ['season', [4,5,6,7]] - or - >>> slice_mode = ['season', ([11, 12, 1])] +User defined seasons +++++++++++++++++++++ +You can either defined seasons aware of data outside their bounds (keyword `season`) or +seasons which clip all data outside their bounds (keyword `clipped_season`). +The later is most useful on indices computing spells, if you want to totally ignore spells that could +have started before your custom season. + >>> slice_mode = ['season', [4,5,6,7]] # March to July un-clipped +or + >>> slice_mode = ['season', [11, 12, 1]] # November to January un-clipped + + >>> slice_mode = ['clipped_season', [4,5,6,7]] +or + >>> slice_mode = ['clipped_season', ([11, 12, 1])] + +Additionally, you can define a season between two exact date: + >>> slice_mode = ['season', ["07-19", "08-14"]] +or + >>> slice_mode = ["clipped_season", ["07-19", "08-14"]] ``threshold`` ~~~~~~~~~~~~~ diff --git a/icclim/main.py b/icclim/main.py index f9e869e5..425973ce 100644 --- a/icclim/main.py +++ b/icclim/main.py @@ -21,7 +21,7 @@ from icclim.icclim_exceptions import InvalidIcclimArgumentError from icclim.icclim_logger import IcclimLogger, Verbosity from icclim.models.constants import ICCLIM_VERSION -from icclim.models.ecad_indices import EcadIndex +from icclim.models.ecad_indices import EcadIndex, get_season_excluded_indices from icclim.models.frequency import Frequency, SliceMode from icclim.models.index_group import IndexGroup from icclim.models.netcdf_version import NetcdfVersion @@ -234,7 +234,6 @@ def index( " You must provide either `user_index` to compute a customized index" " or `index_name` for one of the ECA&D indices." ) - index: EcadIndex | None if index_name is not None: index = EcadIndex.lookup(index_name) else: @@ -261,6 +260,7 @@ def index( if user_index is not None: result_ds = _compute_user_index_dataset(config=config, user_index=user_index) else: + _check_valid_config(index, config) result_ds = _compute_ecad_index_dataset( config=config, index=index, @@ -393,7 +393,12 @@ def _compute_ecad_index( ) -> Dataset: logging.info(f"Calculating climate index: {index.short_name}") result_ds = Dataset() - res = index.compute(config) + if config.freq.time_clipping is not None: + # xclim missing feature will not work with clipped time + with xclim.set_options(check_missing="skip"): + res = index.compute(config) + else: + res = index.compute(config) if isinstance(res, tuple): da, per = res else: @@ -503,3 +508,14 @@ def _guess_variable_names( f" from your input dataset: {list(ds.data_vars)}." ) return res + + +def _check_valid_config(index: EcadIndex, config: IndexConfig): + if index in get_season_excluded_indices() and config.freq.indexer is not None: + raise InvalidIcclimArgumentError( + "Indices computing a spell cannot be computed on un-clipped season for now." + " Instead, you can use a clipped_season like this:" + "`slice_mode=['clipped_season', [12,1,2]]` (example of a DJF season)." + " However, it will NOT take into account spells beginning before the season" + " start!" + ) diff --git a/icclim/models/ecad_indices.py b/icclim/models/ecad_indices.py index 0bcd0f37..d299c2a2 100644 --- a/icclim/models/ecad_indices.py +++ b/icclim/models/ecad_indices.py @@ -472,3 +472,19 @@ def list() -> list[str]: fashion. """ return [f"{i.group.value} | {i.short_name} | {i.definition}" for i in EcadIndex] + + +def get_season_excluded_indices() -> list[EcadIndex]: + return [ + EcadIndex.WSDI, + EcadIndex.CSU, + EcadIndex.CFD, + EcadIndex.CSDI, + EcadIndex.CDD, + EcadIndex.CWD, + EcadIndex.RX5DAY, + EcadIndex.CD, + EcadIndex.CW, + EcadIndex.WD, + EcadIndex.WW, + ] diff --git a/icclim/models/frequency.py b/icclim/models/frequency.py index 2f458602..a3d3c54e 100644 --- a/icclim/models/frequency.py +++ b/icclim/models/frequency.py @@ -156,12 +156,17 @@ def __init__( description: str, post_processing: Callable[[DataArray], tuple[DataArray, DataArray]], indexer: Indexer | None, + time_clipping: Callable = None, ): self.pandas_freq: str = pandas_freq self.accepted_values: list[str] = accepted_values self.description = description self.post_processing = post_processing self.indexer = indexer + # time_clipping is a workaround for a "missing" feature of xclim. + # It allow to compute seasons for indices computing spells by ignoring values + # outside the season bounds. + self.time_clipping = time_clipping class Frequency(Enum): @@ -273,6 +278,10 @@ def post_processing(self): def indexer(self): return self._freq.indexer + @property + def time_clipping(self): + return self._freq.time_clipping + @staticmethod def lookup(slice_mode: SliceMode) -> Frequency: if isinstance(slice_mode, Frequency): @@ -334,10 +343,10 @@ def _get_frequency_from_list(slice_mode_list: list) -> Frequency: custom_freq._freq = _build_frequency_filtered_by_month(slice_mode_list[1]) elif freq_keyword == "season": season = slice_mode_list[1] - if isinstance(season[0], str): - custom_freq._freq = _build_seasonal_frequency_between_dates(season) - elif isinstance(season, Tuple) or isinstance(season[0], int): - custom_freq._freq = _build_seasonal_frequency_for_months(season) + custom_freq._freq = _build_seasonal_freq(season, False) + elif freq_keyword == "clipped_season": + season = slice_mode_list[1] + custom_freq._freq = _build_seasonal_freq(season, True) else: raise InvalidIcclimArgumentError( f"Unknown frequency {slice_mode_list}." @@ -356,31 +365,52 @@ def _build_frequency_filtered_by_month(months: list[int]): ) -def _build_seasonal_frequency_between_dates(season: list[str]): +def _build_seasonal_freq(season: tuple | list, clipped: bool): + if isinstance(season[0], str): + return _build_seasonal_frequency_between_dates(season, clipped) + elif isinstance(season, Tuple) or isinstance(season[0], int): + return _build_seasonal_frequency_for_months(season, clipped) + + +def _build_seasonal_frequency_between_dates(season: list[str], clipped: bool): if len(season) != 2: raise InvalidIcclimArgumentError(SEASON_ERR_MSG) begin_date, end_date = _read_date(season[0]), _read_date(season[1]) - return _Freq( - indexer=dict( + if clipped: + indexer = None + time_clipping = _get_filter_between_dates(begin_date, end_date) + else: + indexer = dict( date_bounds=(begin_date.strftime("%m-%d"), end_date.strftime("%m-%d")) - ), + ) + time_clipping = None + return _Freq( + indexer=indexer, post_processing=get_seasonal_time_updater( begin_date.month, end_date.month, begin_date.day, end_date.day ), pandas_freq=f"AS-{MONTHS_MAP[begin_date.month]}", description=f"seasonal time series (season: from {begin_date} to {end_date})", accepted_values=[], + time_clipping=time_clipping, ) -def _build_seasonal_frequency_for_months(season): +def _build_seasonal_frequency_for_months(season: tuple | list, clipped: bool): if isinstance(season, Tuple): # concat in case of ([12], [1, 2]) season = season[0] + season[1] if not _is_season_valid(season): raise InvalidIcclimArgumentError(SEASON_ERR_MSG) + if clipped: + indexer = None + time_clipping = _get_month_filter(season) + else: + indexer = dict(month=season) + time_clipping = None return _Freq( - indexer=dict(month=season), + indexer=indexer, + time_clipping=time_clipping, post_processing=get_seasonal_time_updater(season[0], season[-1]), pandas_freq=f"AS-{MONTHS_MAP[season[0]]}", description=f"seasonal time series (season: {season})", @@ -398,10 +428,30 @@ def _read_date(date_string: str) -> datetime: return date +def _get_filter_between_dates(begin_date: datetime, end_date: datetime): + def filter_between_dates(da: DataArray): + between_dates_range = pd.date_range(begin_date, end_date, freq="D").dayofyear + between_dates_mask = np.logical_and( + da.time.dt.dayofyear >= between_dates_range.min(), + da.time.dt.dayofyear <= between_dates_range.max(), + ) + return da[between_dates_mask] + + return lambda da: filter_between_dates(da) + + +def _get_month_filter(month_list: list[int]) -> Callable: + def filter_months(da: DataArray) -> DataArray: + return da.sel(time=da.time.dt.month.isin(month_list)) + + return lambda da: filter_months(da) + + SliceMode = Union[Frequency, str, List[Union[str, Tuple, int]]] MonthsIndexer = Dict[Literal["month"], List[int]] # format [12,1,2,3] DatesIndexer = Dict[ Literal["date_bounds"], Tuple[str, str] ] # format ("01-25", "02-28") -Indexer = Union[MonthsIndexer, DatesIndexer] +ClippedSeasonIndexer = Callable +Indexer = Union[MonthsIndexer, DatesIndexer, ClippedSeasonIndexer] diff --git a/icclim/models/index_config.py b/icclim/models/index_config.py index 0dd1c423..35ae56c2 100644 --- a/icclim/models/index_config.py +++ b/icclim/models/index_config.py @@ -117,6 +117,7 @@ def __init__( base_period_time_range=base_period_time_range, only_leap_years=only_leap_years, chunk_it=chunk_it, + time_clipping=self.freq.time_clipping, ) for var_name in var_names ] @@ -180,12 +181,17 @@ def _build_cf_variable( base_period_time_range: list[str] | None, only_leap_years: bool, chunk_it: bool, + time_clipping: Callable | None, ) -> CfVariable: if chunk_it: da = da.chunk("auto") # noqa - typing fixed in futur xarray version study_da = _build_study_da(da, time_range, ignore_Feb29th) + if time_clipping is not None: + study_da = time_clipping(study_da) if base_period_time_range is not None: reference_da = _build_reference_da(da, base_period_time_range, only_leap_years) + if time_clipping is not None: + reference_da = time_clipping(reference_da) else: reference_da = study_da # TODO: all these operations should probably be added in history metadata diff --git a/icclim/tests/test_main.py b/icclim/tests/test_main.py index c35e4e2f..480e0d69 100644 --- a/icclim/tests/test_main.py +++ b/icclim/tests/test_main.py @@ -8,8 +8,9 @@ import xarray as xr import icclim +from icclim.icclim_exceptions import InvalidIcclimArgumentError from icclim.models.constants import ICCLIM_VERSION -from icclim.models.ecad_indices import EcadIndex +from icclim.models.ecad_indices import EcadIndex, get_season_excluded_indices from icclim.models.frequency import Frequency from icclim.models.index_group import IndexGroup @@ -149,9 +150,21 @@ def test_indices__snow_indices(self): index_group=IndexGroup.SNOW, in_files=ds, out_file=self.OUTPUT_FILE ) for i in filter(lambda i: i.group == IndexGroup.SNOW, EcadIndex): - assert res[i] is not None + assert res[i.short_name] is not None def test_indices_all_from_Dataset(self): + ds = self.data.to_dataset(name="tas") + ds["tasmax"] = self.data + ds["tasmin"] = self.data + ds["pr"] = self.data.copy(deep=True) + ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["prec"] = self.data.copy(deep=True) + ds["prec"].attrs["units"] = "cm" + res = icclim.indices(index_group="all", in_files=ds, out_file=self.OUTPUT_FILE) + for i in EcadIndex: + assert res[i.short_name] is not None + + def test_indices_all_from_Dataset__seasonal_clip(self): ds = self.data.to_dataset(name="tas") ds["tasmax"] = self.data ds["tasmin"] = self.data @@ -160,7 +173,81 @@ def test_indices_all_from_Dataset(self): ds["prec"] = self.data.copy(deep=True) ds["prec"].attrs["units"] = "cm" res = icclim.indices( - index_group="all", in_files=ds, out_file=self.OUTPUT_FILE, ignore_error=True + index_group="all", + in_files=ds, + out_file=self.OUTPUT_FILE, + slice_mode=["clipped_season", [1, 2, 3]], + ) + for i in EcadIndex: + assert res[i.short_name] is not None + + def test_indices_all_from_Dataset__between_dates_seasonal_clip(self): + ds = self.data.to_dataset(name="tas") + ds["tasmax"] = self.data + ds["tasmin"] = self.data + ds["pr"] = self.data.copy(deep=True) + ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["prec"] = self.data.copy(deep=True) + ds["prec"].attrs["units"] = "cm" + res = icclim.indices( + index_group="all", + in_files=ds, + out_file=self.OUTPUT_FILE, + slice_mode=["clipped_season", ["07-19", "08-14"]], + ) + for i in EcadIndex: + assert res[i.short_name] is not None + + def test_indices_all_from_Dataset__JFM_seasonal_clip(self): + ds = self.data.to_dataset(name="tas") + ds["tasmax"] = self.data + ds["tasmin"] = self.data + ds["pr"] = self.data.copy(deep=True) + ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["prec"] = self.data.copy(deep=True) + ds["prec"].attrs["units"] = "cm" + res = icclim.indices( + index_group="all", + in_files=ds, + out_file=self.OUTPUT_FILE, + slice_mode=["clipped_season", [1, 2, 3]], + ) + for i in EcadIndex: + assert res[i.short_name] is not None + + def test_indices_all_from_Dataset__seasonal_error(self): + # GIVEN + ds = self.data.to_dataset(name="tas") + ds["tasmax"] = self.data + ds["tasmin"] = self.data + ds["pr"] = self.data.copy(deep=True) + ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["prec"] = self.data.copy(deep=True) + ds["prec"].attrs["units"] = "cm" + # THEN + with pytest.raises(InvalidIcclimArgumentError): + # WHEN + icclim.indices( + index_group="all", + in_files=ds, + out_file=self.OUTPUT_FILE, + slice_mode=["season", [1, 2, 3]], + ) + + # @pytest.mark.skip(reason="BUG: (add gh link)") + def test_indices_all_from_Dataset__between_year_clipped_season(self): + ds = self.data.to_dataset(name="tas") + ds["tasmax"] = self.data + ds["tasmin"] = self.data + ds["pr"] = self.data.copy(deep=True) + ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["prec"] = self.data.copy(deep=True) + ds["prec"].attrs["units"] = "cm" + res = icclim.indices( + index_group="all", + in_files=ds, + out_file=self.OUTPUT_FILE, + slice_mode=["clipped_season", [12, 1, 2, 3]], ) for i in EcadIndex: assert res[i.short_name] is not None @@ -180,15 +267,12 @@ def test_indices_all_ignore_error(self): ).compute() for i in EcadIndex: # No variable in input to compute snow indices - if i.group == IndexGroup.SNOW: + if i.group == IndexGroup.SNOW or i in get_season_excluded_indices(): assert res.data_vars.get(i.short_name, None) is None else: - # print("----------") - # print(i.short_name) - # print(res[i.short_name]) assert res[i.short_name] is not None - def test_indices_all_error(self): + def test_indices_all__error(self): ds = self.data.to_dataset(name="tas") ds["tasmax"] = self.data ds["tasmin"] = self.data diff --git a/setup.py b/setup.py index 275b1672..ab2476eb 100644 --- a/setup.py +++ b/setup.py @@ -20,8 +20,16 @@ "rechunker>=0.3, !=0.4", "fsspec", "dateparser", + "b08fa84d3dd3659a1923b7c64f6ca9976ff7dbd3", ] +dependency_links = ( + [ + # TODO: Remove once xclim 0.37 is released + "https://github.com/Ouranosinc/xclim/tarball/master#egg=b08fa84d3dd3659a1923b7c64f6ca9976ff7dbd3" # noqa + ], +) + setup( name="icclim", version=ICCLIM_VERSION, @@ -34,6 +42,7 @@ include_package_data=True, url="https://github.com/cerfacs-globc/icclim", install_requires=MINIMAL_REQUIREMENTS, + dependency_links=dependency_links, python_requires=">=3.8", classifiers=[ "Programming Language :: Python", From e0ddd8e514ee9341df2858579e82d9629b8f6d8b Mon Sep 17 00:00:00 2001 From: abel Date: Mon, 23 May 2022 17:42:39 +0200 Subject: [PATCH 08/10] MAINT: Use xclim `select_time` to clip things There is no real benefit of maintaining an icclim specific implementation for the same functions. --- icclim/models/frequency.py | 36 +++++++++++++++-------------------- icclim/models/index_config.py | 13 ++++++------- icclim/tests/test_main.py | 1 - 3 files changed, 21 insertions(+), 29 deletions(-) diff --git a/icclim/models/frequency.py b/icclim/models/frequency.py index a3d3c54e..b6f0e194 100644 --- a/icclim/models/frequency.py +++ b/icclim/models/frequency.py @@ -14,6 +14,7 @@ import numpy as np import pandas as pd import xarray as xr +import xclim.core.calendar from xarray.core.dataarray import DataArray from icclim.icclim_exceptions import InvalidIcclimArgumentError @@ -375,14 +376,15 @@ def _build_seasonal_freq(season: tuple | list, clipped: bool): def _build_seasonal_frequency_between_dates(season: list[str], clipped: bool): if len(season) != 2: raise InvalidIcclimArgumentError(SEASON_ERR_MSG) - begin_date, end_date = _read_date(season[0]), _read_date(season[1]) + begin_date = _read_date(season[0]) + end_date = _read_date(season[1]) + begin_formatted = begin_date.strftime("%m-%d") + end_formatted = end_date.strftime("%m-%d") if clipped: indexer = None - time_clipping = _get_filter_between_dates(begin_date, end_date) + time_clipping = _get_filter_between_dates(begin_formatted, end_formatted) else: - indexer = dict( - date_bounds=(begin_date.strftime("%m-%d"), end_date.strftime("%m-%d")) - ) + indexer = dict(date_bounds=(begin_formatted, end_formatted)) time_clipping = None return _Freq( indexer=indexer, @@ -390,7 +392,8 @@ def _build_seasonal_frequency_between_dates(season: list[str], clipped: bool): begin_date.month, end_date.month, begin_date.day, end_date.day ), pandas_freq=f"AS-{MONTHS_MAP[begin_date.month]}", - description=f"seasonal time series (season: from {begin_date} to {end_date})", + description=f"seasonal time series" + f" (season: from {begin_formatted} to {end_formatted})", accepted_values=[], time_clipping=time_clipping, ) @@ -428,23 +431,14 @@ def _read_date(date_string: str) -> datetime: return date -def _get_filter_between_dates(begin_date: datetime, end_date: datetime): - def filter_between_dates(da: DataArray): - between_dates_range = pd.date_range(begin_date, end_date, freq="D").dayofyear - between_dates_mask = np.logical_and( - da.time.dt.dayofyear >= between_dates_range.min(), - da.time.dt.dayofyear <= between_dates_range.max(), - ) - return da[between_dates_mask] - - return lambda da: filter_between_dates(da) +def _get_month_filter(season): + return lambda da: xclim.core.calendar.select_time(da, month=season) -def _get_month_filter(month_list: list[int]) -> Callable: - def filter_months(da: DataArray) -> DataArray: - return da.sel(time=da.time.dt.month.isin(month_list)) - - return lambda da: filter_months(da) +def _get_filter_between_dates(begin_date: str, end_date: str): + return lambda da: xclim.core.calendar.select_time( + da, date_bounds=(begin_date, end_date) + ) SliceMode = Union[Frequency, str, List[Union[str, Tuple, int]]] diff --git a/icclim/models/index_config.py b/icclim/models/index_config.py index 35ae56c2..0680a55c 100644 --- a/icclim/models/index_config.py +++ b/icclim/models/index_config.py @@ -186,17 +186,16 @@ def _build_cf_variable( if chunk_it: da = da.chunk("auto") # noqa - typing fixed in futur xarray version study_da = _build_study_da(da, time_range, ignore_Feb29th) - if time_clipping is not None: - study_da = time_clipping(study_da) if base_period_time_range is not None: reference_da = _build_reference_da(da, base_period_time_range, only_leap_years) - if time_clipping is not None: - reference_da = time_clipping(reference_da) else: reference_da = study_da - # TODO: all these operations should probably be added in history metadata - # it could be a property in CfVariable which will be reused when we update the - # metadata of the index, at the end. + if time_clipping is not None: + study_da = time_clipping(study_da) + reference_da = time_clipping(reference_da) + # TODO: all these operations should probably be added in history metadata or + # provenance it could be a property in CfVariable which will be reused when we + # update the metadata of the index, at the end. return CfVariable(name, study_da, reference_da) diff --git a/icclim/tests/test_main.py b/icclim/tests/test_main.py index 480e0d69..69c5a049 100644 --- a/icclim/tests/test_main.py +++ b/icclim/tests/test_main.py @@ -234,7 +234,6 @@ def test_indices_all_from_Dataset__seasonal_error(self): slice_mode=["season", [1, 2, 3]], ) - # @pytest.mark.skip(reason="BUG: (add gh link)") def test_indices_all_from_Dataset__between_year_clipped_season(self): ds = self.data.to_dataset(name="tas") ds["tasmax"] = self.data From 5b1717090c4382eaaa300478311de4f397bfcfee Mon Sep 17 00:00:00 2001 From: abel Date: Mon, 23 May 2022 18:32:55 +0200 Subject: [PATCH 09/10] Fix: setup.py --- setup.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index c66d7a40..16ac67e4 100644 --- a/setup.py +++ b/setup.py @@ -24,12 +24,10 @@ "b08fa84d3dd3659a1923b7c64f6ca9976ff7dbd3", ] -dependency_links = ( - [ - # TODO: Remove once xclim 0.37 is released - "https://github.com/Ouranosinc/xclim/tarball/master#egg=b08fa84d3dd3659a1923b7c64f6ca9976ff7dbd3" # noqa - ], -) +dependency_links = [ + # TODO: Remove once xclim 0.37 is released + "https://github.com/Ouranosinc/xclim/tarball/master#egg=b08fa84d3dd3659a1923b7c64f6ca9976ff7dbd3" # noqa +] setup( name="icclim", From 71ff316c5ac04be26f6379502ec2105236dab94e Mon Sep 17 00:00:00 2001 From: abel Date: Mon, 23 May 2022 19:04:26 +0200 Subject: [PATCH 10/10] Fix: setup.py --- setup.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/setup.py b/setup.py index 16ac67e4..b45e4de4 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ # https://github.com/numba/numba/issues/7754 "numpy>=1.16,<1.22", "xarray>=0.17", - "xclim>=0.34", + "xclim @ git+https://github.com/Ouranosinc/xclim.git", "cftime>=1.4.1", "dask[array]", "netCDF4>=1.5.7", @@ -21,12 +21,6 @@ "fsspec", "pandas>=1.3", "dateparser", - "b08fa84d3dd3659a1923b7c64f6ca9976ff7dbd3", -] - -dependency_links = [ - # TODO: Remove once xclim 0.37 is released - "https://github.com/Ouranosinc/xclim/tarball/master#egg=b08fa84d3dd3659a1923b7c64f6ca9976ff7dbd3" # noqa ] setup( @@ -41,7 +35,6 @@ include_package_data=True, url="https://github.com/cerfacs-globc/icclim", install_requires=MINIMAL_REQUIREMENTS, - dependency_links=dependency_links, python_requires=">=3.8", classifiers=[ "Programming Language :: Python",