From c16929423d46637ae42634dc86efd40b3a3f9161 Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Thu, 18 Jul 2024 16:32:58 +0200 Subject: [PATCH 01/13] feat: first attempt to add resample distributions function for probability matching --- pysteps/blending/steps.py | 2 +- pysteps/postprocessing/probmatching.py | 36 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 8bcfb3f33..16f5f7be5 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -53,7 +53,7 @@ from pysteps import noise from pysteps import utils from pysteps.nowcasts import utils as nowcast_utils -from pysteps.postprocessing import probmatching +from pysteps.postprocessing import probmatching, resample_distributions from pysteps.timeseries import autoregression, correlation from pysteps import blending diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index e84935714..657d42024 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -13,6 +13,7 @@ pmm_init pmm_compute shift_scale + resample_distributions """ import numpy as np @@ -259,6 +260,41 @@ def _get_error(scale): return shift, scale, R.reshape(shape) +def resample_distributions(a, b, weight): + """ + Merges two distributions (e.g. from the extrapolation nowcast and NWP in the blending module) + to effectively combine two distributions for the probability matching without losing extremes. + Parameters + ---------- + a: array_like + One of the two arrays from which the distribution should be samples (e.g. the extrapolation + cascade). + b: array_like + One of the two arrays from which the distribution should be samples (e.g. the NWP (model) + cascade). + weight: float + The weight that a should get (as a value between 0 and 1). + + Returns + ---------- + csort: array_like + The output distribution as a drawn binomial distribution from the input arrays a and b. + """ + # Prepare the input distributions + assert a.size == b.size + asort = np.sort(a.flatten())[::-1] + bsort = np.sort(b.flatten())[::-1] + n = asort.shape[0] + + # Resample the distributions + idxsamples = np.random.binomial(1, weight, n).astype(bool) + csort = bsort.copy() + csort[idxsamples] = asort[idxsamples] + csort = np.sort(csort)[::-1] + + return csort + + def _invfunc(y, fx, fy): if len(y) == 0: return np.array([]) From 773a6309799858aceff09a419827f9ace38c69ae Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Thu, 18 Jul 2024 17:01:52 +0200 Subject: [PATCH 02/13] fix: remove erroneous import --- pysteps/blending/steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 16f5f7be5..8bcfb3f33 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -53,7 +53,7 @@ from pysteps import noise from pysteps import utils from pysteps.nowcasts import utils as nowcast_utils -from pysteps.postprocessing import probmatching, resample_distributions +from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation from pysteps import blending From 1ed1941abb85de92280981c5da2557700e2b2a5c Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Fri, 19 Jul 2024 11:47:09 +0200 Subject: [PATCH 03/13] feat: add resampling option to probability matching step --- pysteps/blending/steps.py | 49 ++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 8bcfb3f33..4f47d6de4 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -90,6 +90,7 @@ def forecast( conditional=False, probmatching_method="cdf", mask_method="incremental", + resample_distribution=False, smooth_radar_mask_range=0, callback=None, return_output=True, @@ -205,25 +206,32 @@ def forecast( If set to True, compute the statistics of the precipitation field conditionally by excluding pixels where the values are below the threshold precip_thr. + probmatching_method: {'cdf','mean',None}, optional + Method for matching the statistics of the forecast field with those of + the most recently observed one. 'cdf'=map the forecast CDF to the observed + one, 'mean'=adjust only the conditional mean value of the forecast field + in precipitation areas, None=no matching applied. Using 'mean' requires + that mask_method is not None. mask_method: {'obs','incremental',None}, optional The method to use for masking no precipitation areas in the forecast field. The masked pixels are set to the minimum value of the observations. 'obs' = apply precip_thr to the most recently observed precipitation intensity field, 'incremental' = iteratively buffer the mask with a certain rate (currently it is 1 km/min), None=no masking. + resample_distribution: bool, optional + Method to resample the distribution from the extrapolation and NWP cascade as input + for the probability matching. Not resampling these distributions may lead to losing + some extremes when the weight of both the extrapolation and NWP cascade is similar. + Defaults to False. smooth_radar_mask_range: int, Default is 0. Method to smooth the transition between the radar-NWP-noise blend and the NWP-noise blend near the edge of the radar domain (radar mask), where the radar data is either - not present anymore or is not reliable. If set to 0 (grid cells), this generates a normal forecast without smoothing. To create a smooth mask, this range - should be a positive value, representing a buffer band of a number of pixels - by which the mask is cropped and smoothed. The smooth radar mask removes - the hard edges between NWP and radar in the final blended product. Typically, a value between 50 and 100 km can be used. 80 km generally gives good results. - probmatching_method: {'cdf','mean',None}, optional - Method for matching the statistics of the forecast field with those of - the most recently observed one. 'cdf'=map the forecast CDF to the observed - one, 'mean'=adjust only the conditional mean value of the forecast field - in precipitation areas, None=no matching applied. Using 'mean' requires - that mask_method is not None. + not present anymore or is not reliable. If set to 0 (grid cells), this generates a + normal forecast without smoothing. To create a smooth mask, this range should be a + positive value, representing a buffer band of a number of pixels by which the mask + is cropped and smoothed. The smooth radar mask removes the hard edges between NWP + and radar in the final blended product. Typically, a value between 50 and 100 km + can be used. 80 km generally gives good results. callback: function, optional Optional function that is called after computation of each time step of the nowcast. The function takes one argument: a three-dimensional array @@ -1404,7 +1412,6 @@ def worker(j): # latest extrapolated radar rainfall field blended with the # nwp model(s) rainfall forecast fields as 'benchmark'. - # TODO: Check probability matching method # 8.7.1 first blend the extrapolated rainfall field (the field # that is only used for post-processing steps) with the NWP # rainfall forecast for this time step using the weights @@ -1538,19 +1545,35 @@ def worker(j): # Set to min value outside of mask R_f_new[~MASK_prec_] = R_cmin + # If probmatching_method is not None, resample the distribution from + # both the extrapolation cascade and the model (NWP) cascade and use + # that for the probability matching + if probmatching_method is not None and resample_distribution: + R_pm_resampled = probmatching.resample_distributions( + a=R_pm_ep[t_index], + b=precip_models_pm_temp, + weight=weights_pm_normalized[ + 0 + ], # Use the weight from the extrapolation cascade here + ) + else: + R_pm_resampled = R_pm_blended.copy() + if probmatching_method == "cdf": # Adjust the CDF of the forecast to match the most recent # benchmark rainfall field (R_pm_blended). If the forecast if np.any(np.isfinite(R_f_new)): R_f_new = probmatching.nonparam_match_empirical_cdf( - R_f_new, R_pm_blended + R_f_new, R_pm_resampled ) + R_pm_resampled = None elif probmatching_method == "mean": # Use R_pm_blended as benchmark field and - mu_0 = np.mean(R_pm_blended[R_pm_blended >= precip_thr]) + mu_0 = np.mean(R_pm_resampled[R_pm_resampled >= precip_thr]) MASK = R_f_new >= precip_thr mu_fct = np.mean(R_f_new[MASK]) R_f_new[MASK] = R_f_new[MASK] - mu_fct + mu_0 + R_pm_resampled = None R_f_out.append(R_f_new) From 47329319930e20aa74f1509b369e367678f350fd Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Fri, 19 Jul 2024 13:35:52 +0200 Subject: [PATCH 04/13] fix: make sure there are no nans in the resampling function --- pysteps/blending/steps.py | 2 +- pysteps/postprocessing/probmatching.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 4f47d6de4..3c79b33a1 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -1551,7 +1551,7 @@ def worker(j): if probmatching_method is not None and resample_distribution: R_pm_resampled = probmatching.resample_distributions( a=R_pm_ep[t_index], - b=precip_models_pm_temp, + b=precip_models_pm_temp[j], weight=weights_pm_normalized[ 0 ], # Use the weight from the extrapolation cascade here diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index 657d42024..6783721c4 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -280,6 +280,12 @@ def resample_distributions(a, b, weight): csort: array_like The output distribution as a drawn binomial distribution from the input arrays a and b. """ + # First make sure there are no nans in the input data + nan_indices = np.isnan(a) + a[nan_indices] = np.nanmin(a) + nan_indices = np.isnan(b) + b[nan_indices] = np.nanmin(b) + # Prepare the input distributions assert a.size == b.size asort = np.sort(a.flatten())[::-1] From 1abec80ffc350d3a8d7a25a3228ca46d9e7ac90c Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Fri, 19 Jul 2024 15:25:42 +0200 Subject: [PATCH 05/13] fix: fill up space outside radar domain with model data and change function names --- pysteps/blending/steps.py | 8 +++---- pysteps/postprocessing/probmatching.py | 29 +++++++++++++------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 3c79b33a1..7a0148cab 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -1550,11 +1550,9 @@ def worker(j): # that for the probability matching if probmatching_method is not None and resample_distribution: R_pm_resampled = probmatching.resample_distributions( - a=R_pm_ep[t_index], - b=precip_models_pm_temp[j], - weight=weights_pm_normalized[ - 0 - ], # Use the weight from the extrapolation cascade here + extrapolation_cascade=R_pm_ep[t_index], + model_cascade=precip_models_pm_temp[j], + weight_extrapolation=weights_pm_normalized[0], ) else: R_pm_resampled = R_pm_blended.copy() diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index 6783721c4..8fcf35aa5 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -260,40 +260,41 @@ def _get_error(scale): return shift, scale, R.reshape(shape) -def resample_distributions(a, b, weight): +def resample_distributions(extrapolation_cascade, model_cascade, weight_extrapolation): """ Merges two distributions (e.g. from the extrapolation nowcast and NWP in the blending module) to effectively combine two distributions for the probability matching without losing extremes. Parameters ---------- - a: array_like + extrapolation_cascade: array_like One of the two arrays from which the distribution should be samples (e.g. the extrapolation cascade). - b: array_like + model_cascade: array_like One of the two arrays from which the distribution should be samples (e.g. the NWP (model) cascade). - weight: float - The weight that a should get (as a value between 0 and 1). + weight_extrapolation: float + The weight that extrapolation_cascade should get (as a value between 0 and 1). Returns ---------- csort: array_like - The output distribution as a drawn binomial distribution from the input arrays a and b. + The output distribution as extrapolation_cascade drawn binomial distribution from the input arrays extrapolation_cascade and model_cascade. """ # First make sure there are no nans in the input data - nan_indices = np.isnan(a) - a[nan_indices] = np.nanmin(a) - nan_indices = np.isnan(b) - b[nan_indices] = np.nanmin(b) + # Where the extrapolation cascade is nan (outside the radar domain), we fill it up with the model data + nan_indices = np.isnan(model_cascade) + model_cascade[nan_indices] = np.nanmin(model_cascade) + nan_indices = np.isnan(extrapolation_cascade) + extrapolation_cascade[nan_indices] = model_cascade[nan_indices] # Prepare the input distributions - assert a.size == b.size - asort = np.sort(a.flatten())[::-1] - bsort = np.sort(b.flatten())[::-1] + assert extrapolation_cascade.size == model_cascade.size + asort = np.sort(extrapolation_cascade.flatten())[::-1] + bsort = np.sort(model_cascade.flatten())[::-1] n = asort.shape[0] # Resample the distributions - idxsamples = np.random.binomial(1, weight, n).astype(bool) + idxsamples = np.random.binomial(1, weight_extrapolation, n).astype(bool) csort = bsort.copy() csort[idxsamples] = asort[idxsamples] csort = np.sort(csort)[::-1] From 7477d22ad21cec7e33f2aff2c7e0c349c1bd0ac5 Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Fri, 19 Jul 2024 15:28:25 +0200 Subject: [PATCH 06/13] docs: add info about the weights --- pysteps/postprocessing/probmatching.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index 8fcf35aa5..b650a261f 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -273,7 +273,8 @@ def resample_distributions(extrapolation_cascade, model_cascade, weight_extrapol One of the two arrays from which the distribution should be samples (e.g. the NWP (model) cascade). weight_extrapolation: float - The weight that extrapolation_cascade should get (as a value between 0 and 1). + The weight that extrapolation_cascade should get (as a value between 0 and 1). This is + typically based on cascade level 2 (as is done for all the probability matching steps in `blending/steps.py`) Returns ---------- From e4daa0b1eab7c0c827c5edcfc139ee46ded217ba Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Fri, 19 Jul 2024 15:50:28 +0200 Subject: [PATCH 07/13] fix: add requested changes from review --- pysteps/blending/steps.py | 10 ++++----- pysteps/postprocessing/probmatching.py | 28 +++++++++++++------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 7a0148cab..b381003e3 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -90,7 +90,7 @@ def forecast( conditional=False, probmatching_method="cdf", mask_method="incremental", - resample_distribution=False, + resample_distribution=True, smooth_radar_mask_range=0, callback=None, return_output=True, @@ -222,7 +222,7 @@ def forecast( Method to resample the distribution from the extrapolation and NWP cascade as input for the probability matching. Not resampling these distributions may lead to losing some extremes when the weight of both the extrapolation and NWP cascade is similar. - Defaults to False. + Defaults to True. smooth_radar_mask_range: int, Default is 0. Method to smooth the transition between the radar-NWP-noise blend and the NWP-noise blend near the edge of the radar domain (radar mask), where the radar data is either @@ -1550,9 +1550,9 @@ def worker(j): # that for the probability matching if probmatching_method is not None and resample_distribution: R_pm_resampled = probmatching.resample_distributions( - extrapolation_cascade=R_pm_ep[t_index], - model_cascade=precip_models_pm_temp[j], - weight_extrapolation=weights_pm_normalized[0], + first_array=R_pm_ep[t_index], + second_array=precip_models_pm_temp[j], + probability_first_array=weights_pm_normalized[0], ) else: R_pm_resampled = R_pm_blended.copy() diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index b650a261f..12d670cc2 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -260,42 +260,42 @@ def _get_error(scale): return shift, scale, R.reshape(shape) -def resample_distributions(extrapolation_cascade, model_cascade, weight_extrapolation): +def resample_distributions(first_array, second_array, probability_first_array): """ Merges two distributions (e.g. from the extrapolation nowcast and NWP in the blending module) to effectively combine two distributions for the probability matching without losing extremes. Parameters ---------- - extrapolation_cascade: array_like + first_array: array_like One of the two arrays from which the distribution should be samples (e.g. the extrapolation cascade). - model_cascade: array_like + second_array: array_like One of the two arrays from which the distribution should be samples (e.g. the NWP (model) cascade). - weight_extrapolation: float - The weight that extrapolation_cascade should get (as a value between 0 and 1). This is + probability_first_array: float + The weight that first_array should get (as a value between 0 and 1). This is typically based on cascade level 2 (as is done for all the probability matching steps in `blending/steps.py`) Returns ---------- csort: array_like - The output distribution as extrapolation_cascade drawn binomial distribution from the input arrays extrapolation_cascade and model_cascade. + The output distribution as first_array drawn binomial distribution from the input arrays first_array and second_array. """ # First make sure there are no nans in the input data # Where the extrapolation cascade is nan (outside the radar domain), we fill it up with the model data - nan_indices = np.isnan(model_cascade) - model_cascade[nan_indices] = np.nanmin(model_cascade) - nan_indices = np.isnan(extrapolation_cascade) - extrapolation_cascade[nan_indices] = model_cascade[nan_indices] + nan_indices = np.isnan(second_array) + second_array[nan_indices] = np.nanmin(second_array) + nan_indices = np.isnan(first_array) + first_array[nan_indices] = second_array[nan_indices] # Prepare the input distributions - assert extrapolation_cascade.size == model_cascade.size - asort = np.sort(extrapolation_cascade.flatten())[::-1] - bsort = np.sort(model_cascade.flatten())[::-1] + assert first_array.size == second_array.size + asort = np.sort(first_array.flatten())[::-1] + bsort = np.sort(second_array.flatten())[::-1] n = asort.shape[0] # Resample the distributions - idxsamples = np.random.binomial(1, weight_extrapolation, n).astype(bool) + idxsamples = np.random.binomial(1, probability_first_array, n).astype(bool) csort = bsort.copy() csort[idxsamples] = asort[idxsamples] csort = np.sort(csort)[::-1] From 121c54561e15ea2c93dc990711b921c83885c893 Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Fri, 19 Jul 2024 17:21:10 +0200 Subject: [PATCH 08/13] fix: add tests and make sure probability stays within bounds --- pysteps/postprocessing/probmatching.py | 8 ++++ pysteps/tests/test_blending_steps.py | 59 ++++++++++++++------------ 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index 12d670cc2..711e93ce9 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -281,6 +281,14 @@ def resample_distributions(first_array, second_array, probability_first_array): csort: array_like The output distribution as first_array drawn binomial distribution from the input arrays first_array and second_array. """ + # Make sure that the probability is never: <0, >1 or nan + if probability_first_array < 0.0: + probability_first_array = 0.0 + elif probability_first_array > 1.0: + probability_first_array = 1.0 + elif np.isnan(probability_first_array): + probability_first_array = 0.0 + # First make sure there are no nans in the input data # Where the extrapolation cascade is nan (outside the radar domain), we fill it up with the model data nan_indices = np.isnan(second_array) diff --git a/pysteps/tests/test_blending_steps.py b/pysteps/tests/test_blending_steps.py index ea20eaf25..1b813e08b 100644 --- a/pysteps/tests/test_blending_steps.py +++ b/pysteps/tests/test_blending_steps.py @@ -8,37 +8,40 @@ steps_arg_values = [ - (1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0), - (1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0), - (1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0), - (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0), - (1, 3, 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0), - (1, 3, 4, 8, "incremental", "cdf", False, "spn", True, 4, False, False, 0), - (1, 3, 4, 6, "incremental", "cdf", False, "bps", True, 4, False, False, 0), - (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0), - (1, 3, 4, 9, "incremental", "cdf", False, "spn", True, 4, False, False, 0), - (2, 3, 10, 8, "incremental", "cdf", False, "spn", True, 10, False, False, 0), - (5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0), - (1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0), - (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0), - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0), + (1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, True), + (1, 3, 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 8, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 6, "incremental", "cdf", False, "bps", True, 4, False, False, 0, False), + (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, False), + (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, True), + (1, 3, 4, 9, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False), + (2, 3, 10, 8, "incremental", "cdf", False, "spn", True, 10, False, False, 0, False), + (5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0, False), + (1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0, False), + (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False), # Test the case where the radar image contains no rain. - (1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0), + (1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, True), # Test the case where the NWP fields contain no rain. - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0, True), # Test the case where both the radar image and the NWP fields contain no rain. - (1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0), - (5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0), + (1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0, False), + (5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0, False), # Test for smooth radar mask - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 80), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, False, 80), - (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, False, False, 80), - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 80), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 80), - (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 80, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, False, 80, False), + (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, False, False, 80, False), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 80, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 80, True), + (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80, False), ] steps_arg_names = ( @@ -55,6 +58,7 @@ "zero_radar", "zero_nwp", "smooth_radar_mask_range", + "resample_distribution", ) @@ -73,6 +77,7 @@ def test_steps_blending( zero_radar, zero_nwp, smooth_radar_mask_range, + resample_distribution, ): pytest.importorskip("cv2") From 3fc9853895c5c9a66068ea401e6e99b1e92240f7 Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Sun, 21 Jul 2024 23:06:52 +0200 Subject: [PATCH 09/13] Better docstrings --- pysteps/blending/steps.py | 1 + pysteps/postprocessing/probmatching.py | 31 +++++++++++++++++--------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index b381003e3..27590102f 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -1549,6 +1549,7 @@ def worker(j): # both the extrapolation cascade and the model (NWP) cascade and use # that for the probability matching if probmatching_method is not None and resample_distribution: + # resample weights are based on cascade level 2 R_pm_resampled = probmatching.resample_distributions( first_array=R_pm_ep[t_index], second_array=precip_models_pm_temp[j], diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index 711e93ce9..7372572a9 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -262,25 +262,36 @@ def _get_error(scale): def resample_distributions(first_array, second_array, probability_first_array): """ - Merges two distributions (e.g. from the extrapolation nowcast and NWP in the blending module) - to effectively combine two distributions for the probability matching without losing extremes. + Merges two distributions (e.g., from the extrapolation nowcast and NWP in the blending module) + to effectively combine two distributions for probability matching without losing extremes. + Parameters ---------- first_array: array_like - One of the two arrays from which the distribution should be samples (e.g. the extrapolation - cascade). + One of the two arrays from which the distribution should be sampled (e.g., the extrapolation + cascade). It must be of the same shape as `second_array`. second_array: array_like - One of the two arrays from which the distribution should be samples (e.g. the NWP (model) - cascade). + One of the two arrays from which the distribution should be sampled (e.g., the NWP (model) + cascade). It must be of the same shape as `first_array`. probability_first_array: float - The weight that first_array should get (as a value between 0 and 1). This is - typically based on cascade level 2 (as is done for all the probability matching steps in `blending/steps.py`) + The weight that `first_array` should get (a value between 0 and 1). This determines the + likelihood of selecting elements from `first_array` over `second_array`. Returns - ---------- + ------- csort: array_like - The output distribution as first_array drawn binomial distribution from the input arrays first_array and second_array. + The combined output distribution. This is an array of the same shape as the input arrays, + where each element is chosen from either `first_array` or `second_array` based on the specified + probability, and then sorted in descending order. + + Raises + ------ + ValueError + If `first_array` and `second_array` do not have the same shape. """ + if first_array.shape != second_array.shape: + raise ValueError("first_array and second_array must have the same shape") + # Make sure that the probability is never: <0, >1 or nan if probability_first_array < 0.0: probability_first_array = 0.0 From f701bbf27b3feaca4a5df742278215e933535f10 Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Sun, 21 Jul 2024 23:18:43 +0200 Subject: [PATCH 10/13] Minor code improvements --- pysteps/postprocessing/probmatching.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index 7372572a9..2d8383afd 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -289,16 +289,11 @@ def resample_distributions(first_array, second_array, probability_first_array): ValueError If `first_array` and `second_array` do not have the same shape. """ + + # Valide inputs if first_array.shape != second_array.shape: raise ValueError("first_array and second_array must have the same shape") - - # Make sure that the probability is never: <0, >1 or nan - if probability_first_array < 0.0: - probability_first_array = 0.0 - elif probability_first_array > 1.0: - probability_first_array = 1.0 - elif np.isnan(probability_first_array): - probability_first_array = 0.0 + probability_first_array = np.clip(probability_first_array, 0.0, 1.0) # First make sure there are no nans in the input data # Where the extrapolation cascade is nan (outside the radar domain), we fill it up with the model data @@ -307,16 +302,14 @@ def resample_distributions(first_array, second_array, probability_first_array): nan_indices = np.isnan(first_array) first_array[nan_indices] = second_array[nan_indices] - # Prepare the input distributions - assert first_array.size == second_array.size - asort = np.sort(first_array.flatten())[::-1] - bsort = np.sort(second_array.flatten())[::-1] + # Flatten and sort the arrays + asort = np.sort(first_array, axis=None)[::-1] + bsort = np.sort(second_array, axis=None)[::-1] n = asort.shape[0] # Resample the distributions idxsamples = np.random.binomial(1, probability_first_array, n).astype(bool) - csort = bsort.copy() - csort[idxsamples] = asort[idxsamples] + csort = np.where(idxsamples, asort, bsort) csort = np.sort(csort)[::-1] return csort From 0fc0da295d2c63d42c15f31348efbd2a6c6748b4 Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Sun, 21 Jul 2024 23:26:47 +0200 Subject: [PATCH 11/13] Add test --- .../tests/test_postprocessing_probmatching.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 pysteps/tests/test_postprocessing_probmatching.py diff --git a/pysteps/tests/test_postprocessing_probmatching.py b/pysteps/tests/test_postprocessing_probmatching.py new file mode 100644 index 000000000..15d003d2c --- /dev/null +++ b/pysteps/tests/test_postprocessing_probmatching.py @@ -0,0 +1,57 @@ +import numpy as np +import pytest +from pysteps.postprocessing.probmatching import resample_distributions + +class TestResampleDistributions: + + @pytest.fixture(autouse=True) + def setup(self): + # Set the seed for reproducibility + np.random.seed(42) + + def test_valid_inputs(self): + first_array = np.array([1, 3, 5, 7, 9]) + second_array = np.array([2, 4, 6, 8, 10]) + probability_first_array = 0.6 + result = resample_distributions(first_array, second_array, probability_first_array) + expected_result = np.array([9, 8, 6, 3, 1]) # Expected result based on the seed + assert result.shape == first_array.shape + assert np.array_equal(result, expected_result) + + def test_probability_zero(self): + first_array = np.array([1, 3, 5, 7, 9]) + second_array = np.array([2, 4, 6, 8, 10]) + probability_first_array = 0.0 + result = resample_distributions(first_array, second_array, probability_first_array) + assert np.array_equal(result, np.sort(second_array)[::-1]) + + def test_probability_one(self): + first_array = np.array([1, 3, 5, 7, 9]) + second_array = np.array([2, 4, 6, 8, 10]) + probability_first_array = 1.0 + result = resample_distributions(first_array, second_array, probability_first_array) + assert np.array_equal(result, np.sort(first_array)[::-1]) + + def test_nan_in_first_array(self): + first_array_with_nan = np.array([1, 3, np.nan, 7, 9]) + second_array = np.array([2, 4, 6, 8, 10]) + probability_first_array = 0.6 + result = resample_distributions(first_array_with_nan, second_array, probability_first_array) + assert result.shape == first_array_with_nan.shape + assert not np.any(np.isnan(result)) + + def test_nan_in_second_array(self): + first_array = np.array([1, 3, 5, 7, 9]) + second_array_with_nan = np.array([2, 4, 6, 8, np.nan]) + probability_first_array = 0.6 + result = resample_distributions(first_array, second_array_with_nan, probability_first_array) + assert result.shape == first_array.shape + assert not np.any(np.isnan(result)) + + def test_nan_in_both_arrays(self): + first_array_with_nan = np.array([1, np.nan, 5, np.nan, 9]) + second_array_with_nan = np.array([np.nan, 4, np.nan, 8, 10]) + probability_first_array = 0.6 + result = resample_distributions(first_array_with_nan, second_array_with_nan, probability_first_array) + assert result.shape == first_array_with_nan.shape + assert not np.any(np.isnan(result)) From 85242a7cf922b16d1e29c42585dedb609d676cda Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Sun, 21 Jul 2024 23:28:54 +0200 Subject: [PATCH 12/13] Run black --- .../tests/test_postprocessing_probmatching.py | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/pysteps/tests/test_postprocessing_probmatching.py b/pysteps/tests/test_postprocessing_probmatching.py index 15d003d2c..3e134bd15 100644 --- a/pysteps/tests/test_postprocessing_probmatching.py +++ b/pysteps/tests/test_postprocessing_probmatching.py @@ -2,18 +2,21 @@ import pytest from pysteps.postprocessing.probmatching import resample_distributions + class TestResampleDistributions: @pytest.fixture(autouse=True) def setup(self): # Set the seed for reproducibility np.random.seed(42) - + def test_valid_inputs(self): first_array = np.array([1, 3, 5, 7, 9]) second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.6 - result = resample_distributions(first_array, second_array, probability_first_array) + result = resample_distributions( + first_array, second_array, probability_first_array + ) expected_result = np.array([9, 8, 6, 3, 1]) # Expected result based on the seed assert result.shape == first_array.shape assert np.array_equal(result, expected_result) @@ -22,21 +25,27 @@ def test_probability_zero(self): first_array = np.array([1, 3, 5, 7, 9]) second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.0 - result = resample_distributions(first_array, second_array, probability_first_array) + result = resample_distributions( + first_array, second_array, probability_first_array + ) assert np.array_equal(result, np.sort(second_array)[::-1]) def test_probability_one(self): first_array = np.array([1, 3, 5, 7, 9]) second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 1.0 - result = resample_distributions(first_array, second_array, probability_first_array) + result = resample_distributions( + first_array, second_array, probability_first_array + ) assert np.array_equal(result, np.sort(first_array)[::-1]) def test_nan_in_first_array(self): first_array_with_nan = np.array([1, 3, np.nan, 7, 9]) second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.6 - result = resample_distributions(first_array_with_nan, second_array, probability_first_array) + result = resample_distributions( + first_array_with_nan, second_array, probability_first_array + ) assert result.shape == first_array_with_nan.shape assert not np.any(np.isnan(result)) @@ -44,7 +53,9 @@ def test_nan_in_second_array(self): first_array = np.array([1, 3, 5, 7, 9]) second_array_with_nan = np.array([2, 4, 6, 8, np.nan]) probability_first_array = 0.6 - result = resample_distributions(first_array, second_array_with_nan, probability_first_array) + result = resample_distributions( + first_array, second_array_with_nan, probability_first_array + ) assert result.shape == first_array.shape assert not np.any(np.isnan(result)) @@ -52,6 +63,8 @@ def test_nan_in_both_arrays(self): first_array_with_nan = np.array([1, np.nan, 5, np.nan, 9]) second_array_with_nan = np.array([np.nan, 4, np.nan, 8, 10]) probability_first_array = 0.6 - result = resample_distributions(first_array_with_nan, second_array_with_nan, probability_first_array) + result = resample_distributions( + first_array_with_nan, second_array_with_nan, probability_first_array + ) assert result.shape == first_array_with_nan.shape assert not np.any(np.isnan(result)) From a46e62a58aa0af64e7421bc8d273f0696974f389 Mon Sep 17 00:00:00 2001 From: Daniele Nerini Date: Mon, 22 Jul 2024 12:17:36 +0200 Subject: [PATCH 13/13] Handle nans outside of resampling method --- pysteps/blending/steps.py | 11 +++-- pysteps/postprocessing/probmatching.py | 15 +++---- .../tests/test_postprocessing_probmatching.py | 43 +++++++------------ 3 files changed, 28 insertions(+), 41 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 27590102f..5ae894795 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -1549,10 +1549,15 @@ def worker(j): # both the extrapolation cascade and the model (NWP) cascade and use # that for the probability matching if probmatching_method is not None and resample_distribution: - # resample weights are based on cascade level 2 + # deal with missing values + arr1 = R_pm_ep[t_index] + arr2 = precip_models_pm_temp[j] + arr2 = np.where(np.isnan(arr2), np.nanmin(arr2), arr2) + arr1 = np.where(np.isnan(arr1), arr2, arr1) + # resample weights based on cascade level 2 R_pm_resampled = probmatching.resample_distributions( - first_array=R_pm_ep[t_index], - second_array=precip_models_pm_temp[j], + first_array=arr1, + second_array=arr2, probability_first_array=weights_pm_normalized[0], ) else: diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index 2d8383afd..7c96149e9 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -269,10 +269,10 @@ def resample_distributions(first_array, second_array, probability_first_array): ---------- first_array: array_like One of the two arrays from which the distribution should be sampled (e.g., the extrapolation - cascade). It must be of the same shape as `second_array`. + cascade). It must be of the same shape as `second_array`. Input must not contain NaNs. second_array: array_like One of the two arrays from which the distribution should be sampled (e.g., the NWP (model) - cascade). It must be of the same shape as `first_array`. + cascade). It must be of the same shape as `first_array`.. Input must not contain NaNs. probability_first_array: float The weight that `first_array` should get (a value between 0 and 1). This determines the likelihood of selecting elements from `first_array` over `second_array`. @@ -287,21 +287,16 @@ def resample_distributions(first_array, second_array, probability_first_array): Raises ------ ValueError - If `first_array` and `second_array` do not have the same shape. + If `first_array` and `second_array` do not have the same shape or if inputs contain NaNs. """ # Valide inputs if first_array.shape != second_array.shape: raise ValueError("first_array and second_array must have the same shape") + if np.isnan(first_array).any() or np.isnan(second_array).any(): + raise ValueError("Input arrays must not contain NaNs") probability_first_array = np.clip(probability_first_array, 0.0, 1.0) - # First make sure there are no nans in the input data - # Where the extrapolation cascade is nan (outside the radar domain), we fill it up with the model data - nan_indices = np.isnan(second_array) - second_array[nan_indices] = np.nanmin(second_array) - nan_indices = np.isnan(first_array) - first_array[nan_indices] = second_array[nan_indices] - # Flatten and sort the arrays asort = np.sort(first_array, axis=None)[::-1] bsort = np.sort(second_array, axis=None)[::-1] diff --git a/pysteps/tests/test_postprocessing_probmatching.py b/pysteps/tests/test_postprocessing_probmatching.py index 3e134bd15..fa27cdf39 100644 --- a/pysteps/tests/test_postprocessing_probmatching.py +++ b/pysteps/tests/test_postprocessing_probmatching.py @@ -39,32 +39,19 @@ def test_probability_one(self): ) assert np.array_equal(result, np.sort(first_array)[::-1]) - def test_nan_in_first_array(self): - first_array_with_nan = np.array([1, 3, np.nan, 7, 9]) - second_array = np.array([2, 4, 6, 8, 10]) + def test_nan_in_inputs(self): + array_with_nan = np.array([1, 3, np.nan, 7, 9]) + array_without_nan = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.6 - result = resample_distributions( - first_array_with_nan, second_array, probability_first_array - ) - assert result.shape == first_array_with_nan.shape - assert not np.any(np.isnan(result)) - - def test_nan_in_second_array(self): - first_array = np.array([1, 3, 5, 7, 9]) - second_array_with_nan = np.array([2, 4, 6, 8, np.nan]) - probability_first_array = 0.6 - result = resample_distributions( - first_array, second_array_with_nan, probability_first_array - ) - assert result.shape == first_array.shape - assert not np.any(np.isnan(result)) - - def test_nan_in_both_arrays(self): - first_array_with_nan = np.array([1, np.nan, 5, np.nan, 9]) - second_array_with_nan = np.array([np.nan, 4, np.nan, 8, 10]) - probability_first_array = 0.6 - result = resample_distributions( - first_array_with_nan, second_array_with_nan, probability_first_array - ) - assert result.shape == first_array_with_nan.shape - assert not np.any(np.isnan(result)) + with pytest.raises(ValueError, match="Input arrays must not contain NaNs"): + resample_distributions( + array_with_nan, array_without_nan, probability_first_array + ) + with pytest.raises(ValueError, match="Input arrays must not contain NaNs"): + resample_distributions( + array_without_nan, array_with_nan, probability_first_array + ) + with pytest.raises(ValueError, match="Input arrays must not contain NaNs"): + resample_distributions( + array_with_nan, array_with_nan, probability_first_array + )