pySTEPS · dnerini · Jul 22, 2024 · Jul 18, 2024 · Jul 18, 2024 · Jul 19, 2024
diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py
@@ -90,6 +90,7 @@ def forecast(
     conditional=False,
     probmatching_method="cdf",
     mask_method="incremental",
+    resample_distribution=True,
     smooth_radar_mask_range=0,
     callback=None,
     return_output=True,
@@ -205,25 +206,32 @@ def forecast(
       If set to True, compute the statistics of the precipitation field
       conditionally by excluding pixels where the values are below the threshold
       precip_thr.
+    probmatching_method: {'cdf','mean',None}, optional
+      Method for matching the statistics of the forecast field with those of
+      the most recently observed one. 'cdf'=map the forecast CDF to the observed
+      one, 'mean'=adjust only the conditional mean value of the forecast field
+      in precipitation areas, None=no matching applied. Using 'mean' requires
+      that mask_method is not None.
     mask_method: {'obs','incremental',None}, optional
       The method to use for masking no precipitation areas in the forecast field.
       The masked pixels are set to the minimum value of the observations.
       'obs' = apply precip_thr to the most recently observed precipitation intensity
       field, 'incremental' = iteratively buffer the mask with a certain rate
       (currently it is 1 km/min), None=no masking.
+    resample_distribution: bool, optional
+        Method to resample the distribution from the extrapolation and NWP cascade as input
+        for the probability matching. Not resampling these distributions may lead to losing
+        some extremes when the weight of both the extrapolation and NWP cascade is similar.
+        Defaults to True.
     smooth_radar_mask_range: int, Default is 0.
       Method to smooth the transition between the radar-NWP-noise blend and the NWP-noise
       blend near the edge of the radar domain (radar mask), where the radar data is either
-      not present anymore or is not reliable. If set to 0 (grid cells), this generates a normal forecast without smoothing. To create a smooth mask, this range
-      should be a positive value, representing a buffer band of a number of pixels
-      by which the mask is cropped and smoothed. The smooth radar mask removes
-      the hard edges between NWP and radar in the final blended product. Typically, a value between 50 and 100 km can be used. 80 km generally gives good results.
-    probmatching_method: {'cdf','mean',None}, optional
-      Method for matching the statistics of the forecast field with those of
-      the most recently observed one. 'cdf'=map the forecast CDF to the observed
-      one, 'mean'=adjust only the conditional mean value of the forecast field
-      in precipitation areas, None=no matching applied. Using 'mean' requires
-      that mask_method is not None.
+      not present anymore or is not reliable. If set to 0 (grid cells), this generates a
+      normal forecast without smoothing. To create a smooth mask, this range should be a
+      positive value, representing a buffer band of a number of pixels by which the mask
+      is cropped and smoothed. The smooth radar mask removes the hard edges between NWP
+      and radar in the final blended product. Typically, a value between 50 and 100 km
+      can be used. 80 km generally gives good results.
     callback: function, optional
       Optional function that is called after computation of each time step of
       the nowcast. The function takes one argument: a three-dimensional array
@@ -1404,7 +1412,6 @@ def worker(j):
                         # latest extrapolated radar rainfall field blended with the
                         # nwp model(s) rainfall forecast fields as 'benchmark'.
 
-                        # TODO: Check probability matching method
                         # 8.7.1 first blend the extrapolated rainfall field (the field
                         # that is only used for post-processing steps) with the NWP
                         # rainfall forecast for this time step using the weights
@@ -1538,19 +1545,33 @@ def worker(j):
                             # Set to min value outside of mask
                             R_f_new[~MASK_prec_] = R_cmin
 
+                        # If probmatching_method is not None, resample the distribution from
+                        # both the extrapolation cascade and the model (NWP) cascade and use
+                        # that for the probability matching
+                        if probmatching_method is not None and resample_distribution:
+                            R_pm_resampled = probmatching.resample_distributions(
+                                first_array=R_pm_ep[t_index],
+                                second_array=precip_models_pm_temp[j],
+                                probability_first_array=weights_pm_normalized[0],
+                            )
+                        else:
+                            R_pm_resampled = R_pm_blended.copy()
+
                         if probmatching_method == "cdf":
                             # Adjust the CDF of the forecast to match the most recent
                             # benchmark rainfall field (R_pm_blended). If the forecast
                             if np.any(np.isfinite(R_f_new)):
                                 R_f_new = probmatching.nonparam_match_empirical_cdf(
-                                    R_f_new, R_pm_blended
+                                    R_f_new, R_pm_resampled
                                 )
+                                R_pm_resampled = None
                         elif probmatching_method == "mean":
                             # Use R_pm_blended as benchmark field and
-                            mu_0 = np.mean(R_pm_blended[R_pm_blended >= precip_thr])
+                            mu_0 = np.mean(R_pm_resampled[R_pm_resampled >= precip_thr])
                             MASK = R_f_new >= precip_thr
                             mu_fct = np.mean(R_f_new[MASK])
                             R_f_new[MASK] = R_f_new[MASK] - mu_fct + mu_0
+                            R_pm_resampled = None
 
                         R_f_out.append(R_f_new)
 

diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py
@@ -13,6 +13,7 @@
     pmm_init
     pmm_compute
     shift_scale
+    resample_distributions
 """
 
 import numpy as np
@@ -259,6 +260,57 @@
     return shift, scale, R.reshape(shape)
 
 
+def resample_distributions(first_array, second_array, probability_first_array):
+    """
+    Merges two distributions (e.g. from the extrapolation nowcast and NWP in the blending module)
+    to effectively combine two distributions for the probability matching without losing extremes.
+    Parameters
+    ----------
+    first_array: array_like
+        One of the two arrays from which the distribution should be samples (e.g. the extrapolation
+        cascade).
+    second_array: array_like
+        One of the two arrays from which the distribution should be samples (e.g. the NWP (model)
+        cascade).
+    probability_first_array: float
+        The weight that first_array should get (as a value between 0 and 1). This is
+        typically based on cascade level 2 (as is done for all the probability matching steps in `blending/steps.py`)
+
+    Returns
+    ----------
+    csort: array_like
+        The output distribution as first_array drawn binomial distribution from the input arrays first_array and second_array.
+    """
+    # Make sure that the probability is never: <0, >1 or nan
+    if probability_first_array < 0.0:
+        probability_first_array = 0.0
+    elif probability_first_array > 1.0:
+        probability_first_array = 1.0
+    elif np.isnan(probability_first_array):
+        probability_first_array = 0.0
+
+    # First make sure there are no nans in the input data
+    # Where the extrapolation cascade is nan (outside the radar domain), we fill it up with the model data
+    nan_indices = np.isnan(second_array)
+    second_array[nan_indices] = np.nanmin(second_array)
+    nan_indices = np.isnan(first_array)
+    first_array[nan_indices] = second_array[nan_indices]
+
+    # Prepare the input distributions
+    assert first_array.size == second_array.size
+    asort = np.sort(first_array.flatten())[::-1]
+    bsort = np.sort(second_array.flatten())[::-1]
+    n = asort.shape[0]
+
+    # Resample the distributions
+    idxsamples = np.random.binomial(1, probability_first_array, n).astype(bool)
+    csort = bsort.copy()
+    csort[idxsamples] = asort[idxsamples]
+    csort = np.sort(csort)[::-1]
+
+    return csort
+
+
 def _invfunc(y, fx, fy):
     if len(y) == 0:
         return np.array([])

diff --git a/pysteps/tests/test_blending_steps.py b/pysteps/tests/test_blending_steps.py
@@ -8,37 +8,40 @@
 
 
 steps_arg_values = [
-    (1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0),
-    (1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0),
-    (1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0),
-    (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0),
-    (1, 3, 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0),
-    (1, 3, 4, 8, "incremental", "cdf", False, "spn", True, 4, False, False, 0),
-    (1, 3, 4, 6, "incremental", "cdf", False, "bps", True, 4, False, False, 0),
-    (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0),
-    (1, 3, 4, 9, "incremental", "cdf", False, "spn", True, 4, False, False, 0),
-    (2, 3, 10, 8, "incremental", "cdf", False, "spn", True, 10, False, False, 0),
-    (5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0),
-    (1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0),
-    (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0),
-    (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0),
+    (1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0, False),
+    (1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0, False),
+    (1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0, False),
+    (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, False),
+    (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, True),
+    (1, 3, 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0, False),
+    (1, 3, 4, 8, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False),
+    (1, 3, 4, 6, "incremental", "cdf", False, "bps", True, 4, False, False, 0, False),
+    (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, False),
+    (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, True),
+    (1, 3, 4, 9, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False),
+    (2, 3, 10, 8, "incremental", "cdf", False, "spn", True, 10, False, False, 0, False),
+    (5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0, False),
+    (1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0, False),
+    (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False),
+    (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False),
     #    Test the case where the radar image contains no rain.
-    (1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0),
-    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0),
+    (1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0, False),
+    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, False),
+    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, True),
     #   Test the case where the NWP fields contain no rain.
-    (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0),
-    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0),
+    (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0, False),
+    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0, True),
     # Test the case where both the radar image and the NWP fields contain no rain.
-    (1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0),
-    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0),
-    (5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0),
+    (1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0, False),
+    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0, False),
+    (5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0, False),
     # Test for smooth radar mask
-    (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 80),
-    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, False, 80),
-    (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, False, False, 80),
-    (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 80),
-    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 80),
-    (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80),
+    (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 80, False),
+    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, False, 80, False),
+    (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, False, False, 80, False),
+    (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 80, False),
+    (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 80, True),
+    (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80, False),
 ]
 
 steps_arg_names = (
@@ -55,6 +58,7 @@
     "zero_radar",
     "zero_nwp",
     "smooth_radar_mask_range",
+    "resample_distribution",
 )
 
 
@@ -73,6 +77,7 @@ def test_steps_blending(
     zero_radar,
     zero_nwp,
     smooth_radar_mask_range,
+    resample_distribution,
 ):
     pytest.importorskip("cv2")