pySTEPS · dnerini · Jul 22, 2024 · Jul 18, 2024 · Jul 18, 2024 · Jul 19, 2024
diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py
@@ -90,6 +90,7 @@
     conditional=False,
     probmatching_method="cdf",
     mask_method="incremental",
+    resample_distribution=False,
     smooth_radar_mask_range=0,
     callback=None,
     return_output=True,
@@ -205,25 +206,32 @@
       If set to True, compute the statistics of the precipitation field
       conditionally by excluding pixels where the values are below the threshold
       precip_thr.
+    probmatching_method: {'cdf','mean',None}, optional
+      Method for matching the statistics of the forecast field with those of
+      the most recently observed one. 'cdf'=map the forecast CDF to the observed
+      one, 'mean'=adjust only the conditional mean value of the forecast field
+      in precipitation areas, None=no matching applied. Using 'mean' requires
+      that mask_method is not None.
     mask_method: {'obs','incremental',None}, optional
       The method to use for masking no precipitation areas in the forecast field.
       The masked pixels are set to the minimum value of the observations.
       'obs' = apply precip_thr to the most recently observed precipitation intensity
       field, 'incremental' = iteratively buffer the mask with a certain rate
       (currently it is 1 km/min), None=no masking.
+    resample_distribution: bool, optional
+        Method to resample the distribution from the extrapolation and NWP cascade as input
+        for the probability matching. Not resampling these distributions may lead to losing
+        some extremes when the weight of both the extrapolation and NWP cascade is similar.
+        Defaults to False.
     smooth_radar_mask_range: int, Default is 0.
       Method to smooth the transition between the radar-NWP-noise blend and the NWP-noise
       blend near the edge of the radar domain (radar mask), where the radar data is either
-      not present anymore or is not reliable. If set to 0 (grid cells), this generates a normal forecast without smoothing. To create a smooth mask, this range
-      should be a positive value, representing a buffer band of a number of pixels
-      by which the mask is cropped and smoothed. The smooth radar mask removes
-      the hard edges between NWP and radar in the final blended product. Typically, a value between 50 and 100 km can be used. 80 km generally gives good results.
-    probmatching_method: {'cdf','mean',None}, optional
-      Method for matching the statistics of the forecast field with those of
-      the most recently observed one. 'cdf'=map the forecast CDF to the observed
-      one, 'mean'=adjust only the conditional mean value of the forecast field
-      in precipitation areas, None=no matching applied. Using 'mean' requires
-      that mask_method is not None.
+      not present anymore or is not reliable. If set to 0 (grid cells), this generates a
+      normal forecast without smoothing. To create a smooth mask, this range should be a
+      positive value, representing a buffer band of a number of pixels by which the mask
+      is cropped and smoothed. The smooth radar mask removes the hard edges between NWP
+      and radar in the final blended product. Typically, a value between 50 and 100 km
+      can be used. 80 km generally gives good results.
     callback: function, optional
       Optional function that is called after computation of each time step of
       the nowcast. The function takes one argument: a three-dimensional array
@@ -1404,7 +1412,6 @@
                         # latest extrapolated radar rainfall field blended with the
                         # nwp model(s) rainfall forecast fields as 'benchmark'.
 
-                        # TODO: Check probability matching method
                         # 8.7.1 first blend the extrapolated rainfall field (the field
                         # that is only used for post-processing steps) with the NWP
                         # rainfall forecast for this time step using the weights
@@ -1538,19 +1545,35 @@
                             # Set to min value outside of mask
                             R_f_new[~MASK_prec_] = R_cmin
 
+                        # If probmatching_method is not None, resample the distribution from
+                        # both the extrapolation cascade and the model (NWP) cascade and use
+                        # that for the probability matching
+                        if probmatching_method is not None and resample_distribution:
+                            R_pm_resampled = probmatching.resample_distributions(
+                                a=R_pm_ep[t_index],
+                                b=precip_models_pm_temp[j],
+                                weight=weights_pm_normalized[
+                                    0
+                                ],  # Use the weight from the extrapolation cascade here
+                            )
+                        else:
+                            R_pm_resampled = R_pm_blended.copy()
+
                         if probmatching_method == "cdf":
                             # Adjust the CDF of the forecast to match the most recent
                             # benchmark rainfall field (R_pm_blended). If the forecast
                             if np.any(np.isfinite(R_f_new)):
                                 R_f_new = probmatching.nonparam_match_empirical_cdf(
-                                    R_f_new, R_pm_blended
+                                    R_f_new, R_pm_resampled
                                 )
+                                R_pm_resampled = None
                         elif probmatching_method == "mean":
                             # Use R_pm_blended as benchmark field and
-                            mu_0 = np.mean(R_pm_blended[R_pm_blended >= precip_thr])
+                            mu_0 = np.mean(R_pm_resampled[R_pm_resampled >= precip_thr])
                             MASK = R_f_new >= precip_thr
                             mu_fct = np.mean(R_f_new[MASK])
                             R_f_new[MASK] = R_f_new[MASK] - mu_fct + mu_0
+                            R_pm_resampled = None
 
                         R_f_out.append(R_f_new)
 

diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py
@@ -13,6 +13,7 @@
     pmm_init
     pmm_compute
     shift_scale
+    resample_distributions
 """
 
 import numpy as np
@@ -259,6 +260,47 @@
     return shift, scale, R.reshape(shape)
 
 
+def resample_distributions(a, b, weight):
+    """
+    Merges two distributions (e.g. from the extrapolation nowcast and NWP in the blending module)
+    to effectively combine two distributions for the probability matching without losing extremes.
+    Parameters
+    ----------
+    a: array_like
+        One of the two arrays from which the distribution should be samples (e.g. the extrapolation
+        cascade).
+    b: array_like
+        One of the two arrays from which the distribution should be samples (e.g. the NWP (model)
+        cascade).
+    weight: float
+        The weight that a should get (as a value between 0 and 1).
+
+    Returns
+    ----------
+    csort: array_like
+        The output distribution as a drawn binomial distribution from the input arrays a and b.
+    """
+    # First make sure there are no nans in the input data
+    nan_indices = np.isnan(a)
+    a[nan_indices] = np.nanmin(a)
+    nan_indices = np.isnan(b)
+    b[nan_indices] = np.nanmin(b)
+
+    # Prepare the input distributions
+    assert a.size == b.size
+    asort = np.sort(a.flatten())[::-1]
+    bsort = np.sort(b.flatten())[::-1]
+    n = asort.shape[0]
+
+    # Resample the distributions
+    idxsamples = np.random.binomial(1, weight, n).astype(bool)
+    csort = bsort.copy()
+    csort[idxsamples] = asort[idxsamples]
+    csort = np.sort(csort)[::-1]
+
+    return csort
+
+
 def _invfunc(y, fx, fy):
     if len(y) == 0:
         return np.array([])