Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add resample distributions function for probability matching #390

Merged
merged 13 commits into from
Jul 22, 2024
Merged
47 changes: 34 additions & 13 deletions pysteps/blending/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def forecast(
conditional=False,
probmatching_method="cdf",
mask_method="incremental",
resample_distribution=True,
smooth_radar_mask_range=0,
callback=None,
return_output=True,
Expand Down Expand Up @@ -205,25 +206,32 @@ def forecast(
If set to True, compute the statistics of the precipitation field
conditionally by excluding pixels where the values are below the threshold
precip_thr.
probmatching_method: {'cdf','mean',None}, optional
Method for matching the statistics of the forecast field with those of
the most recently observed one. 'cdf'=map the forecast CDF to the observed
one, 'mean'=adjust only the conditional mean value of the forecast field
in precipitation areas, None=no matching applied. Using 'mean' requires
that mask_method is not None.
mask_method: {'obs','incremental',None}, optional
The method to use for masking no precipitation areas in the forecast field.
The masked pixels are set to the minimum value of the observations.
'obs' = apply precip_thr to the most recently observed precipitation intensity
field, 'incremental' = iteratively buffer the mask with a certain rate
(currently it is 1 km/min), None=no masking.
resample_distribution: bool, optional
Method to resample the distribution from the extrapolation and NWP cascade as input
for the probability matching. Not resampling these distributions may lead to losing
some extremes when the weight of both the extrapolation and NWP cascade is similar.
Defaults to True.
smooth_radar_mask_range: int, Default is 0.
Method to smooth the transition between the radar-NWP-noise blend and the NWP-noise
blend near the edge of the radar domain (radar mask), where the radar data is either
not present anymore or is not reliable. If set to 0 (grid cells), this generates a normal forecast without smoothing. To create a smooth mask, this range
should be a positive value, representing a buffer band of a number of pixels
by which the mask is cropped and smoothed. The smooth radar mask removes
the hard edges between NWP and radar in the final blended product. Typically, a value between 50 and 100 km can be used. 80 km generally gives good results.
probmatching_method: {'cdf','mean',None}, optional
Method for matching the statistics of the forecast field with those of
the most recently observed one. 'cdf'=map the forecast CDF to the observed
one, 'mean'=adjust only the conditional mean value of the forecast field
in precipitation areas, None=no matching applied. Using 'mean' requires
that mask_method is not None.
not present anymore or is not reliable. If set to 0 (grid cells), this generates a
normal forecast without smoothing. To create a smooth mask, this range should be a
positive value, representing a buffer band of a number of pixels by which the mask
is cropped and smoothed. The smooth radar mask removes the hard edges between NWP
and radar in the final blended product. Typically, a value between 50 and 100 km
can be used. 80 km generally gives good results.
callback: function, optional
Optional function that is called after computation of each time step of
the nowcast. The function takes one argument: a three-dimensional array
Expand Down Expand Up @@ -1404,7 +1412,6 @@ def worker(j):
# latest extrapolated radar rainfall field blended with the
# nwp model(s) rainfall forecast fields as 'benchmark'.

# TODO: Check probability matching method
# 8.7.1 first blend the extrapolated rainfall field (the field
# that is only used for post-processing steps) with the NWP
# rainfall forecast for this time step using the weights
Expand Down Expand Up @@ -1538,19 +1545,33 @@ def worker(j):
# Set to min value outside of mask
R_f_new[~MASK_prec_] = R_cmin

# If probmatching_method is not None, resample the distribution from
# both the extrapolation cascade and the model (NWP) cascade and use
# that for the probability matching
if probmatching_method is not None and resample_distribution:
R_pm_resampled = probmatching.resample_distributions(
first_array=R_pm_ep[t_index],
second_array=precip_models_pm_temp[j],
probability_first_array=weights_pm_normalized[0],
)
else:
R_pm_resampled = R_pm_blended.copy()

if probmatching_method == "cdf":
# Adjust the CDF of the forecast to match the most recent
# benchmark rainfall field (R_pm_blended). If the forecast
if np.any(np.isfinite(R_f_new)):
R_f_new = probmatching.nonparam_match_empirical_cdf(
R_f_new, R_pm_blended
R_f_new, R_pm_resampled
)
R_pm_resampled = None
elif probmatching_method == "mean":
# Use R_pm_blended as benchmark field and
mu_0 = np.mean(R_pm_blended[R_pm_blended >= precip_thr])
mu_0 = np.mean(R_pm_resampled[R_pm_resampled >= precip_thr])
MASK = R_f_new >= precip_thr
mu_fct = np.mean(R_f_new[MASK])
R_f_new[MASK] = R_f_new[MASK] - mu_fct + mu_0
R_pm_resampled = None

R_f_out.append(R_f_new)

Expand Down
52 changes: 52 additions & 0 deletions pysteps/postprocessing/probmatching.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
pmm_init
pmm_compute
shift_scale
resample_distributions
"""

import numpy as np
Expand Down Expand Up @@ -259,6 +260,57 @@
return shift, scale, R.reshape(shape)


def resample_distributions(first_array, second_array, probability_first_array):
"""
Merges two distributions (e.g. from the extrapolation nowcast and NWP in the blending module)
to effectively combine two distributions for the probability matching without losing extremes.
Parameters
----------
first_array: array_like
One of the two arrays from which the distribution should be samples (e.g. the extrapolation
cascade).
second_array: array_like
One of the two arrays from which the distribution should be samples (e.g. the NWP (model)
cascade).
probability_first_array: float
The weight that first_array should get (as a value between 0 and 1). This is
typically based on cascade level 2 (as is done for all the probability matching steps in `blending/steps.py`)

Returns
----------
csort: array_like
The output distribution as first_array drawn binomial distribution from the input arrays first_array and second_array.
"""
# Make sure that the probability is never: <0, >1 or nan
if probability_first_array < 0.0:
probability_first_array = 0.0

Check warning on line 286 in pysteps/postprocessing/probmatching.py

View check run for this annotation

Codecov / codecov/patch

pysteps/postprocessing/probmatching.py#L286

Added line #L286 was not covered by tests
elif probability_first_array > 1.0:
probability_first_array = 1.0
elif np.isnan(probability_first_array):
probability_first_array = 0.0

Check warning on line 290 in pysteps/postprocessing/probmatching.py

View check run for this annotation

Codecov / codecov/patch

pysteps/postprocessing/probmatching.py#L290

Added line #L290 was not covered by tests

# First make sure there are no nans in the input data
# Where the extrapolation cascade is nan (outside the radar domain), we fill it up with the model data
nan_indices = np.isnan(second_array)
second_array[nan_indices] = np.nanmin(second_array)
nan_indices = np.isnan(first_array)
first_array[nan_indices] = second_array[nan_indices]

# Prepare the input distributions
assert first_array.size == second_array.size
asort = np.sort(first_array.flatten())[::-1]
bsort = np.sort(second_array.flatten())[::-1]
n = asort.shape[0]

# Resample the distributions
idxsamples = np.random.binomial(1, probability_first_array, n).astype(bool)
csort = bsort.copy()
csort[idxsamples] = asort[idxsamples]
csort = np.sort(csort)[::-1]

return csort


def _invfunc(y, fx, fy):
if len(y) == 0:
return np.array([])
Expand Down
59 changes: 32 additions & 27 deletions pysteps/tests/test_blending_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,40 @@


steps_arg_values = [
(1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0),
(1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0),
(1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0),
(1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0),
(1, 3, 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0),
(1, 3, 4, 8, "incremental", "cdf", False, "spn", True, 4, False, False, 0),
(1, 3, 4, 6, "incremental", "cdf", False, "bps", True, 4, False, False, 0),
(1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0),
(1, 3, 4, 9, "incremental", "cdf", False, "spn", True, 4, False, False, 0),
(2, 3, 10, 8, "incremental", "cdf", False, "spn", True, 10, False, False, 0),
(5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0),
(1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0),
(2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0),
(1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0),
(1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0, False),
(1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0, False),
(1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0, False),
(1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, False),
(1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, True),
(1, 3, 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0, False),
(1, 3, 4, 8, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False),
(1, 3, 4, 6, "incremental", "cdf", False, "bps", True, 4, False, False, 0, False),
(1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, False),
(1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, True),
(1, 3, 4, 9, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False),
(2, 3, 10, 8, "incremental", "cdf", False, "spn", True, 10, False, False, 0, False),
(5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0, False),
(1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0, False),
(2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False),
(1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False),
# Test the case where the radar image contains no rain.
(1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0),
(1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0, False),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, False),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, True),
# Test the case where the NWP fields contain no rain.
(1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0),
(1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0, False),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0, True),
# Test the case where both the radar image and the NWP fields contain no rain.
(1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0),
(5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0),
(1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0, False),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0, False),
(5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0, False),
# Test for smooth radar mask
(1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 80),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, False, 80),
(5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, False, False, 80),
(1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 80),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 80),
(5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80),
(1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 80, False),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, False, 80, False),
(5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, False, False, 80, False),
(1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 80, False),
(5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 80, True),
(5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80, False),
]

steps_arg_names = (
Expand All @@ -55,6 +58,7 @@
"zero_radar",
"zero_nwp",
"smooth_radar_mask_range",
"resample_distribution",
)


Expand All @@ -73,6 +77,7 @@ def test_steps_blending(
zero_radar,
zero_nwp,
smooth_radar_mask_range,
resample_distribution,
):
pytest.importorskip("cv2")

Expand Down