
Add separate functions for wis components #397

Merged
merged 5 commits into from
Nov 15, 2023
3 changes: 3 additions & 0 deletions NAMESPACE
@@ -29,6 +29,7 @@ export(bias_sample)
export(brier_score)
export(correlation)
export(crps_sample)
export(dispersion)
export(dss_sample)
export(get_duplicate_forecasts)
export(interval_coverage_quantile)
@@ -42,6 +43,7 @@ export(make_NA)
export(make_na)
export(merge_pred_and_obs)
export(new_scoringutils)
export(overprediction)
export(pairwise_comparison)
export(pit)
export(pit_sample)
@@ -67,6 +69,7 @@ export(summarise_scores)
export(summarize_scores)
export(theme_scoringutils)
export(transform_forecasts)
export(underprediction)
export(validate)
export(validate_general)
export(wis)
121 changes: 120 additions & 1 deletion R/metrics-quantile.R
@@ -2,13 +2,92 @@
# Metrics with a many-to-one relationship between input and score
################################################################################

#' Weighted Interval Score
#' Weighted Interval Score (WIS)
#' @description
#' The WIS is a proper scoring rule used to evaluate forecasts in an interval- /
#' quantile-based format. See Bracher et al. (2021). Smaller values are better.
#'
#' As the name suggests, the score assumes that a forecast comes in the form of
#' one or multiple central prediction intervals. A prediction interval is
#' characterised by a lower and an upper bound formed by a pair of predictive
#' quantiles. For example, a 50% central prediction interval is formed by the
#' 0.25 and 0.75 quantiles of the predictive distribution.
#'
#' **Interval score**
#'
#' The interval score (IS) is the sum of three components:
#' overprediction, underprediction and dispersion. For a single prediction
#' interval only one of the components is non-zero. If for a single prediction
#' interval the observed value is below the lower bound, then the interval
#' score is equal to the absolute difference between the lower bound and the
#' observed value ("underprediction"). "Overprediction" is defined analogously.
#' If the observed value falls within the bounds of the prediction interval,
#' then the interval score is equal to the width of the prediction interval,
#' i.e. the difference between the upper and lower bound. For a single interval,
#' we therefore have:
#'
#' \deqn{
#' \textrm{IS} = (\textrm{upper} - \textrm{lower}) + \frac{2}{\alpha}(\textrm{lower}
#' - \textrm{observed}) *
#' \mathbf{1}(\textrm{observed} < \textrm{lower}) +
#' \frac{2}{\alpha}(\textrm{observed} - \textrm{upper}) *
#' \mathbf{1}(\textrm{observed} > \textrm{upper})
#' }{
#' score = (upper - lower) + 2/alpha * (lower - observed) *
#' 1(observed < lower) + 2/alpha * (observed - upper) *
#' 1(observed > upper)
#' }
#' where \eqn{\mathbf{1}()}{1()} is the indicator function and
#' \eqn{\alpha}{alpha} is the decimal value that indicates how much is outside
#' the prediction interval. For a 90% prediction interval, for example,
#' \eqn{\alpha}{alpha} is equal to 0.1. No specific distribution is assumed,
#' but the range has to be symmetric (i.e. you can't use the 0.1 quantile
#' as the lower bound and the 0.7 quantile as the upper).
#' Non-symmetric quantiles can be scored using the function [quantile_score()].
#'
#' Usually the interval score is weighted by a factor that makes sure that the
#' average score across an increasing number of equally spaced quantiles
#' converges to the continuous ranked probability score (CRPS). This
#' weighted score is called the weighted interval score (WIS).
#' The weight commonly used is \eqn{\alpha / 2}{alpha / 2}.
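The weighting described above leads to the following closed form for K central prediction intervals plus the predictive median; this is a sketch following Bracher et al. (2021) rather than a formula taken from the package documentation, and the symbols \(K\), \(\alpha_k\) and \(m\) (the median forecast) are notation introduced here:

```latex
\mathrm{WIS} = \frac{1}{K + 1/2}
  \left( \frac{1}{2}\,\lvert \mathrm{observed} - m \rvert
  + \sum_{k=1}^{K} \frac{\alpha_k}{2}\,
    \mathrm{IS}_{\alpha_k}(\mathrm{observed}) \right)
```

With `count_median_twice = TRUE` the median term instead receives weight 1 and the normalising constant becomes \(K + 1\).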
#'
#' **Quantile score**
#'
#' In addition to the interval score, there also exists a quantile score (QS)
#' (see [quantile_score()]), which is equal to the so-called pinball loss.
#' The quantile score can be computed for a single quantile (whereas the
#' interval score requires two quantiles that form an interval). However,
#' the intuitive decomposition into overprediction, underprediction and
#' dispersion does not exist for the quantile score.
#'
#' **Two versions of the weighted interval score**
#'
#' There are two ways to conceptualise the weighted interval score across
#' several quantiles / prediction intervals and the median.
#'
#' In one view, you would treat the WIS as the average of quantile scores (with
#' the median counted as the 0.5-quantile); this is the default for `wis()`. In
#' another view, you would treat the WIS as the average of several interval
#' scores plus the difference between the observed value and the median
#' forecast. In contrast to the first view, the median then carries twice as
#' much weight (because it is weighted like a prediction interval, rather than
#' like a single quantile). Both are valid ways to conceptualise the WIS and
#' you can control the behaviour with the `count_median_twice` argument.
#'
#' **WIS components**:
#' WIS components can be computed individually using the functions
#' `overprediction()`, `underprediction()`, and `dispersion()`.
#'
#' @inheritParams interval_score
#' @param predicted Vector of size n with the predicted values.
#' @param quantile Vector of size N with the quantile levels of the predicted
#'   values.
#' @param count_median_twice If `TRUE`, count the median twice in the score.
#' @param na.rm If `TRUE`, ignore `NA` values when computing the score.
#' @importFrom stats weighted.mean
#' @return
#' `wis()`: a numeric vector with WIS values (one per observation), or a list
#' with separate entries if `separate_results` is `TRUE`.
#' @export
wis <- function(observed,
predicted,
@@ -63,6 +142,46 @@ wis <- function(observed,
}
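A minimal numeric sketch of the interval score formula from the documentation above, standalone and independent of the package (the function name `interval_score_single` is illustrative, not part of the PR):

```r
# Interval score for a single central prediction interval, following the
# IS formula above (illustrative, not the package implementation).
interval_score_single <- function(observed, lower, upper, alpha) {
  dispersion <- upper - lower
  underprediction <- 2 / alpha * (lower - observed) * (observed < lower)
  overprediction <- 2 / alpha * (observed - upper) * (observed > upper)
  dispersion + underprediction + overprediction
}

# 90% interval [2, 10] (alpha = 0.1), observed value 12:
# width 8 plus overprediction penalty 2 / 0.1 * (12 - 10) = 40, giving 48.
interval_score_single(observed = 12, lower = 2, upper = 10, alpha = 0.1)
```

Because the observed value lies above the upper bound, only the overprediction term is non-zero on top of the interval width, matching the decomposition described in the details section.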


#' @return
#' `dispersion()`: a numeric vector with dispersion values (one per observation)
#' @param ... Additional arguments passed on to `wis()` from the functions
#' `overprediction()`, `underprediction()` and `dispersion()`
#' @export
#' @rdname wis
dispersion <- function(observed, predicted, quantile, ...) {
args <- list(...)
args$separate_results <- TRUE
assert_input_quantile(observed, predicted, quantile)
do.call(wis, c(list(observed), list(predicted), list(quantile), args))$dispersion
}


#' @return
#' `overprediction()`: a numeric vector with overprediction values (one per
#' observation)
#' @export
#' @rdname wis
overprediction <- function(observed, predicted, quantile, ...) {
args <- list(...)
args$separate_results <- TRUE
assert_input_quantile(observed, predicted, quantile)
do.call(wis, c(list(observed), list(predicted), list(quantile), args))$overprediction
}


#' @return
#' `underprediction()`: a numeric vector with underprediction values (one per
#' observation)
#' @export
#' @rdname wis
underprediction <- function(observed, predicted, quantile, ...) {
args <- list(...)
args$separate_results <- TRUE
assert_input_quantile(observed, predicted, quantile)
do.call(wis, c(list(observed), list(predicted), list(quantile), args))$underprediction
}
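A usage sketch for the three new component functions. The data here is invented, and it assumes the quantile-forecast input format used by `wis()` (observed as a vector of length n, predicted as an n × N matrix of predictive quantiles, quantile as a vector of N quantile levels); with the default `count_median_twice = FALSE`, the components should sum to the total WIS:

```r
library(scoringutils)

observed <- c(10, 30)
predicted <- rbind(
  c(5, 10, 15),   # predictive quantiles for the first observation
  c(18, 20, 22)   # predictive quantiles for the second observation
)
quantile <- c(0.25, 0.5, 0.75)

total <- wis(observed, predicted, quantile)
parts <- dispersion(observed, predicted, quantile) +
  overprediction(observed, predicted, quantile) +
  underprediction(observed, predicted, quantile)

all.equal(total, parts)  # the components should add up to the WIS
```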


#' @title Interval Coverage (For Quantile-Based Forecasts)
#' @description Check whether the observed value is within a given central
#' prediction interval. The prediction interval is defined by a lower and an
5 changes: 0 additions & 5 deletions R/score.R
@@ -295,11 +295,6 @@ score.scoringutils_quantile_new <- function(data, metrics = metrics_quantile, ...
fun <- metrics[[i]]
matching_args <- filter_function_args(fun, args)

if ("separate_results" %in% names(matching_args) &&
matching_args$separate_results) {
metric_name <- c(metric_name, "dispersion", "underprediction", "overprediction")
}

data[, eval(metric_name) := do.call(
fun, c(list(observed), list(predicted), list(quantile), matching_args)
)]
Binary file modified data/metrics_quantile.rda
3 changes: 3 additions & 0 deletions inst/create-list-available-forecasts.R
@@ -24,6 +24,9 @@ usethis::use_data(metrics_sample, overwrite = TRUE)

metrics_quantile <- list(
"wis" = wis,
"overprediction" = overprediction,
"underprediction" = underprediction,
"dispersion" = dispersion,
"bias" = bias_quantile,
"coverage_50" = \(...) {run_safely(..., range = 50, fun = interval_coverage_quantile)},
"coverage_90" = \(...) {run_safely(..., range = 90, fun = interval_coverage_quantile)}
2 changes: 1 addition & 1 deletion man/metrics_quantile.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

101 changes: 99 additions & 2 deletions man/wis.Rd
