Issue #925 - Remove function interval_coverage_deviation() (#928)

* Remove function `interval_coverage_deviation()` * fix tests
epiforecasts · Sep 30, 2024 · c7bc1be · c7bc1be
1 parent afa79bc
commit c7bc1be
Show file tree

Hide file tree

Showing 9 changed files with 125 additions and 362 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -58,7 +58,6 @@ export(get_metrics)
 export(get_pairwise_comparisons)
 export(get_pit)
 export(interval_coverage)
-export(interval_coverage_deviation)
 export(is_forecast)
 export(is_forecast_binary)
 export(is_forecast_nominal)

diff --git a/NEWS.md b/NEWS.md
@@ -70,7 +70,7 @@ of our [original](https://doi.org/10.48550/arXiv.2205.07090) `scoringutils` pape
 
 ### Renamed functions
 - The function `find_duplicates()` was renamed to `get_duplicate_forecasts()`. 
-- Renamed `interval_coverage_quantile()` and `interval_coverage_dev_quantile()` to `interval_coverage()` and `interval_coverage_deviation()`, respectively. 
+- Renamed `interval_coverage_quantile()` to `interval_coverage()`. 
 - "range" was consistently renamed to "interval_range" in the code. The "range"-format (which was mostly used internally) was renamed to "interval"-format
 - Renamed `correlation()` to `get_correlations()` and `plot_correlation()` to `plot_correlations()`
 - `pit()` was renamed to `get_pit()` and converted to an S3 method.
@@ -83,6 +83,7 @@ of our [original](https://doi.org/10.48550/arXiv.2205.07090) `scoringutils` pape
 - Removed the function `merge_pred_and_obs()` that was used to merge two separate data frames with forecasts and observations. We moved its contents to a new "Deprecated functions"-vignette.
 - Removed `interval_coverage_sample()` as users are now expected to convert to a quantile format first before scoring.
 - Function `set_forecast_unit()` was deleted. Instead there is now a `forecast_unit` argument in `as_forecast_<type>()` as well as in `get_duplicate_forecasts()`.
+- Removed `interval_coverage_dev_quantile()`. Users can still access the difference between nominal and actual interval coverage using `get_coverage()`.
 
 ### Function changes
 - `bias_quantile()` changed the way it handles forecasts where the median is missing: The median is now imputed by linear interpolation between the innermost quantiles. Previously, we imputed the median by simply taking the mean of the innermost quantiles.

diff --git a/R/default-scoring-rules.R b/R/default-scoring-rules.R
@@ -226,7 +226,6 @@ get_metrics.forecast_sample <- function(x, select = NULL, exclude = NULL, ...) {
 #' - "interval_coverage_90" = purrr::partial(
 #'      interval_coverage, interval_range = 90
 #'    )
-#' - "interval_coverage_deviation" = [interval_coverage_deviation()],
 #' - "ae_median" = [ae_median_quantile()]
 #'
 #' Note: The `interval_coverage_90` scoring rule is created by modifying
@@ -255,7 +254,6 @@ get_metrics.forecast_quantile <- function(x, select = NULL, exclude = NULL, ...)
     interval_coverage_90 = purrr::partial(
       interval_coverage, interval_range = 90
     ),
-    interval_coverage_deviation = interval_coverage_deviation,
     ae_median = ae_median_quantile
   )
   select_metrics(all, select, exclude)

diff --git a/R/metrics-quantile.R b/R/metrics-quantile.R
@@ -294,103 +294,6 @@ interval_coverage <- function(observed, predicted,
 }
 
 
-#' @title Interval coverage deviation (for quantile-based forecasts)
-#' @description
-#' Check the agreement between desired and actual interval coverage
-#' of a forecast.
-#'
-#' The function is similar to [interval_coverage()],
-#' but takes all provided prediction intervals into account and
-#' compares nominal interval coverage (i.e. the desired interval coverage) with
-#' the actual observed interval coverage.
-#'
-#' A central symmetric prediction interval is defined by a lower and an
-#' upper bound formed by a pair of predictive quantiles. For example, a 50%
-#' prediction interval is formed by the 0.25 and 0.75 quantiles of the
-#' predictive distribution. Ideally, a forecaster should aim to cover about
-#' 50% of all observed values with their 50% prediction intervals, 90% of all
-#' observed values with their 90% prediction intervals, and so on.
-#'
-#' For every prediction interval, the deviation is computed as the difference
-#' between the observed interval coverage and the nominal interval coverage.
-#' For a single observed value and a single prediction interval, coverage is
-#' always either 0 or 1 (`FALSE` or `TRUE`). This is not the case for a single
-#' observed value and multiple prediction intervals,
-#' but it still doesn't make that much
-#' sense to compare nominal (desired) coverage and actual coverage for a single
-#' observation. In that sense coverage deviation only really starts to make
-#' sense as a metric when averaged across multiple observations.
-#'
-#' Positive values of interval coverage deviation are an indication of
-#' underconfidence, i.e. the forecaster could likely have issued a narrower
-#' forecast. Negative values are an indication of overconfidence, i.e. the
-#' forecasts were too narrow.
-#'
-#' \deqn{
-#' \textrm{interval coverage deviation} =
-#' \mathbf{1}(\textrm{observed value falls within interval}) -
-#' \textrm{nominal interval coverage}
-#' }{
-#' interval coverage deviation =
-#' 1(observed value falls within interval) - nominal interval coverage
-#' }
-#' The interval coverage deviation is then averaged across all prediction
-#' intervals. The median is ignored when computing coverage deviation.
-#' @inheritParams wis
-#' @importFrom cli cli_warn
-#' @return
-#' A numeric vector of length n with the interval coverage deviation
-#' for each forecast (with the forecast itself comprising one or multiple
-#' prediction intervals).
-#' @inheritSection illustration-input-metric-quantile Input format
-#' @export
-#' @keywords metric
-#' @examples
-#' observed <- c(1, -15, 22)
-#' predicted <- rbind(
-#'   c(-1, 0, 1, 2, 3),
-#'   c(-2, 1, 2, 2, 4),
-#'   c(-2, 0, 3, 3, 4)
-#' )
-#' quantile_level <- c(0.1, 0.25, 0.5, 0.75, 0.9)
-#' interval_coverage_deviation(observed, predicted, quantile_level)
-interval_coverage_deviation <- function(observed, predicted, quantile_level) {
-  assert_input_quantile(observed, predicted, quantile_level)
-
-  # transform available quantile_levels into central interval ranges
-  available_ranges <- unique(get_range_from_quantile(quantile_level))
-
-  # check if all necessary quantile_levels are available
-  necessary_quantiles <- unique(
-    c((100 - available_ranges) / 2, 100 - (100 - available_ranges) / 2) / 100
-  )
-  if (!all(necessary_quantiles %in% quantile_level)) {
-    #nolint start: keyword_quote_linter object_usage_linter
-    missing <- necessary_quantiles[!necessary_quantiles %in% quantile_level]
-    cli_warn(
-      c(
-        "x" = "To compute interval coverage deviation, all quantiles must form
-        central symmetric prediction intervals.",
-        "i" = "Missing quantiles: {.val {missing}}. Returning {.val {NA}}."
-      )
-    )
-    #nolint end
-    return(NA)
-  }
-
-  reformatted <- quantile_to_interval(
-    observed, predicted, quantile_level
-  )[interval_range != 0]
-  reformatted[, interval_coverage := (observed >= lower) & (observed <= upper)]
-  reformatted[, interval_coverage_deviation :=
-                interval_coverage - interval_range / 100]
-  out <- reformatted[, .(
-    interval_coverage_deviation = mean(interval_coverage_deviation)
-  ), by = "forecast_id"]
-  return(out$interval_coverage_deviation)
-}
-
-
 #' @title Determines bias of quantile forecasts
 #'
 #' @description

diff --git a/man/get_metrics.forecast_quantile.Rd b/man/get_metrics.forecast_quantile.Rd
diff --git a/man/interval_coverage_deviation.Rd b/man/interval_coverage_deviation.Rd