From dd609af6d65dd2372c7c2d17abbc6c117dae3997 Mon Sep 17 00:00:00 2001 From: Nikos Bosse <37978797+nikosbosse@users.noreply.github.com> Date: Tue, 8 Oct 2024 19:51:29 +0200 Subject: [PATCH] Handling case in WIS where quantile levels don't form valid intervals (#943) * set default for wis to na.rm = FALSE * Update News file * fix linting issues * add isTRUE * Make explanation in NEWS.md more verbose --- NEWS.md | 1 + R/metrics-quantile.R | 23 ++++++++++++++++++- man/wis.Rd | 2 +- tests/testthat/test-class-forecast-quantile.R | 16 +++++++++---- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 060b75636..a4899aa66 100644 --- a/NEWS.md +++ b/NEWS.md @@ -88,6 +88,7 @@ of our [original](https://doi.org/10.48550/arXiv.2205.07090) `scoringutils` pape ### Function changes - `bias_quantile()` changed the way it handles forecasts where the median is missing: The median is now imputed by linear interpolation between the innermost quantiles. Previously, we imputed the median by simply taking the mean of the innermost quantiles. - In contrast to the previous `correlation` function, `get_correlations` doesn't round correlations by default. Instead, `plot_correlations` now has a `digits` argument that allows users to round correlations before plotting them. Alternatively, using `dplyr`, you could call something like `mutate(correlations, across(where(is.numeric), \(x) signif(x, digits = 2)))` on the output of `get_correlations`. +- `wis()` now errors by default if not all quantile levels form valid prediction intervals and returns `NA` if there are missing values. Previously, `na.rm` was set to `TRUE` by default, which could lead to unexpected results, if users are not aware of this. ### Internal package updates - The deprecated `..density..` was replaced with `after_stat(density)` in ggplot calls. diff --git a/R/metrics-quantile.R b/R/metrics-quantile.R index 096f64061..ddb14f35a 100644 --- a/R/metrics-quantile.R +++ b/R/metrics-quantile.R @@ -180,10 +180,31 @@ wis <- function(observed, separate_results = FALSE, weigh = TRUE, count_median_twice = FALSE, - na.rm = TRUE) { + na.rm = FALSE) { assert_input_quantile(observed, predicted, quantile_level) reformatted <- quantile_to_interval(observed, predicted, quantile_level) + # check that all quantile levels form valid prediction intervals + interval_ranges <- get_range_from_quantile( + quantile_level[quantile_level != 0.5] + ) + complete_intervals <- + duplicated(interval_ranges) | duplicated(interval_ranges, fromLast = TRUE) + if (!all(complete_intervals) && !isTRUE(na.rm)) { + #nolint start: keyword_quote_linter object_usage_linter + incomplete <- quantile_level[quantile_level != 0.5][!complete_intervals] + cli_abort( + c( + "!" = "Not all quantile levels specified form symmetric prediction + intervals. + The following quantile levels miss a corresponding lower/upper bound: + {.val {incomplete}}. + You can drop incomplete prediction intervals using `na.rm = TRUE`." + ) + ) + #nolint end + } + assert_logical(separate_results, len = 1) assert_logical(weigh, len = 1) assert_logical(count_median_twice, len = 1) diff --git a/man/wis.Rd b/man/wis.Rd index f5785fa85..5452dd231 100644 --- a/man/wis.Rd +++ b/man/wis.Rd @@ -14,7 +14,7 @@ wis( separate_results = FALSE, weigh = TRUE, count_median_twice = FALSE, - na.rm = TRUE + na.rm = FALSE ) dispersion_quantile(observed, predicted, quantile_level, ...) diff --git a/tests/testthat/test-class-forecast-quantile.R b/tests/testthat/test-class-forecast-quantile.R index 87d746bfd..ecff751f7 100644 --- a/tests/testthat/test-class-forecast-quantile.R +++ b/tests/testthat/test-class-forecast-quantile.R @@ -300,23 +300,29 @@ test_that("score() works even if only some quantiles are missing", { )) ) - # asymmetric intervals asymm <- example_quantile[!quantile_level > 0.6] + metrics <- get_metrics(example_quantile) + metrics$wis <- purrr::partial(wis, na.rm = TRUE) expect_warning( expect_warning( - score_a <- score(asymm) %>% summarise_scores(by = "model"), + score_a <- score(asymm, metrics = metrics) %>% + summarise_scores(by = "model"), "Computation for `interval_coverage_50` failed." ), "Computation for `interval_coverage_90` failed." ) + # expect a failure with the regular wis wihtout ma.rm=TRUE + expect_warning( + score(asymm, metrics = c(wis = wis)), + "Not all quantile levels specified form symmetric prediction intervals." + ) + # check that the result is equal to a case where we discard the entire # interval in terms of WIS inner <- example_quantile[quantile_level %in% c(0.4, 0.45, 0.5, 0.55, 0.6)] - score_b <- score(inner, get_metrics( - inner, exclude = c("interval_coverage_50", "interval_coverage_90") - )) %>% + score_b <- score(inner, metrics = c(wis = wis)) %>% summarise_scores(by = "model") expect_equal( score_a$wis,