Merge branch 'main' into condition-handling

epiforecasts · Feb 29, 2024 · 0079d5c · 0079d5c
2 parents a85c5c1 + 1fea902
commit 0079d5c
Show file tree

Hide file tree

Showing 29 changed files with 143 additions and 2,458 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -54,7 +54,6 @@ Imports:
     checkmate, 
     cli,
     data.table,
-    ggdist (>= 3.2.0),
     ggplot2 (>= 3.4.0),
     lifecycle,
     methods,

diff --git a/NAMESPACE b/NAMESPACE
@@ -22,7 +22,6 @@ S3method(validate_forecast,forecast_binary)
 S3method(validate_forecast,forecast_point)
 S3method(validate_forecast,forecast_quantile)
 S3method(validate_forecast,forecast_sample)
-export(add_coverage)
 export(add_pairwise_comparison)
 export(ae_median_quantile)
 export(ae_median_sample)
@@ -35,6 +34,7 @@ export(correlation)
 export(crps_sample)
 export(dispersion)
 export(dss_sample)
+export(get_coverage)
 export(get_duplicate_forecasts)
 export(get_forecast_counts)
 export(get_forecast_type)
@@ -47,8 +47,6 @@ export(log_shift)
 export(logs_binary)
 export(logs_sample)
 export(mad_sample)
-export(make_NA)
-export(make_na)
 export(merge_pred_and_obs)
 export(new_forecast)
 export(overprediction)
@@ -61,7 +59,6 @@ export(plot_heatmap)
 export(plot_interval_coverage)
 export(plot_pairwise_comparison)
 export(plot_pit)
-export(plot_predictions)
 export(plot_quantile_coverage)
 export(plot_score_table)
 export(plot_wis)
@@ -134,7 +131,6 @@ importFrom(data.table,setDT)
 importFrom(data.table,setattr)
 importFrom(data.table,setcolorder)
 importFrom(data.table,setnames)
-importFrom(ggdist,geom_lineribbon)
 importFrom(ggplot2,.data)
 importFrom(ggplot2,`%+replace%`)
 importFrom(ggplot2,aes)
@@ -150,7 +146,6 @@ importFrom(ggplot2,geom_col)
 importFrom(ggplot2,geom_histogram)
 importFrom(ggplot2,geom_line)
 importFrom(ggplot2,geom_linerange)
-importFrom(ggplot2,geom_point)
 importFrom(ggplot2,geom_polygon)
 importFrom(ggplot2,geom_text)
 importFrom(ggplot2,geom_tile)

diff --git a/NEWS.md b/NEWS.md
@@ -26,7 +26,7 @@ The update introduces breaking changes. If you want to keep using the older vers
 - `check_forecasts()` was replaced by a different workflow. There now is a function, `as_forecast()`, that determines forecast type of the data, constructs a forecasting object and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`.
 - Users can test whether an object is of class `forecast_*()` using the function `is_forecast()`. Users can also test for a specific `forecast_*` class using the appropriate `is_forecast.forecast_*` method. For example, to check whether an object is of class `forecast_quantile`, you would use `scoringutils:::is_forecast.forecast_quantile()`.
 - The functionality for computing pairwise comparisons has been split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_pairwise_comparison()`, was introduced that takes summarised scores as an input and adds columns with relative skill scores and scaled relative skill scores.
-- `add_coverage()` was reworked completely. It's new purpose is now to add coverage information to the raw forecast data (essentially fulfilling some of the functionality that was previously covered by `score_quantile()`)
+- `add_coverage()` was replaced by a new function, `get_coverage()`. This function comes with an updated workflow where coverage values are computed directly based on the original data and can then be visualised using `plot_interval_coverage()` or `plot_quantile_coverage()`. An example workflow would be `example_quantile |> as_forecast() |> get_coverage(by = "model") |> plot_interval_coverage()`.
 - Support for the interval format was mostly dropped (see PR #525 by @nikosbosse and reviewed by @seabbs)
     - The function `bias_range()` was removed (users should now use `bias_quantile()` instead)
     - The function `interval_score()` was made an internal function rather than being exported to users. We recommend using `wis()` instead. 
@@ -51,6 +51,7 @@ The update introduces breaking changes. If you want to keep using the older vers
 - Added a method for `print()` that prints out additional information for `forecast` objects. 
 - Added a subsetting `[` operator for scores, so that the score name attribute gets preserved when subsetting.
 - Deleted the function `plot_ranges()`. If you want to continue using the functionality, you can find the function code [here](https://github.com/epiforecasts/scoringutils/issues/462).
+- Removed the function `plot_predictions()`, as well as its helper function `make_NA()`, in favour of a dedicated Vignette that shows different ways of visualising predictions. For future reference, the function code can be found [here](https://github.com/epiforecasts/scoringutils/issues/659) (Issue #659).
 
 # scoringutils 1.2.2
 

diff --git a/R/add_coverage.R b/R/add_coverage.R
@@ -1,6 +1,6 @@
-#' @title Add Coverage Values to Quantile-Based Forecasts
+#' @title Get Quantile And Interval Coverage Values For Quantile-Based Forecasts
 #'
-#' @description Adds interval coverage of central prediction intervals,
+#' @description Compute interval coverage of central prediction intervals,
 #' quantile coverage for predictive quantiles, as well as the deviation between
 #' desired and actual coverage to a data.table. Forecasts should be in a
 #' quantile format (following the input requirements of `score()`).
@@ -9,17 +9,19 @@
 #'
 #' Interval coverage for a given interval range is defined as the proportion of
 #' observations that fall within the corresponding central prediction intervals.
-#' Central prediction intervals are symmetric around the median and and formed
+#' Central prediction intervals are symmetric around the median and formed
 #' by two quantiles that denote the lower and upper bound. For example, the 50%
 #' central prediction interval is the interval between the 0.25 and 0.75
 #' quantiles of the predictive distribution.
 #'
-#' The function `add_coverage()` computes the coverage per central prediction
-#' interval, so the interval coverage will always be either `TRUE`
-#' (observed value falls within the interval) or `FALSE`  (observed value falls
-#' outside the interval). You can summarise the interval coverage values to get
-#' the proportion of observations that fall within the central prediction
-#' intervals.
+#' The function `get_coverage()` computes the coverage per central prediction
+#' interval. This means that if you set `by` to the unit of a single forecast,
+#' interval coverage will always be either `TRUE`
+#' (observed value falls within the interval) or `FALSE` (observed value falls
+#' outside the interval) and analogously for quantile coverage.
+#' Coverage values become meaningful by summarising them across different
+#' dimensions, as specified in the `by` argument (thereby returning the
+#' proportion of values covered by all prediction intervals/quantiles).
 #'
 #' **Quantile coverage**
 #'
@@ -38,48 +40,61 @@
 #' coverage is 80%, the coverage deviation is -0.1.
 #'
 #' @inheritParams score
-#' @return a data.table with the input and columns "interval_coverage",
+#' @param by character vector that denotes the level of grouping for which the
+#' coverage values should be computed. By default (`"model"`), one coverage
+#' value per model will be returned.
+#' @return a data.table with columns "interval_coverage",
 #' "interval_coverage_deviation", "quantile_coverage",
-#' "quantile_coverage_deviation" added.
+#' "quantile_coverage_deviation" and the columns specified in `by`.
 #' @importFrom data.table setcolorder
+#' @importFrom checkmate assert_subset
 #' @examples
 #' library(magrittr) # pipe operator
 #' example_quantile %>%
-#'   add_coverage()
+#'   as_forecast() %>%
+#'   get_coverage(by = "model")
 #' @export
 #' @keywords scoring
 #' @export
-add_coverage <- function(data) {
-  stored_attributes <- get_scoringutils_attributes(data)
-  data <- as_forecast(data)
-  forecast_unit <- get_forecast_unit(data)
-  data_cols <- colnames(data) # store so we can reset column order later
+get_coverage <- function(data, by = "model") {
+  # input checks ---------------------------------------------------------------
+  data <- as_forecast(na.omit(data), forecast_type = "quantile")
 
+  # remove "quantile_level" and "interval_range" from `by` if present, as these
+  # are included anyway
+  by <- setdiff(by, c("quantile_level", "interval_range"))
+  assert_subset(by, names(data))
+
+  # convert to wide interval format and compute interval coverage --------------
   interval_data <- quantile_to_interval(data, format = "wide")
   interval_data[,
     interval_coverage := (observed <= upper) & (observed >= lower)
   ][, c("lower", "upper", "observed") := NULL]
+  interval_data[, interval_coverage_deviation :=
+                  interval_coverage - interval_range / 100]
 
+  # merge interval range data with original data -------------------------------
+  # preparations
   data[, interval_range := get_range_from_quantile(quantile_level)]
+  data_cols <- colnames(data) # store so we can reset column order later
+  forecast_unit <- get_forecast_unit(data)
 
   data <- merge(data, interval_data,
                 by = unique(c(forecast_unit, "interval_range")))
-  data[, interval_coverage_deviation :=
-         interval_coverage - interval_range / 100]
+
+  # compute quantile coverage and deviation ------------------------------------
   data[, quantile_coverage := observed <= predicted]
   data[, quantile_coverage_deviation := quantile_coverage - quantile_level]
 
+  # summarise coverage values according to `by` and cleanup --------------------
   # reset column order
   new_metrics <- c("interval_coverage", "interval_coverage_deviation",
                    "quantile_coverage", "quantile_coverage_deviation")
   setcolorder(data, unique(c(data_cols, "interval_range", new_metrics)))
-
-  # add coverage "metrics" to list of stored metrics
-  # this makes it possible to use `summarise_scores()` later on
-  stored_attributes[["score_names"]] <- c(
-    stored_attributes[["score_names"]],
-    new_metrics
-  )
-  data <- assign_attributes(data, stored_attributes)
+  # remove forecast class and convert to regular data.table
+  data <- as.data.table(data)
+  by <- unique(c(by, "quantile_level", "interval_range"))
+  # summarise
+  data <- data[, lapply(.SD, mean), by = by, .SDcols = new_metrics]
   return(data[])
 }
diff --git a/R/get_-functions.R b/R/get_-functions.R
@@ -132,8 +132,8 @@ get_type <- function(x) {
 
 #' @title Get Names Of The Scoring Rules That Were Used For Scoring
 #' @description
-#' When applying a scoring rule, (for example through [score()] or
-#' [add_coverage()], the names of the scoring rules become column names of the
+#' When applying a scoring rule via [score()], the names of the scoring rules
+#' become column names of the
 #' resulting data.table. In addition, an attribute `score_names` will be
 #' added to the output, holding the names of the scores as a vector.
 #' This is done so that a function like [get_forecast_unit()] can still