Issue #851 - Create S3 methods for converting to point and quantile f…

…orecasts (#876) * first draft for as_forecast_point and as_forecast_quantile * update docs and tests * Update docs * Update News file * Update news file again * fix pkgdown and R4.0 issue with pipe
epiforecasts · Jul 30, 2024 · 4d9ecf7 · 4d9ecf7
1 parent 22af418
commit 4d9ecf7
Show file tree

Hide file tree

Showing 16 changed files with 426 additions and 185 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,10 @@
 # Generated by roxygen2: do not edit by hand
 
 S3method(`[`,scores)
+S3method(as_forecast_point,default)
+S3method(as_forecast_point,forecast_quantile)
+S3method(as_forecast_quantile,default)
+S3method(as_forecast_quantile,forecast_sample)
 S3method(assert_forecast,default)
 S3method(assert_forecast,forecast_binary)
 S3method(assert_forecast,forecast_point)
@@ -64,7 +68,6 @@ export(plot_pit)
 export(plot_quantile_coverage)
 export(plot_wis)
 export(quantile_score)
-export(sample_to_quantile)
 export(score)
 export(se_mean_sample)
 export(select_metrics)

diff --git a/NEWS.md b/NEWS.md
@@ -22,7 +22,8 @@ of our [original](https://doi.org/10.48550/arXiv.2205.07090) `scoringutils` pape
   - the CRPS is now reported as decomposition into dispersion, overprediction and underprediction.
 
 ### Creating a forecast object  
-- The `as_forecast_<type>()` functions create a forecast object and validates it. They also allow users to rename/specify required columns and specify the forecast unit in a single step, taking over the functionality of `set_forecast_unit()` in most cases. 
+- The `as_forecast_<type>()` functions create a forecast object and validates it. They also allow users to rename/specify required columns and specify the forecast unit in a single step, taking over the functionality of `set_forecast_unit()` in most cases. See `?as_forecast()` for more information.
+- Some `as_forecast_<type>()` functions like e.g. `as_forecast_point()` and `as_forecast_quantile()` have S3 methods for converting from another forecast type to the respective forecast type. For example, `as_forecast_quantile()` has a method for converting from a `forecast_sample` object to a `forecast_quantile` object by estimating quantiles from the samples.
 
 ### Updated workflows
 - An example workflow for scoring a forecast now looks like this: 
@@ -281,7 +282,7 @@ the mean before returning an output.
 ### Bug fixes
 
 - Testing was expanded
-- Minor bugs were fixed, for example a bug in the `sample_to_quantile()` function
+- Minor bugs were fixed, for example a bug in the `as_forecast_quantile()` function
 (https://github.com/epiforecasts/scoringutils/pull/223)
 
 ### Package data updated

diff --git a/R/forecast.R b/R/forecast.R
@@ -5,18 +5,24 @@
 #' and observations. If the input passes all input checks, it will be converted
 #' to a `forecast` object. A forecast object is a `data.table` with a
 #' class `forecast` and an additional class that depends on the forecast type.
-#' Every forecast type has its own `as_forecast_()` function.
+#' Every forecast type has its own `as_forecast_<type>()` function.
 #' See the details section below for more information
 #' on the expected input formats.
 #'
-#' The `as_forecast_()` functions give users some control over how their data
-#' is parsed.
+#' The `as_forecast_<type>()` functions give users some control over how their
+#' data is parsed.
 #' Using the arguments `observed`, `predicted`, `model`, etc. users can rename
 #' existing columns of their input data to match the required columns for a
 #' forecast object. Using the argument `forecast_unit`, users can specify the
 #' the columns that uniquely identify a single forecast (and remove the others,
 #' see [set_forecast_unit()] for details).
 #'
+#' The following functions are available:
+#' - [as_forecast_point()]
+#' - [as_forecast_binary()]
+#' - [as_forecast_sample()]
+#' - [as_forecast_quantile()]
+#'
 #' @param data A data.frame (or similar) with predicted and observed values.
 #'   See the details section of [as_forecast()] for additional information
 #'   on required input formats.
@@ -43,6 +49,8 @@
 #' - `forecast_sample` for sample-based forecasts
 #' - `forecast_quantile` for quantile-based forecasts
 #' @keywords check-forecasts
+#' @seealso [as_forecast_point()], [as_forecast_binary()],
+#'   [as_forecast_sample()], [as_forecast_quantile()]
 #' @examples
 #' as_forecast_binary(example_binary)
 #' as_forecast_quantile(
@@ -99,48 +107,105 @@ as_forecast_generic <- function(data,
 }
 
 
-#' @rdname as_forecast
+#' @title Create a `forecast` object for binary forecasts
+#' @description
+#' Create a `forecast` object for binary forecasts. See more information on
+#' forecast types and expected input formats by calling `?`[as_forecast()].
 #' @export
+#' @inheritParams as_forecast
+#' @seealso [as_forecast()], [as_forecast_point()], [as_forecast_binary()],
+#'   [as_forecast_sample()], [as_forecast_quantile()]
 #' @importFrom cli cli_warn
-as_forecast_point <- function(data,
-                              forecast_unit = NULL,
-                              observed = NULL,
-                              predicted = NULL,
-                              model = NULL) {
+#' @keywords check-forecasts
+as_forecast_binary <- function(data,
+                               forecast_unit = NULL,
+                               observed = NULL,
+                               predicted = NULL,
+                               model = NULL) {
   data <- as_forecast_generic(data, forecast_unit, observed, predicted, model)
-  data <- new_forecast(data, "forecast_point")
+  data <- new_forecast(data, "forecast_binary")
   assert_forecast(data)
   return(data)
 }
 
 
-#' @rdname as_forecast
+#' @title Create a `forecast` object for point forecasts
+#' @description
+#' Create a `forecast` object for point forecasts. See more information on
+#' forecast types and expected input formats by calling `?`[as_forecast()].
+#' @inherit as_forecast params
+#' @param ... Unused
+#' @seealso [as_forecast()], [as_forecast_point()], [as_forecast_binary()],
+#'   [as_forecast_sample()], [as_forecast_quantile()]
+#' @export
+#' @keywords check-forecasts
+as_forecast_point <- function(data, ...) {
+  UseMethod("as_forecast_point")
+}
+
+
+#' @rdname as_forecast_point
 #' @export
 #' @importFrom cli cli_warn
-as_forecast_binary <- function(data,
-                               forecast_unit = NULL,
-                               observed = NULL,
-                               predicted = NULL,
-                               model = NULL) {
+as_forecast_point.default <- function(data,
+                                      forecast_unit = NULL,
+                                      observed = NULL,
+                                      predicted = NULL,
+                                      model = NULL,
+                                      ...) {
   data <- as_forecast_generic(data, forecast_unit, observed, predicted, model)
-  data <- new_forecast(data, "forecast_binary")
+  data <- new_forecast(data, "forecast_point")
   assert_forecast(data)
   return(data)
 }
 
 
-#' @rdname as_forecast
+#' @rdname as_forecast_point
+#' @description
+#' When converting a `forecast_quantile` object into a `forecast_point` object,
+#' the 0.5 quantile is extracted and returned as the point forecast.
+#' @export
+#' @keywords check-forecasts
+as_forecast_point.forecast_quantile <- function(data, ...) {
+  assert_forecast(data, verbose = FALSE)
+  assert_subset(0.5, unique(data$quantile_level))
+
+  forecast <- data[quantile_level == 0.5]
+  forecast[, "quantile_level" := NULL]
+
+  point_forecast <- new_forecast(forecast, "forecast_point")
+  return(point_forecast)
+}
+
+
+#' @title Create a `forecast` object for quantile-based forecasts
+#' @description
+#' Create a `forecast` object for quantile-based forecasts. See more information
+#' on forecast types and expected input formats by calling `?`[as_forecast()].
+#' @param ... Unused
+#' @seealso [as_forecast()], [as_forecast_point()], [as_forecast_binary()],
+#'   [as_forecast_sample()], [as_forecast_quantile()]
+#' @inheritParams as_forecast
+#' @export
+#' @keywords check-forecasts
+as_forecast_quantile <- function(data, ...) {
+  UseMethod("as_forecast_quantile")
+}
+
+
+#' @rdname as_forecast_quantile
 #' @param quantile_level (optional) Name of the column in `data` that contains
 #'   the quantile level of the predicted values. This column will be renamed to
 #'   "quantile_level". Only applicable to quantile-based forecasts.
 #' @export
 #' @importFrom cli cli_warn
-as_forecast_quantile <- function(data,
-                                 forecast_unit = NULL,
-                                 observed = NULL,
-                                 predicted = NULL,
-                                 model = NULL,
-                                 quantile_level = NULL) {
+as_forecast_quantile.default <- function(data,
+                                         forecast_unit = NULL,
+                                         observed = NULL,
+                                         predicted = NULL,
+                                         model = NULL,
+                                         quantile_level = NULL,
+                                         ...) {
   assert_character(quantile_level, len = 1, null.ok = TRUE)
   assert_subset(quantile_level, names(data), empty.ok = TRUE)
   if (!is.null(quantile_level)) {
@@ -154,12 +219,61 @@ as_forecast_quantile <- function(data,
 }
 
 
-#' @rdname as_forecast
+#' @rdname as_forecast_quantile
+#' @description
+#' When creating a `forecast_quantile` object from a `forecast_sample` object,
+#' the quantiles are estimated by computing empircal quantiles from the samples
+#' via [quantile()]. Note that empirical quantiles are a biased estimator for
+#' the true quantiles in particular in the tails of the distribution and
+#' when the number of available samples is low.
+#' @param probs A numeric vector of quantile levels for which
+#'   quantiles will be computed. Corresponds to the `probs` argument in
+#'   [quantile()].
+#' @param type Type argument passed down to the quantile function. For more
+#'   information, see [quantile()].
+#' @importFrom stats quantile
+#' @importFrom methods hasArg
+#' @importFrom checkmate assert_numeric
+#' @export
+as_forecast_quantile.forecast_sample <- function(
+  data,
+  probs = c(0.05, 0.25, 0.5, 0.75, 0.95),
+  type = 7,
+  ...
+) {
+  forecast <- copy(data)
+  assert_forecast(forecast, verbose = FALSE)
+  assert_numeric(probs, min.len = 1)
+  reserved_columns <- c("predicted", "sample_id")
+  by <- setdiff(colnames(forecast), reserved_columns)
+
+  quantile_level <- unique(
+    round(c(probs, 1 - probs), digits = 10)
+  )
+
+  forecast <-
+    forecast[, .(quantile_level = quantile_level,
+                 predicted = quantile(x = predicted, probs = ..probs,
+                                      type = ..type, na.rm = TRUE)),
+             by = by]
+
+  quantile_forecast <- new_forecast(forecast, "forecast_quantile")
+  assert_forecast(quantile_forecast)
+
+  return(quantile_forecast)
+}
+
+
+#' @title Create a `forecast` object for sample-based forecasts
 #' @param sample_id (optional) Name of the column in `data` that contains the
 #'   sample id. This column will be renamed to "sample_id". Only applicable to
 #'   sample-based forecasts.
+#' @inheritParams as_forecast
 #' @export
+#' @seealso [as_forecast()], [as_forecast_point()], [as_forecast_binary()],
+#'   [as_forecast_sample()], [as_forecast_quantile()]
 #' @importFrom cli cli_warn
+#' @keywords check-forecasts
 as_forecast_sample <- function(data,
                                forecast_unit = NULL,
                                observed = NULL,

diff --git a/R/utils_data_handling.R b/R/utils_data_handling.R
@@ -1,51 +1,3 @@
-#' @title Change data from a sample based format to a quantile format
-#'
-#' @description
-#' Transform data from a format that is based on predictive samples to a format
-#' based on plain quantiles.
-#'
-#' @param forecast A `forecast` object of class `forecast_sample` (a validated
-#'   data.table with predicted and observed values, see [as_forecast()]).
-#'
-#' @param quantile_level A numeric vector of quantile levels for which
-#'   quantiles will be computed.
-#' @param type Type argument passed down to the quantile function. For more
-#'   information, see [quantile()].
-#' @return a data.table in a long interval range format
-#' @importFrom data.table as.data.table
-#' @importFrom stats quantile
-#' @importFrom methods hasArg
-#' @importFrom checkmate assert_numeric
-#' @keywords data-handling
-#' @export
-#' @examples
-#' library(magrittr) # pipe operator
-#' example_sample_discrete %>%
-#'   as_forecast_sample() %>%
-#'   sample_to_quantile()
-sample_to_quantile <- function(forecast,
-                               quantile_level = c(0.05, 0.25, 0.5, 0.75, 0.95),
-                               type = 7) {
-  forecast <- copy(forecast)
-  assert_forecast(forecast, forecast_type = "sample", verbose = FALSE)
-  assert_numeric(quantile_level, min.len = 1)
-  reserved_columns <- c("predicted", "sample_id")
-  by <- setdiff(colnames(forecast), reserved_columns)
-
-  quantile_level <- unique(
-    round(c(quantile_level, 1 - quantile_level), digits = 10)
-  )
-
-  forecast <-
-    forecast[, .(quantile_level = quantile_level,
-                 predicted = quantile(x = predicted, probs = ..quantile_level,
-                                      type = ..type, na.rm = TRUE)),
-             by = by]
-
-  return(as_forecast_quantile(forecast))
-}
-
-
 # ==================== Functions internally used for scoring ===================
 # These functions would ideally be replaced in the future
 
@@ -186,7 +138,7 @@ quantile_to_interval_numeric <- function(observed,
 #' Transform data from a format that is based on predictive samples to a format
 #' based on interval ranges.
 #'
-#' @inheritParams sample_to_quantile
+#' @inheritParams as_forecast_quantile
 #' @param keep_quantile_col keep quantile_level column, default is TRUE
 #' @return A data.table in a long interval interval range format
 #' @importFrom data.table as.data.table
@@ -203,9 +155,9 @@ sample_to_interval_long <- function(data,
   upper_quantiles <- 1 - lower_quantiles
   quantile_levels <- sort(unique(c(lower_quantiles, upper_quantiles)))
 
-  data <- sample_to_quantile(
+  data <- as_forecast_quantile(
     data,
-    quantile_level = quantile_levels,
+    probs = quantile_levels,
     type = type
   )
 

diff --git a/R/z_globalVariables.R b/R/z_globalVariables.R
@@ -1,6 +1,6 @@
 globalVariables(c(
   "..index",
-  "..quantile_level",
+  "..probs",
   "..type",
   ".",
   ".SD",

diff --git a/inst/manuscript/R/00-standalone-Figure-replication.R b/inst/manuscript/R/00-standalone-Figure-replication.R
@@ -143,7 +143,7 @@ pit_plots <- plot_pit(stored$pit) +
 # create interval and quantile coverage plots ----------------------------------
 # create coverage plots by transforming to quantile format first
 quantiles <- c(0.01, 0.025, seq(0.05, 0.95, 0.05), 0.975, 0.99)
-df_quantile <- sample_to_quantile(df,
+df_quantile <- as_forecast_quantile(df,
                                   quantiles = quantiles)
 
 res_quantile <- score(df_quantile)

diff --git a/inst/manuscript/R/toy-example-calibration.R b/inst/manuscript/R/toy-example-calibration.R
@@ -72,7 +72,7 @@ pit_plots <- plot_pit(stored$pit) +
 # create interval and quantile coverage plots ----------------------------------
 # create coverage plots by transforming to quantile format first
 quantiles <- c(0.01, 0.025, seq(0.05, 0.95, 0.05), 0.975, 0.99)
-df_quantile <- sample_to_quantile(df,
+df_quantile <- as_forecast_quantile(df,
                                   quantile_level = quantiles)
 
 res_quantile <- score(df_quantile)