
Issue #494: New workflow for creating and validating forecast objects #531

Merged: 12 commits, Dec 19, 2023
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -12,6 +12,7 @@
^Meta$
^_pkgdown\.yml$
^inst/manuscript/manuscript_cache$
^inst/manuscript/.trackdown$
^\.lintr$
^docs$
^\.devcontainer$
1 change: 1 addition & 0 deletions .gitignore
@@ -13,6 +13,7 @@ inst/manuscript/manuscript.blg
inst/manuscript/manuscript.pdf
inst/manuscript/manuscript.tex
inst/manuscript/manuscript_files/
inst/manuscript/.trackdown
docs
..bfg-report/
.DS_Store
13 changes: 7 additions & 6 deletions NAMESPACE
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(as_forecast,default)
S3method(print,scoringutils_check)
S3method(quantile_to_interval,data.frame)
S3method(quantile_to_interval,numeric)
@@ -8,16 +9,16 @@ S3method(score,scoringutils_binary)
S3method(score,scoringutils_point)
S3method(score,scoringutils_quantile)
S3method(score,scoringutils_sample)
S3method(validate,default)
S3method(validate,scoringutils_binary)
S3method(validate,scoringutils_point)
S3method(validate,scoringutils_quantile)
S3method(validate,scoringutils_sample)
S3method(validate_forecast,scoringutils_binary)
S3method(validate_forecast,scoringutils_point)
S3method(validate_forecast,scoringutils_quantile)
S3method(validate_forecast,scoringutils_sample)
export(abs_error)
export(add_coverage)
export(add_pairwise_comparison)
export(ae_median_quantile)
export(ae_median_sample)
export(as_forecast)
export(available_metrics)
export(bias_quantile)
export(bias_sample)
@@ -69,7 +70,7 @@ export(summarize_scores)
export(theme_scoringutils)
export(transform_forecasts)
export(underprediction)
export(validate)
export(validate_forecast)
export(validate_general)
export(wis)
importFrom(Metrics,ae)
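Read together, these registrations replace the old `validate()` generic with `as_forecast()` plus a `validate_forecast()` generic. A minimal sketch of the implied S3 layout (function bodies are illustrative placeholders, not scoringutils source; `get_forecast_type()` is assumed to exist in the package):

```r
# Sketch of the S3 layout implied by the NAMESPACE changes above
as_forecast <- function(data, ...) {
  UseMethod("as_forecast")
}

as_forecast.default <- function(data, ...) {
  # infer the forecast type from the columns present, then classify the data
  forecast_type <- get_forecast_type(data) # e.g. "quantile"
  class(data) <- c(paste0("scoringutils_", forecast_type), class(data))
  validate_forecast(data)
}

# replaces the old validate() generic: one method per forecast type
validate_forecast <- function(data, ...) {
  UseMethod("validate_forecast")
}

validate_forecast.scoringutils_quantile <- function(data, ...) {
  # type-specific checks live in methods like this one
  stopifnot(
    is.numeric(data$quantile),
    all(data$quantile >= 0 & data$quantile <= 1)
  )
  data
}
```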
2 changes: 1 addition & 1 deletion NEWS.md
@@ -20,7 +20,7 @@ The update introduces breaking changes. If you want to keep using the older vers
- `observed`: numeric, either a scalar or a vector
- `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector)
- `quantile`: numeric, a vector with quantile-levels. Can alternatively be a matrix of the same shape as `predicted`.
- `check_forecasts()` was replaced by a new function `validate()`. `validate()` validates the input and in that sense fulfills the purpose of `check_forecasts()`. It has different methods: `validate.default()` assigns the input a class based on their forecast type. Other methods validate the input specifically for the various forecast types.
- `check_forecasts()` was replaced by a different workflow. There is now a function, `as_forecast()`, that determines the forecast type of the data, constructs a forecast object, and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`.
- The functionality for computing pairwise comparisons has been split out of `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_pairwise_comparison()`, was introduced that takes summarised scores as input and adds pairwise comparisons to them.
- `add_coverage()` was reworked completely. Its new purpose is to add coverage information to the raw forecast data (essentially fulfilling some of the functionality that was previously covered by `score_quantile()`)
- Support for the interval format was mostly dropped (see PR #525 by @nikosbosse and reviewed by @seabbs)
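A sketch of the reworked workflow described in this entry, assuming the dev-version API at the time of this PR:

```r
# New workflow: construct and validate a forecast object, then score it
library(scoringutils)

forecast <- as_forecast(example_quantile) # construct + validate the object
print(forecast)                           # covers what check_forecasts() did

scores <- score(forecast)
scores <- summarise_scores(scores, by = "model")

# pairwise comparisons are now a separate, explicit step on summarised scores
scores <- add_pairwise_comparison(scores)
```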
2 changes: 1 addition & 1 deletion R/add_coverage.R
@@ -47,7 +47,7 @@
#' @export
add_coverage <- function(data) {
stored_attributes <- get_scoringutils_attributes(data)
data <- validate(data)
data <- as_forecast(data)
forecast_unit <- get_forecast_unit(data)
data_cols <- colnames(data) # store so we can reset column order later

2 changes: 1 addition & 1 deletion R/available_forecasts.R
@@ -38,7 +38,7 @@ get_forecast_counts <- function(data,
by = NULL,
collapse = c("quantile", "sample_id")) {

data <- validate(data)
data <- as_forecast(data)
forecast_unit <- attr(data, "forecast_unit")
data <- remove_na_observed_predicted(data)

2 changes: 1 addition & 1 deletion R/check-input-helpers.R
@@ -175,7 +175,7 @@ check_attribute_conflict <- function(object, attribute, expected) {
"from what's expected based on the data.\n",
"Existing: ", toString(existing), "\n",
"Expected: ", toString(expected), "\n",
"Running `validate()` again might solve the problem"
"Running `as_forecast()` again might solve the problem"
)
return(msg)
}
2 changes: 1 addition & 1 deletion R/convenience-functions.R
@@ -218,7 +218,7 @@ log_shift <- function(x, offset = 0, base = exp(1)) {
#' are relevant to determine the forecast unit. This may lead to unexpected
#' behaviour, so setting the forecast unit explicitly can help make the code
#' easier to debug and easier to read. When used as part of a workflow,
#' `set_forecast_unit()` can be directly piped into `validate()` to
#' `set_forecast_unit()` can be directly piped into `as_forecast()` to
#' check everything is in order.
#'
#' @inheritParams score
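A sketch of that piped workflow, mirroring the package's bundled `example_quantile` data and example column names:

```r
# Set the forecast unit explicitly, then validate and score
library(magrittr) # pipe operator
library(scoringutils)

example_quantile %>%
  set_forecast_unit(
    c("location", "target_end_date", "target_type", "horizon", "model")
  ) %>%
  as_forecast() %>% # validates and classifies the data
  score()
```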
65 changes: 65 additions & 0 deletions R/documentation-templates.R
@@ -1,3 +1,68 @@
#' @title Documentation template for forecast types
#'
#' @details # Forecast types and input format
#'
#' Various forecast types / forecast formats are supported. At the
#' moment, these are:
#' - point forecasts
#' - binary forecasts ("soft binary classification")
#' - probabilistic forecasts in a quantile-based format (a forecast is
#' represented as a set of predictive quantiles)
#' - probabilistic forecasts in a sample-based format (a forecast is represented
#' as a set of predictive samples)
#'
#' Forecast types are determined based on the columns present in the input data.
#'
#' *Point forecasts* require a column `observed` of type numeric and a column
#' `predicted` of type numeric.
#'
#' *Binary forecasts* require a column `observed` of type factor with exactly
#' two levels and a column `predicted` of type numeric with probabilities,
#' corresponding to the probability that `observed` is equal to the second
#' factor level. See details [here][brier_score()] for more information.
#'
#' *Quantile-based forecasts* require a column `observed` of type numeric,
#' a column `predicted` of type numeric, and a column `quantile` of type numeric
#' with quantile-levels (between 0 and 1).
#'
#' *Sample-based forecasts* require a column `observed` of type numeric,
#' a column `predicted` of type numeric, and a column `sample_id` of type
#' numeric with sample indices.
#'
#' For more information see the vignettes and the example data
#' ([example_quantile], [example_continuous], [example_integer],
#' [example_point()], and [example_binary]).
#'
#' @details # Forecast unit
#'
#' In order to score forecasts, `scoringutils` needs to know which rows of
#' the data belong together and jointly form a single forecast. This is
#' easy e.g. for point forecasts, where there is one row per forecast. For
#' quantile- or sample-based forecasts, however, multiple rows belong to a
#' single forecast.
#'
#' The *forecast unit* or *unit of a single forecast* is then described by the
#' combination of columns that uniquely identify a single forecast.
#' For example, we could have forecasts made by different models in various
#' locations at different time points, each for several weeks into the future.
#' The forecast unit could then be described as
#' `forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")`.
#' `scoringutils` automatically tries to determine the unit of a single
#' forecast. It uses all existing columns for this, which means that no
#' columns unrelated to the forecast unit should be present. As a very
#' simplistic example, if you had an additional column, "even", that is one
#' if the row number is even and zero otherwise, this would mess up scoring,
#' as `scoringutils` would think that this column is relevant in defining
#' the forecast unit.
#'
#' In order to avoid issues, we recommend using the function
#' [set_forecast_unit()] to determine the forecast unit manually.
#' The function simply drops unneeded columns, while making sure that all
#' necessary, 'protected columns' like "predicted" or "observed" are retained.
#'
#' @name forecast_types
#' @keywords internal
NULL

#' Documentation template for check functions
#' @param data A data.frame or similar to be checked
#' @param columns A character vector of column names to check
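For illustration, minimal toy inputs for each forecast type described in the documentation above; all values are invented, and only the column names and types matter:

```r
library(data.table)

# point forecast: one row per forecast
point_fc <- data.table(model = "m1", observed = 22.1, predicted = 24.0)

# binary forecast: predicted is the probability of the second factor level
binary_fc <- data.table(
  model     = "m1",
  observed  = factor("yes", levels = c("no", "yes")),
  predicted = 0.8
)

# quantile-based forecast: one row per quantile level (between 0 and 1)
quantile_fc <- data.table(
  model     = "m1",
  observed  = 22.1,
  quantile  = c(0.25, 0.5, 0.75),
  predicted = c(20, 24, 29)
)

# sample-based forecast: one row per predictive sample
sample_fc <- data.table(
  model     = "m1",
  observed  = 22.1,
  sample_id = 1:3,
  predicted = c(21, 25, 23)
)

# each can then be turned into a validated forecast object, e.g.
# as_forecast(quantile_fc)
```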
4 changes: 2 additions & 2 deletions R/get_-functions.R
@@ -151,7 +151,7 @@ get_metrics <- function(scores) {
#' the columns that are protected, i.e. those returned by
#' [get_protected_columns()] as well as the names of the metrics that were
#' specified during scoring, if any.
#' @inheritParams validate
#' @inheritParams validate_forecast
#' @param check_conflict Whether or not to check whether there is a conflict
#' between a stored attribute and the inferred forecast unit. When you create
#' a forecast object, the forecast unit is stored as an attribute. If you
@@ -183,7 +183,7 @@ get_forecast_unit <- function(data, check_conflict = FALSE) {
#' @description Helper function to get the names of all columns in a data frame
#' that are protected columns.
#'
#' @inheritParams validate
#' @inheritParams validate_forecast
#'
#' @return A character vector with the names of protected columns in the data.
#' If data is `NULL` (default) then it returns a list of all columns that are
2 changes: 1 addition & 1 deletion R/pit.R
@@ -185,7 +185,7 @@ pit <- function(data,
by,
n_replicates = 100) {

data <- validate(data)
data <- as_forecast(data)
data <- remove_na_observed_predicted(data)
forecast_type <- get_forecast_type(data)

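The same pattern applies here: `pit()` now validates its input via `as_forecast()` internally, and it can equally be handed an already-validated object. A sketch using the bundled continuous example data:

```r
library(magrittr)
library(scoringutils)

example_continuous %>%
  as_forecast() %>% # also run internally by pit(), as the change above shows
  pit(by = "model")
```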
103 changes: 16 additions & 87 deletions R/score.R
@@ -1,94 +1,25 @@
#' @title Evaluate forecasts in a data.frame format
#'
#' @description `score()` applies a selection of scoring metrics to a data.frame
#' of forecasts. It is the workhorse of the `scoringutils` package.
#' `score()` is a generic that dispatches to different methods depending on the
#' class of the input data. The default method is `score.default()`, which
#' validates the input, assigns a class based on the forecast type, and then
#' calls `score()` again to dispatch to the appropriate method. See below for
#' more information on how forecast types are determined.
#'
#' @details
#' **Forecast types and input format**
#'
#' Various forecast types / forecast formats are supported. At the
#' moment, these are:
#' - point forecasts
#' - binary forecasts ("soft binary classification")
#' - probabilistic forecasts in a quantile-based format (a forecast is
#' represented as a set of predictive quantiles)
#' - probabilistic forecasts in a sample-based format (a forecast is represented
#' as a set of predictive samples)
#'
#' Forecast types are determined based on the columns present in the input data.
#'
#' *Point forecasts* require a column `observed` of type numeric and a column
#' `predicted` of type numeric.
#'
#' *Binary forecasts* require a column `observed` of type factor with exactly
#' two levels and a column `predicted` of type numeric with probabilities,
#' corresponding to the probability that `observed` is equal to the second
#' factor level. See details [here][brier_score()] for more information.
#'
#' *Quantile-based forecasts* require a column `observed` of type numeric,
#' a column `predicted` of type numeric, and a column `quantile` of type numeric
#' with quantile-levels (between 0 and 1).
#'
#' *Sample-based forecasts* require a column `observed` of type numeric,
#' a column `predicted` of type numeric, and a column `sample_id` of type
#' numeric with sample indices.
#'
#' For more information see the vignettes and the example data
#' ([example_quantile], [example_continuous], [example_integer],
#' [example_point()], and [example_binary]).
#'
#' **Forecast unit**
#'
#' In order to score forecasts, `scoringutils` needs to know which rows of
#' the data belong together and jointly form a single forecast. This is
#' easy e.g. for point forecasts, where there is one row per forecast. For
#' quantile- or sample-based forecasts, however, multiple rows belong to a
#' single forecast.
#'
#' The *forecast unit* or *unit of a single forecast* is then described by the
#' combination of columns that uniquely identify a single forecast.
#' For example, we could have forecasts made by different models in various
#' locations at different time points, each for several weeks into the future.
#' The forecast unit could then be described as
#' `forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")`.
#' `scoringutils` automatically tries to determine the unit of a single
#' forecast. It uses all existing columns for this, which means that no
#' columns unrelated to the forecast unit should be present. As a very
#' simplistic example, if you had an additional column, "even", that is one
#' if the row number is even and zero otherwise, this would mess up scoring,
#' as `scoringutils` would think that this column is relevant in defining
#' the forecast unit.
#'
#' In order to avoid issues, we recommend using the function
#' [set_forecast_unit()] to determine the forecast unit manually.
#' The function simply drops unneeded columns, while making sure that all
#' necessary, 'protected columns' like "predicted" or "observed" are retained.
#'
#' **Validating inputs**
#'
#' We recommend that users validate their input prior to scoring using the
#' function [validate()] (though this will also be run internally by [score()]).
#' The function checks the input data and provides helpful information.
#'
#'
#' **Further help**
#' class of the input data.
#'
#' We recommend that users call [as_forecast()] prior to calling `score()` to
#' validate the input data and convert it to a forecast object (though
#' `score.default()` will do this if it has not been done already).
#' See below for more information on forecast types and input formats.
#' For additional help and examples, check out the [Getting Started
#' Vignette](https://epiforecasts.io/scoringutils/articles/scoringutils.html) as
#' well as the paper [Evaluating Forecasts with scoringutils in
#' R](https://arxiv.org/abs/2205.07090).
#'
#' @inheritSection forecast_types Forecast types and input format
#' @inheritSection forecast_types Forecast unit
#' @param data A data.frame or data.table with predicted and observed values.
#' @param metrics A named list of scoring functions. Names will be used as
#' column names in the output. See [metrics_point()], [metrics_binary()],
#' `metrics_quantile()`, and [metrics_sample()] for more information on the
#' default metrics used.
#' @param ... additional arguments
#'
#' @return A data.table with unsummarised scores. This will generally be
#' one score per forecast (as defined by the unit of a single forecast).
#'
@@ -97,14 +28,12 @@
#' for individual quantiles. You can call [summarise_scores()] on the
#' unsummarised scores to obtain one score per forecast unit for quantile-based
#' forecasts.
#'
#' @importFrom data.table ':=' as.data.table
#'
#' @examples
#' library(magrittr) # pipe operator
#' data.table::setDTthreads(1) # only needed to avoid issues on CRAN
#'
#' validated <- validate(example_quantile)
#' validated <- as_forecast(example_quantile)
#' score(validated) %>%
#' summarise_scores(by = c("model", "target_type"))
#'
@@ -114,7 +43,7 @@
#' set_forecast_unit(
#' c("location", "target_end_date", "target_type", "horizon", "model")
#' ) %>%
#' validate() %>%
#' as_forecast() %>%
#' score()
#'
#' # forecast formats with different metrics
@@ -125,13 +54,11 @@
#' score(example_integer)
#' score(example_continuous)
#' }
#'
#' @author Nikos Bosse \email{nikosbosse@@gmail.com}
#' @references
#' Bosse NI, Gruson H, Cori A, van Leeuwen E, Funk S, Abbott S
#' (2022) Evaluating Forecasts with scoringutils in R.
#' \doi{10.48550/arXiv.2205.07090}
#'
#' @export

score <- function(data, ...) {
@@ -141,14 +68,16 @@ score <- function(data, ...) {
#' @rdname score
#' @export
score.default <- function(data, ...) {
data <- validate(data)
assert(check_data_columns(data))
forecast_type <- get_forecast_type(data)
data <- new_scoringutils(data, paste0("scoringutils_", forecast_type))
score(data, ...)
}

#' @rdname score
#' @export
score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) {
data <- validate(data)
data <- validate_forecast(data)
data <- remove_na_observed_predicted(data)
metrics <- validate_metrics(metrics)

@@ -168,7 +97,7 @@ score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) {
#' @rdname score
#' @export
score.scoringutils_point <- function(data, metrics = metrics_point, ...) {
data <- validate(data)
data <- validate_forecast(data)
data <- remove_na_observed_predicted(data)
metrics <- validate_metrics(metrics)

@@ -185,7 +114,7 @@ score.scoringutils_point <- function(data, metrics = metrics_point, ...) {
#' @rdname score
#' @export
score.scoringutils_sample <- function(data, metrics = metrics_sample, ...) {
data <- validate(data)
data <- validate_forecast(data)
data <- remove_na_observed_predicted(data)
forecast_unit <- attr(data, "forecast_unit")
metrics <- validate_metrics(metrics)
@@ -222,7 +151,7 @@ score.scoringutils_sample <- function(data, metrics = metrics_sample, ...) {
#' @rdname score
#' @export
score.scoringutils_quantile <- function(data, metrics = metrics_quantile, ...) {
data <- validate(data)
data <- validate_forecast(data)
data <- remove_na_observed_predicted(data)
forecast_unit <- attr(data, "forecast_unit")
metrics <- validate_metrics(metrics)
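Since the `metrics` argument takes a named list of scoring functions, a custom selection can be passed directly. A sketch for sample-based forecasts using two metric functions exported in the NAMESPACE above (output score columns take the list names):

```r
library(magrittr)
library(scoringutils)

# a custom, reduced set of metrics instead of the metrics_sample default
my_metrics <- list(
  bias      = bias_sample,
  ae_median = ae_median_sample
)

example_continuous %>%
  as_forecast() %>%
  score(metrics = my_metrics)
```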
2 changes: 1 addition & 1 deletion R/utils.R
@@ -52,7 +52,7 @@ collapse_messages <- function(type = "messages", messages) {
#' @export
#' @keywords check-forecasts
#' @examples
#' check <- validate(example_quantile)
#' check <- as_forecast(example_quantile)
#' print(check)
print.scoringutils_check <- function(x, ...) {
cat("Your forecasts seem to be for a target of the following type:\n")