diff --git a/NAMESPACE b/NAMESPACE
index 824cee1d8..969ce905f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,10 +8,10 @@ S3method(score,scoringutils_binary)
 S3method(score,scoringutils_point)
 S3method(score,scoringutils_quantile)
 S3method(score,scoringutils_sample)
-S3method(validate,scoringutils_binary)
-S3method(validate,scoringutils_point)
-S3method(validate,scoringutils_quantile)
-S3method(validate,scoringutils_sample)
+S3method(validate_forecast,scoringutils_binary)
+S3method(validate_forecast,scoringutils_point)
+S3method(validate_forecast,scoringutils_quantile)
+S3method(validate_forecast,scoringutils_sample)
 export(abs_error)
 export(add_coverage)
 export(add_pairwise_comparison)
@@ -69,7 +69,7 @@ export(summarize_scores)
 export(theme_scoringutils)
 export(transform_forecasts)
 export(underprediction)
-export(validate)
+export(validate_forecast)
 export(validate_general)
 export(wis)
 importFrom(Metrics,ae)
diff --git a/NEWS.md b/NEWS.md
index cede34190..b7fd1d529 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -20,7 +20,7 @@ The update introduces breaking changes. If you want to keep using the older vers
 - `observed`: numeric, either a scalar or a vector
 - `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector)
 - `quantile`: numeric, a vector with quantile levels. Can alternatively be a matrix of the same shape as `predicted`.
-- `check_forecasts()` was replaced by a different workflow. There is now a function, `as_forecast()`, that determines the forecast type of the data, constructs a forecast object and validates it using the function `validate()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`.
+- `check_forecasts()` was replaced by a different workflow. There is now a function, `as_forecast()`, that determines the forecast type of the data, constructs a forecast object and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`.
 - The functionality for computing pairwise comparisons was split out of `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_pairwise_comparison()`, was introduced that takes summarised scores as an input and adds pairwise comparisons to it.
 - `add_coverage()` was reworked completely. Its new purpose is now to add coverage information to the raw forecast data (essentially fulfilling some of the functionality that was previously covered by `score_quantile()`).
 - Support for the interval format was mostly dropped (see PR #525 by @nikosbosse and reviewed by @seabbs)
diff --git a/R/get_-functions.R b/R/get_-functions.R
index a2e0cb648..e889911b4 100644
--- a/R/get_-functions.R
+++ b/R/get_-functions.R
@@ -151,7 +151,7 @@ get_metrics <- function(scores) {
 #' the columns that are protected, i.e. those returned by
 #' [get_protected_columns()] as well as the names of the metrics that were
 #' specified during scoring, if any.
-#' @inheritParams validate
+#' @inheritParams validate_forecast
 #' @param check_conflict Whether or not to check whether there is a conflict
 #' between a stored attribute and the inferred forecast unit. When you create
 #' a forecast object, the forecast unit is stored as an attribute. If you
@@ -183,7 +183,7 @@ get_forecast_unit <- function(data, check_conflict = FALSE) {
 #' @description Helper function to get the names of all columns in a data frame
 #' that are protected columns.
 #'
-#' @inheritParams validate
+#' @inheritParams validate_forecast
 #'
 #' @return A character vector with the names of protected columns in the data.
 #' If data is `NULL` (default) then it returns a list of all columns that are
diff --git a/R/score.R b/R/score.R
index 26a7023ef..b521838fb 100644
--- a/R/score.R
+++ b/R/score.R
@@ -75,7 +75,7 @@ score.default <- function(data, ...) {
 #' @rdname score
 #' @export
 score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) {
-  data <- validate(data)
+  data <- validate_forecast(data)
   data <- remove_na_observed_predicted(data)
 
   metrics <- validate_metrics(metrics)
@@ -95,7 +95,7 @@ score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) {
 #' @rdname score
 #' @export
 score.scoringutils_point <- function(data, metrics = metrics_point, ...) {
-  data <- validate(data)
+  data <- validate_forecast(data)
   data <- remove_na_observed_predicted(data)
 
   metrics <- validate_metrics(metrics)
@@ -112,7 +112,7 @@ score.scoringutils_point <- function(data, metrics = metrics_point, ...) {
 #' @rdname score
 #' @export
 score.scoringutils_sample <- function(data, metrics = metrics_sample, ...) {
-  data <- validate(data)
+  data <- validate_forecast(data)
   data <- remove_na_observed_predicted(data)
   forecast_unit <- attr(data, "forecast_unit")
   metrics <- validate_metrics(metrics)
@@ -149,7 +149,7 @@ score.scoringutils_sample <- function(data, metrics = metrics_sample, ...) {
 #' @rdname score
 #' @export
 score.scoringutils_quantile <- function(data, metrics = metrics_quantile, ...) {
-  data <- validate(data)
+  data <- validate_forecast(data)
   data <- remove_na_observed_predicted(data)
   forecast_unit <- attr(data, "forecast_unit")
   metrics <- validate_metrics(metrics)
diff --git a/R/validate.R b/R/validate.R
index c1adadba7..f8b20ab98 100644
--- a/R/validate.R
+++ b/R/validate.R
@@ -29,7 +29,7 @@ as_forecast <- function(data) {
   data <- new_scoringutils(data, paste0("scoringutils_", forecast_type))
 
   # validate class
-  validate(data)
+  validate_forecast(data)
 }
 
@@ -48,15 +48,18 @@ as_forecast <- function(data) {
 #' @importFrom checkmate assert_data_frame
 #' @export
 #' @keywords check-forecasts
-validate <- function(data, ...) {
-  UseMethod("validate")
+#' @examples
+#' forecast <- as_forecast(example_binary)
+#' validate_forecast(forecast)
+validate_forecast <- function(data, ...) {
+  UseMethod("validate_forecast")
 }
 
 
 #' @rdname validate
 #' @export
 #' @keywords check-forecasts
-validate.scoringutils_binary <- function(data, ...) {
+validate_forecast.scoringutils_binary <- function(data, ...) {
   data <- validate_general(data)
 
   columns_correct <- test_columns_not_present(data, c("sample_id", "quantile"))
@@ -77,7 +80,7 @@ validate.scoringutils_binary <- function(data, ...) {
 #' @rdname validate
 #' @export
 #' @keywords check-forecasts
-validate.scoringutils_point <- function(data, ...) {
+validate_forecast.scoringutils_point <- function(data, ...) {
   data <- validate_general(data)
 
   input_check <- check_input_point(data$observed, data$predicted)
@@ -91,7 +94,7 @@ validate.scoringutils_point <- function(data, ...) {
 #' @rdname validate
 #' @export
-validate.scoringutils_quantile <- function(data, ...) {
+validate_forecast.scoringutils_quantile <- function(data, ...) {
   data <- validate_general(data)
 
   assert_numeric(data$quantile, lower = 0, upper = 1)
   return(data[])
@@ -100,7 +103,7 @@ validate.scoringutils_quantile <- function(data, ...) {
 #' @rdname validate
 #' @export
 #' @keywords check-forecasts
-validate.scoringutils_sample <- function(data, ...) {
+validate_forecast.scoringutils_sample <- function(data, ...) {
   data <- validate_general(data)
   return(data[])
 }
diff --git a/README.Rmd b/README.Rmd
index 0c4c41223..15d8ab179 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -85,12 +85,12 @@ example_quantile %>%
 
 ### Scoring forecasts
 
-Forecasts can be easily and quickly scored using the `score()` function. `score()` automatically tries to determine the `forecast_unit`, i.e. the set of columns that uniquely defines a single forecast, by taking all column names of the data into account. However, it is recommended to set the forecast unit manually using `set_forecast_unit()` as this may help to avoid errors, especially when scoringutils is used in automated pipelines. The function `set_forecast_unit()` will simply drop unneeded columns. To verify everything is in order, the function `validate()` should be used. The result of that check can then be passed directly into `score()`. `score()` returns unsummarised scores, which in most cases is not what the user wants. Here we make use of additional functions from `scoringutils` to add empirical coverage levels (`add_coverage()`), and scores relative to a baseline model (here chosen to be the EuroCOVIDhub-ensemble model). See the getting started vignette for more details. Finally we summarise these scores by model and target type.
+Forecasts can be easily and quickly scored using the `score()` function. `score()` automatically tries to determine the `forecast_unit`, i.e. the set of columns that uniquely defines a single forecast, by taking all column names of the data into account. However, it is recommended to set the forecast unit manually using `set_forecast_unit()` as this may help to avoid errors, especially when scoringutils is used in automated pipelines. The function `set_forecast_unit()` will simply drop unneeded columns. To verify everything is in order, the function `validate_forecast()` should be used. The result of that check can then be passed directly into `score()`. `score()` returns unsummarised scores, which in most cases is not what the user wants. Here we make use of additional functions from `scoringutils` to add empirical coverage levels (`add_coverage()`), and scores relative to a baseline model (here chosen to be the EuroCOVIDhub-ensemble model). See the getting started vignette for more details. Finally we summarise these scores by model and target type.
 
 ```{r score-example}
 example_quantile %>%
   set_forecast_unit(c("location", "target_end_date", "target_type", "horizon", "model")) %>%
-  validate() %>%
+  validate_forecast() %>%
   add_coverage() %>%
   score() %>%
   summarise_scores(
diff --git a/README.md b/README.md
index 0a5f93881..3d28681ca 100644
--- a/README.md
+++ b/README.md
@@ -116,7 +116,7 @@ column names of the data into account. However, it is recommended to
 set the forecast unit manually using `set_forecast_unit()` as this may
 help to avoid errors, especially when scoringutils is used in automated
 pipelines. The function `set_forecast_unit()` will simply drop unneeded
-columns. To verify everything is in order, the function `validate()`
+columns. To verify everything is in order, the function `validate_forecast()`
 should be used. The result of that check can then be passed directly into
 `score()`. `score()` returns unsummarised scores, which in most cases
 is not what the user wants. Here we make use of additional functions from
@@ -128,7 +128,7 @@ details. Finally we summarise these scores by model and target type.
 ``` r
 example_quantile %>%
   set_forecast_unit(c("location", "target_end_date", "target_type", "horizon", "model")) %>%
-  validate() %>%
+  validate_forecast() %>%
   add_coverage() %>%
   score() %>%
   summarise_scores(
diff --git a/man/as_forecast.Rd b/man/as_forecast.Rd
index 8de3066b5..3c8f8de6e 100644
--- a/man/as_forecast.Rd
+++ b/man/as_forecast.Rd
@@ -61,4 +61,8 @@ For more information see the vignettes and the example data
 \code{\link[=example_point]{example_point()}}, and \link{example_binary}).
 }
+\examples{
+as_forecast(example_binary)
+as_forecast(example_quantile)
+}
 \keyword{check-forecasts}
diff --git a/man/validate.Rd b/man/validate_forecast.Rd
similarity index 84%
rename from man/validate.Rd
rename to man/validate_forecast.Rd
index 7c3f4a3a6..5d4bbe2d7 100644
--- a/man/validate.Rd
+++ b/man/validate_forecast.Rd
@@ -1,22 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/validate.R
-\name{validate}
-\alias{validate}
-\alias{validate.scoringutils_binary}
-\alias{validate.scoringutils_point}
-\alias{validate.scoringutils_quantile}
-\alias{validate.scoringutils_sample}
+\name{validate_forecast}
+\alias{validate_forecast}
 \title{Validate input data}
 \usage{
-validate(data, ...)
-
-\method{validate}{scoringutils_binary}(data, ...)
-
-\method{validate}{scoringutils_point}(data, ...)
-
-\method{validate}{scoringutils_quantile}(data, ...)
-
-\method{validate}{scoringutils_sample}(data, ...)
+validate_forecast(data, ...)
 }
 \arguments{
 \item{data}{A data.frame or data.table with predicted and observed values.}
@@ -68,4 +56,8 @@ For more information see the vignettes and the example data
 \code{\link[=example_point]{example_point()}}, and \link{example_binary}).
 }
+\examples{
+forecast <- as_forecast(example_binary)
+validate_forecast(forecast)
+}
 \keyword{check-forecasts}
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 5d8b37c3f..1afc9df1e 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -104,7 +104,7 @@ test_that("run_safely() works as expected", {
 # })
 
 # test_that("is_scoringutils_check() is working", {
-#   checked <- suppressMessages(validate(example_binary))
+#   checked <- suppressMessages(validate_forecast(example_binary))
 #   expect_true(is_scoringutils_check(checked))
 #
 #   checked$cleaned_data <- NULL
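
Usage note (not part of the diff): a minimal sketch of the renamed workflow, assembled from the new `@examples` and the README pipeline above. The grouping `by = c("model", "target_type")` is an assumption inferred from the README prose ("we summarise these scores by model and target type"), since the diff truncates the `summarise_scores()` call after the opening parenthesis.

``` r
library(scoringutils)
library(magrittr) # provides the %>% pipe used in the README examples

# Explicit validation, as in the new man/validate_forecast.Rd example:
forecast <- as_forecast(example_binary)
validate_forecast(forecast)

# Validation as part of a scoring pipeline, mirroring the README chunk.
# NOTE: by = c("model", "target_type") is an assumption; the diff cuts the
# summarise_scores() call off before its arguments.
example_quantile %>%
  set_forecast_unit(
    c("location", "target_end_date", "target_type", "horizon", "model")
  ) %>%
  validate_forecast() %>%
  add_coverage() %>%
  score() %>%
  summarise_scores(by = c("model", "target_type"))
```

Calling `score()` on the output of `as_forecast()` works as well, since each `score.scoringutils_*()` method calls `validate_forecast()` internally (see the R/score.R hunks above).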