diff --git a/.Rbuildignore b/.Rbuildignore index 6cc6c4991..04a17fdfa 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,6 +12,7 @@ ^Meta$ ^_pkgdown\.yml$ ^inst/manuscript/manuscript_cache$ +^inst/manuscript/.trackdown$ ^\.lintr$ ^docs$ ^\.devcontainer$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index f2a50448b..cda899829 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -23,7 +23,7 @@ jobs: - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} - - {os: ubuntu-latest, r: '3.5'} + - {os: ubuntu-latest, r: '3.6'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index acd347bab..5bf8eecd3 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ inst/manuscript/manuscript.blg inst/manuscript/manuscript.pdf inst/manuscript/manuscript.tex inst/manuscript/manuscript_files/ +inst/manuscript/.trackdown docs ..bfg-report/ .DS_Store diff --git a/.lintr b/.lintr index 7ddfe22a1..5f2c4aacf 100644 --- a/.lintr +++ b/.lintr @@ -11,6 +11,6 @@ linters: linters_with_tags( exclusions: c( list.files("tests", recursive = TRUE, full.names = TRUE), list.files("inst", recursive = TRUE, full.names = TRUE), - "vignettes/metric-details.Rmd" + list.files("vignettes", pattern = ".R$", full.names = TRUE) ) exclude: "# nolint" diff --git a/DESCRIPTION b/DESCRIPTION index cde730e6e..95e581a07 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,5 +73,5 @@ URL: https://doi.org/10.48550/arXiv.2205.07090, https://epiforecasts.io/scoringu BugReports: https://github.com/epiforecasts/scoringutils/issues VignetteBuilder: knitr Depends: - R (>= 3.5) + R (>= 3.6) Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index d3b6ffe22..35c1eb7c0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,29 +1,26 @@ # Generated by roxygen2: do not edit by hand -S3method(plot,scoringutils_available_forecasts) +S3method(as_forecast,default) S3method(print,scoringutils_check) S3method(quantile_to_interval,data.frame) S3method(quantile_to_interval,numeric) S3method(score,default) -S3method(score,scoringutils_binary) -S3method(score,scoringutils_point) -S3method(score,scoringutils_quantile) -S3method(score,scoringutils_sample) -S3method(validate,default) -S3method(validate,scoringutils_binary) -S3method(validate,scoringutils_point) -S3method(validate,scoringutils_quantile) -S3method(validate,scoringutils_sample) +S3method(score,forecast_binary) +S3method(score,forecast_point) +S3method(score,forecast_quantile) +S3method(score,forecast_sample) +S3method(validate_forecast,forecast_binary) +S3method(validate_forecast,forecast_point) +S3method(validate_forecast,forecast_quantile) +S3method(validate_forecast,forecast_sample) export(abs_error) export(add_coverage) export(add_pairwise_comparison) export(ae_median_quantile) export(ae_median_sample) -export(avail_forecasts) -export(available_forecasts) +export(as_forecast) export(available_metrics) export(bias_quantile) -export(bias_range) export(bias_sample) export(brier_score) export(correlation) @@ -31,10 +28,12 @@ export(crps_sample) export(dispersion) export(dss_sample) export(get_duplicate_forecasts) -export(interval_coverage_deviation_quantile) +export(get_forecast_counts) +export(get_forecast_type) +export(get_forecast_unit) +export(interval_coverage_dev_quantile) export(interval_coverage_quantile) export(interval_coverage_sample) -export(interval_score) export(log_shift) export(logs_binary) export(logs_sample) @@ -42,13 +41,13 @@ export(mad_sample) export(make_NA) export(make_na) export(merge_pred_and_obs) -export(new_scoringutils) +export(new_forecast) export(overprediction) export(pairwise_comparison) export(pit) export(pit_sample) -export(plot_avail_forecasts) export(plot_correlation) +export(plot_forecast_counts) export(plot_heatmap) export(plot_interval_coverage) export(plot_pairwise_comparison) @@ -59,6 +58,7 @@ export(plot_ranges) export(plot_score_table) export(plot_wis) export(quantile_score) +export(quantile_to_interval) export(run_safely) export(sample_to_quantile) export(score) @@ -70,7 +70,7 @@ export(summarize_scores) export(theme_scoringutils) export(transform_forecasts) export(underprediction) -export(validate) +export(validate_forecast) export(validate_general) export(wis) importFrom(Metrics,ae) @@ -85,6 +85,8 @@ importFrom(checkmate,assert_list) importFrom(checkmate,assert_logical) importFrom(checkmate,assert_number) importFrom(checkmate,assert_numeric) +importFrom(checkmate,assert_string) +importFrom(checkmate,assert_vector) importFrom(checkmate,check_atomic_vector) importFrom(checkmate,check_data_frame) importFrom(checkmate,check_function) diff --git a/NEWS.md b/NEWS.md index 0faf9eaf2..e77c172c7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ The update introduces breaking changes. If you want to keep using the older vers ## Package updates - In `score()`, required columns "true_value" and "prediction" were renamed and replaced by required columns "observed" and "predicted". Scoring functions now also use the function arguments "observed" and "predicted" everywhere consistently. +- The overall scoring workflow was updated. `score()` is now a generic function that dispatches the correct method based on the forecast type. forecast types currently supported are "binary", "point", "sample" and "quantile" with corresponding classes "forecast_binary", "forecast_point", "forecast_sample" and "forecast_quantile". An object of class `forecast_*` can be created using the function `as_forecast()`, which also replaces the previous function `check_forecasts()` (see more information below). - Scoring functions received a consistent interface and input checks: - metrics for binary forecasts: - `observed`: factor with exactly 2 levels @@ -20,15 +21,18 @@ The update introduces breaking changes. If you want to keep using the older vers - `observed`: numeric, either a scalar or a vector - `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector) - `quantile`: numeric, a vector with quantile-levels. Can alternatively be a matrix of the same shape as `predicted`. -- `check_forecasts()` was replaced by a new function `validate()`. `validate()` validates the input and in that sense fulfills the purpose of `check_forecasts()`. It has different methods: `validate.default()` assigns the input a class based on their forecast type. Other methods validate the input specifically for the various forecast types. +- `check_forecasts()` was replaced by a different workflow. There now is a function, `as_forecast()`, that determines forecast type of the data, constructs a forecasting object and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`. - The functionality for computing pairwise comparisons was now split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_pairwise_comparison()`, was introduced that takes summarised scores as an input and adds pairwise comparisons to it. - `add_coverage()` was reworked completely. It's new purpose is now to add coverage information to the raw forecast data (essentially fulfilling some of the functionality that was previously covered by `score_quantile()`) +- Support for the interval format was mostly dropped (see PR #525 by @nikosbosse and reviewed by @seabbs) + - The function `bias_range()` was removed (users should now use `bias_quantile()` instead) + - The function `interval_score()` was made an internal function rather than being exported to users. We recommend using `wis()` instead. - The function `find_duplicates()` was renamed to `get_duplicate_forecasts()` - Changes to `avail_forecasts()` and `plot_avail_forecasts()`: - - The function `avail_forecasts()` was renamed to `available_forecasts()` for consistency with `available_metrics()`. The old function, `avail_forecasts()` is still available as an alias, but will be removed in the future. - - For clarity, the output column in `avail_forecasts()` was renamed from "Number forecasts" to "count". - - `available_forecasts()` now also displays combinations where there are 0 forecasts, instead of silently dropping corresponding rows. - - `plot_avail_forecasts()` has been deprecated in favour of an S3 method for `plot()`. An alias is still available, but will be removed in the future. + - The function `avail_forecasts()` was renamed to `get_forecast_counts()`. This represents a change in the naming convention where we aim to name functions that provide the user with additional useful information about the data with a prefix "get_". Sees Issue #403 and #521 and PR #511 by @nikosbosse and reviewed by @seabbs for details. + - For clarity, the output column in `get_forecast_counts()` was renamed from "Number forecasts" to "count". + - `get_forecast_counts()` now also displays combinations where there are 0 forecasts, instead of silently dropping corresponding rows. + - `plot_avail_forecasts()` was renamed `plot_forecast_counts()` in line with the change in the function name. The `x` argument no longer has a default value, as the value will depend on the data provided by the user. - The deprecated `..density..` was replaced with `after_stat(density)` in ggplot calls. - Files ending in ".Rda" were renamed to ".rds" where appropriate when used together with `saveRDS()` or `readRDS()`. - added documentation for the return value of `summarise_scores()`. @@ -188,7 +192,7 @@ to a function `summarise_scores()` - New function `check_forecasts()` to analyse input data before scoring - New function `correlation()` to compute correlations between different metrics - New function `add_coverage()` to add coverage for specific central prediction intervals. -- New function `available_forecasts()` allows to visualise the number of available forecasts. +- New function `avail_forecasts()` allows to visualise the number of available forecasts. - New function `find_duplicates()` to find duplicate forecasts which cause an error. - All plotting functions were renamed to begin with `plot_`. Arguments were simplified. diff --git a/R/add_coverage.R b/R/add_coverage.R index 684556026..e7e251614 100644 --- a/R/add_coverage.R +++ b/R/add_coverage.R @@ -47,17 +47,13 @@ #' @export add_coverage <- function(data) { stored_attributes <- get_scoringutils_attributes(data) - data <- validate(data) + data <- as_forecast(data) forecast_unit <- get_forecast_unit(data) data_cols <- colnames(data) # store so we can reset column order later - # what happens if quantiles are not symmetric around the median? - # should things error? Also write tests for that. interval_data <- quantile_to_interval(data, format = "wide") - interval_data[, interval_coverage := ifelse( - observed <= upper & observed >= lower, - TRUE, - FALSE) + interval_data[, + interval_coverage := (observed <= upper) & (observed >= lower) ][, c("lower", "upper", "observed") := NULL] data[, range := get_range_from_quantile(quantile)] diff --git a/R/available_forecasts.R b/R/available_forecasts.R index 17418b0a6..40eda6080 100644 --- a/R/available_forecasts.R +++ b/R/available_forecasts.R @@ -31,14 +31,14 @@ #' @examples #' data.table::setDTthreads(1) # only needed to avoid issues on CRAN #' -#' available_forecasts(example_quantile, +#' get_forecast_counts(example_quantile, #' by = c("model", "target_type") #' ) -available_forecasts <- function(data, +get_forecast_counts <- function(data, by = NULL, collapse = c("quantile", "sample_id")) { - data <- validate(data) + data <- as_forecast(data) forecast_unit <- attr(data, "forecast_unit") data <- remove_na_observed_predicted(data) @@ -58,7 +58,7 @@ available_forecasts <- function(data, data <- data[data[, .I[1], by = collapse_by]$V1] # count number of rows = number of forecasts - out <- data[, .(`count` = .N), by = by] + out <- data[, .(count = .N), by = by] # make sure that all combinations in "by" are included in the output (with # count = 0). To achieve that, take the unique values in data and expand grid @@ -70,23 +70,5 @@ available_forecasts <- function(data, out <- merge(out, out_empty, by = by, all.y = TRUE) out[, count := nafill(count, fill = 0)] - class(out) <- c("scoringutils_available_forecasts", class(out)) - return(out[]) } - -#' @title Count Number of Available Forecasts `r lifecycle::badge("deprecated")` -#' @details `r lifecycle::badge("deprecated")` Deprecated in 1.2.2. Use -#' [available_forecasts()] instead. -#' @inherit available_forecasts -#' @keywords check-forecasts -#' @export -avail_forecasts <- function(data, - by = NULL, - collapse = c("quantile", "sample")) { - lifecycle::deprecate_warn( - "1.2.2", "avail_forecasts()", - "available_forecasts()" - ) - available_forecasts(data, by, collapse) -} diff --git a/R/check-input-helpers.R b/R/check-input-helpers.R index 95869f02b..63bace514 100644 --- a/R/check-input-helpers.R +++ b/R/check-input-helpers.R @@ -5,7 +5,7 @@ #' @inheritDotParams checkmate::check_numeric #' @importFrom checkmate check_atomic_vector check_numeric #' @inherit document_check_functions return -#' @keywords internal +#' @keywords internal_input_check check_numeric_vector <- function(x, ...) { # check functions must return TRUE on success # and a custom error message otherwise @@ -36,7 +36,7 @@ check_numeric_vector <- function(x, ...) { #' #' @return None. Function errors if quantiles are invalid. #' -#' @keywords internal +#' @keywords internal_input_check check_quantiles <- function(quantiles, name = "quantiles", range = c(0, 1)) { if (any(quantiles < range[1]) || any(quantiles > range[2])) { stop(name, " must be between ", range[1], " and ", range[2]) @@ -57,7 +57,7 @@ check_quantiles <- function(quantiles, name = "quantiles", range = c(0, 1)) { #' @param expr an expression to be evaluated #' @importFrom checkmate assert assert_numeric check_matrix #' @inherit document_check_functions return -#' @keywords internal +#' @keywords internal_input_check check_try <- function(expr) { result <- try(expr, silent = TRUE) if (is.null(result)) { @@ -79,7 +79,7 @@ check_try <- function(expr) { #' @return The function returns `NULL`, but throws an error if the variable is #' missing. #' -#' @keywords internal +#' @keywords internal_input_check assert_not_null <- function(...) { vars <- list(...) varnames <- names(vars) @@ -112,7 +112,7 @@ assert_not_null <- function(...) { #' within another checking function. #' @inherit document_assert_functions return #' -#' @keywords internal +#' @keywords internal_input_check assert_equal_length <- function(..., one_allowed = TRUE, call_levels_up = 2) { @@ -161,7 +161,7 @@ assert_equal_length <- function(..., #' @param attribute The name of the attribute to check #' @param expected The expected value of the attribute #' @inherit document_check_functions return -#' @keywords internal +#' @keywords internal_input_check check_attribute_conflict <- function(object, attribute, expected) { existing <- attr(object, attribute) if (is.vector(existing) && is.vector(expected)) { @@ -175,7 +175,7 @@ check_attribute_conflict <- function(object, attribute, expected) { "from what's expected based on the data.\n", "Existing: ", toString(existing), "\n", "Expected: ", toString(expected), "\n", - "Running `validate()` again might solve the problem" + "Running `as_forecast()` again might solve the problem" ) return(msg) } @@ -188,8 +188,9 @@ check_attribute_conflict <- function(object, attribute, expected) { #' @description #' Check whether the data.table has a column called `model`. #' If not, a column called `model` is added with the value `Unspecified model`. +#' @inheritParams score #' @return The data.table with a column called `model` -#' @keywords internal +#' @keywords internal_input_check assure_model_column <- function(data) { if (!("model" %in% colnames(data))) { message( @@ -208,7 +209,7 @@ assure_model_column <- function(data) { #' returns TRUE and a string with an error message otherwise. #' @param forecast_unit Character vector denoting the unit of a single forecast. #' @inherit document_check_functions params return -#' @keywords internal +#' @keywords internal_input_check check_number_per_forecast <- function(data, forecast_unit) { # check whether there are the same number of quantiles, samples -------------- data[, scoringutils_InternalNumCheck := length(predicted), by = forecast_unit] @@ -235,7 +236,7 @@ check_number_per_forecast <- function(data, forecast_unit) { #' an error message, otherwise it returns TRUE. #' @inherit document_check_functions params return #' -#' @keywords internal +#' @keywords internal_input_check check_no_NA_present <- function(data, columns) { for (x in columns){ if (anyNA(data[[x]])) { @@ -253,20 +254,13 @@ check_no_NA_present <- function(data, columns) { } - - -# print stuff -diagnose <- function(data) { - -} - #' Check that there are no duplicate forecasts #' #' @description #' Runs [get_duplicate_forecasts()] and returns a message if an issue is encountered #' @inheritParams get_duplicate_forecasts #' @inherit document_check_functions return -#' @keywords internal +#' @keywords internal_input_check check_duplicates <- function(data, forecast_unit = NULL) { check_duplicates <- get_duplicate_forecasts(data, forecast_unit = forecast_unit) @@ -290,7 +284,7 @@ check_duplicates <- function(data, forecast_unit = NULL) { #' and returns a message with the first issue encountered. #' @inherit document_check_functions params return #' @importFrom checkmate assert_character -#' @keywords check-inputs +#' @keywords internal_input_check check_columns_present <- function(data, columns) { if (is.null(columns)) { return(TRUE) @@ -322,7 +316,7 @@ check_columns_present <- function(data, columns) { #' are present, the function returns TRUE. #' @inheritParams document_check_functions #' @return Returns TRUE if all columns are present and FALSE otherwise -#' @keywords internal +#' @keywords internal_input_check test_columns_present <- function(data, columns) { check <- check_columns_present(data, columns) return(is.logical(check)) @@ -334,7 +328,7 @@ test_columns_present <- function(data, columns) { #' more columns are present, the function returns FALSE. #' @inheritParams document_check_functions #' @return Returns TRUE if none of the columns are present and FALSE otherwise -#' @keywords internal +#' @keywords internal_input_check test_columns_not_present <- function(data, columns) { if (any(columns %in% colnames(data))) { return(FALSE) @@ -349,7 +343,7 @@ test_columns_not_present <- function(data, columns) { #' "quantile" and "sample_id" is present. #' @inherit document_check_functions params return #' @importFrom checkmate check_data_frame -#' @keywords check-inputs +#' @keywords internal_input_check check_data_columns <- function(data) { is_data <- check_data_frame(data, min.rows = 1) if (!is.logical(is_data)) { @@ -374,7 +368,7 @@ check_data_columns <- function(data) { #' @param object An object to be checked #' @param attribute name of an attribute to be checked #' @inherit document_check_functions return -#' @keywords check-inputs +#' @keywords internal_input_check check_has_attribute <- function(object, attribute) { if (is.null(attr(object, attribute))) { return( diff --git a/R/check-inputs-scoring-functions.R b/R/check-inputs-scoring-functions.R index 0e4467e3c..e0c784ff2 100644 --- a/R/check-inputs-scoring-functions.R +++ b/R/check-inputs-scoring-functions.R @@ -10,7 +10,7 @@ #' vector of size N. #' @importFrom checkmate assert assert_numeric check_matrix #' @inherit document_assert_functions return -#' @keywords check-inputs +#' @keywords internal_input_check assert_input_sample <- function(observed, predicted) { assert_numeric(observed, min.len = 1) n_obs <- length(observed) @@ -30,7 +30,7 @@ assert_input_sample <- function(observed, predicted) { #' @title Check that inputs are correct for sample-based forecast #' @inherit assert_input_sample params description #' @inherit document_check_functions return -#' @keywords check-inputs +#' @keywords internal_input_check check_input_sample <- function(observed, predicted) { result <- check_try(assert_input_sample(observed, predicted)) return(result) @@ -54,7 +54,7 @@ check_input_sample <- function(observed, predicted) { #' FALSE. Whether the quantile levels are required to be unique or not. #' @importFrom checkmate assert assert_numeric check_matrix check_vector #' @inherit document_assert_functions return -#' @keywords internal +#' @keywords internal_input_check assert_input_quantile <- function(observed, predicted, quantile, unique_quantiles = TRUE) { assert_numeric(observed, min.len = 1) @@ -85,7 +85,7 @@ assert_input_quantile <- function(observed, predicted, quantile, #' @title Check that inputs are correct for quantile-based forecast #' @inherit assert_input_quantile params description #' @inherit check_input_sample return description -#' @keywords check-inputs +#' @keywords internal_input_check check_input_quantile <- function(observed, predicted, quantile) { result <- check_try(assert_input_quantile(observed, predicted, quantile)) return(result) @@ -106,7 +106,7 @@ check_input_quantile <- function(observed, predicted, quantile) { #' (25%, 75%) prediction interval. #' @importFrom rlang warn #' @inherit document_assert_functions return -#' @keywords internal +#' @keywords internal_input_check assert_input_interval <- function(observed, lower, upper, range) { assert(check_numeric_vector(observed, min.len = 1)) @@ -145,7 +145,7 @@ assert_input_interval <- function(observed, lower, upper, range) { #' @title Check that inputs are correct for interval-based forecast #' @inherit assert_input_interval params description #' @inherit check_input_sample return description -#' @keywords check-inputs +#' @keywords internal_input_check check_input_interval <- function(observed, lower, upper, range) { result <- check_try(assert_input_quantile(observed, lower, upper, range)) return(result) @@ -161,29 +161,25 @@ check_input_interval <- function(observed, lower, upper, range) { #' that `predicted` represents the probability that the observed value is equal #' to the highest factor level. #' @param predicted Input to be checked. `predicted` should be a vector of -#' length n, holding probabilities. Values represent the probability that +#' length n, holding probabilities. Alternatively, `predicted` can be a matrix +#' of size n x 1. Values represent the probability that #' the corresponding value in `observed` will be equal to the highest #' available factor level. #' @importFrom checkmate assert assert_factor #' @inherit document_assert_functions return -#' @keywords check-inputs +#' @keywords internal_input_check assert_input_binary <- function(observed, predicted) { - if (length(observed) != length(predicted)) { - stop("`observed` and `predicted` need to be ", - "of same length when scoring binary forecasts") - } - assert_factor(observed, n.levels = 2) - levels <- levels(observed) - assert( - check_numeric_vector(predicted, min.len = 1, lower = 0, upper = 1) - ) + assert_factor(observed, n.levels = 2, min.len = 1) + assert_numeric(predicted, lower = 0, upper = 1) + assert_dims_ok_point(observed, predicted) return(invisible(NULL)) } + #' @title Check that inputs are correct for binary forecast #' @inherit assert_input_binary params description #' @inherit document_check_functions return -#' @keywords check-inputs +#' @keywords internal_input_check check_input_binary <- function(observed, predicted) { result <- check_try(assert_input_binary(observed, predicted)) return(result) @@ -198,22 +194,64 @@ check_input_binary <- function(observed, predicted) { #' @param predicted Input to be checked. Should be a numeric vector with the #' predicted values of size n #' @inherit document_assert_functions return -#' @keywords check-inputs +#' @keywords internal_input_check assert_input_point <- function(observed, predicted) { - assert(check_numeric_vector(observed, min.len = 1)) - assert(check_numeric_vector(predicted, min.len = 1)) - if (length(observed) != length(predicted)) { - stop("`observed` and `predicted` need to be ", - "of same length when scoring point forecasts") - } + assert(check_numeric(observed)) + assert(check_numeric(predicted)) + assert(check_dims_ok_point(observed, predicted)) return(invisible(NULL)) } #' @title Check that inputs are correct for point forecast #' @inherit assert_input_point params description #' @inherit document_check_functions return -#' @keywords check-inputs +#' @keywords internal_input_check check_input_point <- function(observed, predicted) { result <- check_try(assert_input_point(observed, predicted)) return(result) } + + +#' @title Assert Inputs Have Matching Dimensions +#' @description Function assesses whether input dimensions match. In the +#' following, n is the number of observations / forecasts. Scalar values may +#' be repeated to match the length of the other input. +#' Allowed options are therefore +#' - `observed` is vector of length 1 or length n +#' - `predicted` is +#' - a vector of of length 1 or length n +#' - a matrix with n rows and 1 column +#' @inherit assert_input_binary +#' @inherit document_assert_functions return +#' @importFrom checkmate assert_vector check_matrix check_vector assert +#' @keywords internal_input_check +assert_dims_ok_point <- function(observed, predicted) { + assert_vector(observed, min.len = 1) + n_obs <- length(observed) + assert( + check_vector(predicted, min.len = 1, strict = TRUE), + check_matrix(predicted, ncols = 1, nrows = n_obs) + ) + dim_p <- dim(predicted) + if (!is.null(dim_p) && (length(dim_p) > 1) && (dim_p[2] > 1)) { + stop("`predicted` must be a vector or a matrix with one column. Found ", + dim(predicted)[2], " columns") + } + n_pred <- length(as.vector(predicted)) + # check that both are either of length 1 or of equal length + if ((n_obs != 1) && (n_pred != 1) && (n_obs != n_pred)) { + stop("`observed` and `predicted` must either be of length 1 or ", + "of equal length. Found ", n_obs, " and ", n_pred) + } + return(invisible(NULL)) +} + + +#' @title Check Inputs Have Matching Dimensions +#' @inherit assert_dims_ok_point params description +#' @inherit document_check_functions return +#' @keywords internal_input_check +check_dims_ok_point <- function(observed, predicted) { + result <- check_try(assert_dims_ok_point(observed, predicted)) + return(result) +} diff --git a/R/convenience-functions.R b/R/convenience-functions.R index 31f3ab5ab..32822b96d 100644 --- a/R/convenience-functions.R +++ b/R/convenience-functions.R @@ -218,7 +218,7 @@ log_shift <- function(x, offset = 0, base = exp(1)) { #' are relevant to determine the forecast unit. This may lead to unexpected #' behaviour, so setting the forecast unit explicitly can help make the code #' easier to debug and easier to read. When used as part of a workflow, -#' `set_forecast_unit()` can be directly piped into `validate()` to +#' `set_forecast_unit()` can be directly piped into `as_forecast()` to #' check everything is in order. #' #' @inheritParams score diff --git a/R/correlations.R b/R/correlations.R index 75eda7583..0a553e8a7 100644 --- a/R/correlations.R +++ b/R/correlations.R @@ -21,8 +21,6 @@ correlation <- function(scores, metrics = NULL, digits = NULL) { - metrics <- check_metrics(metrics) - metrics <- get_metrics(scores) # if quantile column is present, throw a warning diff --git a/R/data.R b/R/data.R index c57e044d8..d64b55366 100644 --- a/R/data.R +++ b/R/data.R @@ -19,7 +19,9 @@ #' \item{model}{name of the model that generated the forecasts} #' \item{horizon}{forecast horizon in weeks} #' } +# nolint start #' @source \url{https://github.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/commit/a42867b1ea152c57e25b04f9faa26cfd4bfd8fa6/} +# nolint end "example_quantile" @@ -44,7 +46,9 @@ #' \item{model}{name of the model that generated the forecasts} #' \item{horizon}{forecast horizon in weeks} #' } +# nolint start #' @source \url{https://github.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/commit/a42867b1ea152c57e25b04f9faa26cfd4bfd8fa6/} +# nolint end "example_point" @@ -69,7 +73,9 @@ #' \item{predicted}{predicted value} #' \item{sample_id}{id for the corresponding sample} #' } +# nolint start #' @source \url{https://github.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/commit/a42867b1ea152c57e25b04f9faa26cfd4bfd8fa6/} +# nolint end "example_continuous" @@ -94,6 +100,9 @@ #' \item{predicted}{predicted value} #' \item{sample_id}{id for the corresponding sample} #' } +# nolint start +#' @source \url{https://github.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/commit/a42867b1ea152c57e25b04f9faa26cfd4bfd8fa6/} +# nolint end "example_integer" @@ -124,7 +133,9 @@ #' \item{horizon}{forecast horizon in weeks} #' \item{predicted}{predicted value} #' } +# nolint start #' @source \url{https://github.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/commit/a42867b1ea152c57e25b04f9faa26cfd4bfd8fa6/} +# nolint end "example_binary" @@ -147,7 +158,9 @@ #' \item{model}{name of the model that generated the forecasts} #' \item{horizon}{forecast horizon in weeks} #' } +# nolint start #' @source \url{https://github.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/commit/a42867b1ea152c57e25b04f9faa26cfd4bfd8fa6/} +# nolint end "example_quantile_forecasts_only" @@ -167,7 +180,9 @@ #' \item{observed}{observed values} #' \item{location_name}{name of the country for which a prediction was made} #' } +# nolint start #' @source \url{https://github.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/commit/a42867b1ea152c57e25b04f9faa26cfd4bfd8fa6/} +# nolint end "example_truth_only" #' Summary information for selected metrics @@ -215,14 +230,16 @@ #' Default metrics for quantile-based forecasts. #' #' A named list with functions: -#' - "wis" = [wis()] +#' - "wis" = [wis] #' - "overprediction" = [overprediction()] #' - "underprediction" = [underprediction()] #' - "dispersion" = [dispersion()] #' - "bias" = [bias_quantile()] -#' - "coverage_50" = \(...) {run_safely(..., range = 50, fun = [interval_coverage_quantile][interval_coverage_quantile()])} -#' - "coverage_90" = \(...) {run_safely(..., range = 90, fun = [interval_coverage_quantile][interval_coverage_quantile()])} -#' - "coverage_deviation" = [interval_coverage_deviation_quantile()], +#' - "coverage_50" = [interval_coverage_quantile()] +#' - "coverage_90" = \(...) \{ +#' run_safely(..., range = 90, fun = [interval_coverage_quantile]) +#' \} +#' - "coverage_deviation" = [interval_coverage_dev_quantile()], #' - "ae_median" = [ae_median_quantile()] #' @keywords info "metrics_quantile" diff --git a/R/documentation-templates.R b/R/documentation-templates.R index 087c77d4b..e89648f6a 100644 --- a/R/documentation-templates.R +++ b/R/documentation-templates.R @@ -1,20 +1,88 @@ +#' @title Documentation template for forecast types +#' +#' @details # Forecast types and input format +#' +#' Various different forecast types / forecast formats are supported. At the +#' moment, those are +#' - point forecasts +#' - binary forecasts ("soft binary classification") +#' - Probabilistic forecasts in a quantile-based format (a forecast is +#' represented as a set of predictive quantiles) +#' - Probabilistic forecasts in a sample-based format (a forecast is represented +#' as a set of predictive samples) +#' +#' Forecast types are determined based on the columns present in the input data. +#' +#' *Point forecasts* require a column `observed` of type numeric and a column +#' `predicted` of type numeric. +#' +#' *Binary forecasts* require a column `observed` of type factor with exactly +#' two levels and a column `predicted` of type numeric with probabilities, +#' corresponding to the probability that `observed` is equal to the second +#' factor level. See details [here][brier_score()] for more information. +#' +#' *Quantile-based forecasts* require a column `observed` of type numeric, +#' a column `predicted` of type numeric, and a column `quantile` of type numeric +#' with quantile-levels (between 0 and 1). +#' +#' *Sample-based forecasts* require a column `observed` of type numeric, +#' a column `predicted` of type numeric, and a column `sample_id` of type +#' numeric with sample indices. +#' +#' For more information see the vignettes and the example data +#' ([example_quantile], [example_continuous], [example_integer], +#' [example_point()], and [example_binary]). +#' +#' @details # Forecast unit +#' +#' In order to score forecasts, `scoringutils` needs to know which of the rows +#' of the data belong together and jointly form a single forecasts. This is +#' easy e.g. for point forecast, where there is one row per forecast. For +#' quantile or sample-based forecasts, however, there are multiple rows that +#' belong to single forecast. +#' +#' The *forecast unit* or *unit of a single forecast* is then described by the +#' combination of columns that uniquely identify a single forecast. +#' For example, we could have forecasts made by different models in various +#' locations at different time points, each for several weeks into the future. +#' The forecast unit could then be described as +#' `forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")`. +#' `scoringutils` automatically tries to determine the unit of a single +#' forecast. It uses all existing columns for this, which means that no columns +#' must be present that are unrelated to the forecast unit. As a very simplistic +#' example, if you had an additional row, "even", that is one if the row number +#' is even and zero otherwise, then this would mess up scoring as `scoringutils` +#' then thinks that this column was relevant in defining the forecast unit. +#' +#' In order to avoid issues, we recommend using the function +#' [set_forecast_unit()] to determine the forecast unit manually. +#' The function simply drops unneeded columns, while making sure that all +#' necessary, 'protected columns' like "predicted" or "observed" are retained. +#' +#' @name forecast_types +#' @keywords internal +NULL + #' Documentation template for check functions #' @param data A data.frame or similar to be checked #' @param columns A character vector of column names to check #' @return Returns TRUE if the check was successful and a string with an #' error message otherwise #' @name document_check_functions +#' @keywords internal NULL #' Documentation template for check functions #' @returns Returns NULL invisibly if the assertion was successful and throws an #' error otherwise. #' @name document_assert_functions +#' @keywords internal NULL #' Documentation template for test functions #' @returns Returns TRUE if the check was successful and FALSE otherwise #' @name document_test_functions +#' @keywords internal NULL #' Documentation template for scoring input data @@ -22,4 +90,5 @@ NULL #' specifications detailed in [score()]. #' @param scores A data.table of scores as produced by [score()]. #' @name document_score_data +#' @keywords internal NULL diff --git a/R/get_-functions.R b/R/get_-functions.R index b55e1ef4f..f68434651 100644 --- a/R/get_-functions.R +++ b/R/get_-functions.R @@ -1,19 +1,27 @@ # Functions that help to obtain information about the data -#' @title Infer the type of a forecast based on a data.frame +#' @title Infer Forecast Type +#' @description Helper function to infer the forecast type based on a +#' data.frame or similar with predictions. Please check the vignettes to +#' learn more about forecast types. #' -#' @description Internal helper function to get the type of the forecast. -#' Options are "sample-based", "quantile-based", "binary" or "point" forecast. -#' The function runs additional checks to make sure the data satisfies -#' requirements and throws an informative error if any issues are found. -#' -#' @inheritParams validate +#' Possible forecast types are +#' - "sample-based" +#' - "quantile-based" +#' - "binary" +#' - "point" forecast. #' +#' The function runs additional checks to make sure the data satisfies the +#' requirements of the respective forecast type and throws an +#' informative error if any issues are found. +#' @inheritParams score #' @return Character vector of length one with either "binary", "quantile", #' "sample" or "point". -#' -#' @keywords internal +#' @export +#' @keywords check-forecasts get_forecast_type <- function(data) { + assert_data_frame(data) + assert(check_columns_present(data, c("observed", "predicted"))) if (test_forecast_type_is_binary(data)) { forecast_type <- "binary" } else if (test_forecast_type_is_quantile(data)) { @@ -24,8 +32,8 @@ get_forecast_type <- function(data) { forecast_type <- "point" } else { stop( - "Checking `data`: input doesn't satisfy criteria for any forecast type.", - "Are you missing a column `quantile` or `sample_id`?", + "Checking `data`: input doesn't satisfy criteria for any forecast type. ", + "Are you missing a column `quantile` or `sample_id`? ", "Please check the vignette for additional info." ) } @@ -42,7 +50,7 @@ get_forecast_type <- function(data) { #' @inheritParams document_check_functions #' @importFrom checkmate test_factor test_numeric #' @return Returns TRUE if basic requirements are satisfied and FALSE otherwise -#' @keywords internal +#' @keywords internal_input_check test_forecast_type_is_binary <- function(data) { observed_correct <- test_factor(x = data$observed) predicted_correct <- test_numeric(x = data$predicted) @@ -53,7 +61,7 @@ test_forecast_type_is_binary <- function(data) { #' @description Checks type of the necessary columns. #' @inheritParams document_check_functions #' @return Returns TRUE if basic requirements are satisfied and FALSE otherwise -#' @keywords internal +#' @keywords internal_input_check test_forecast_type_is_sample <- function(data) { observed_correct <- test_numeric(x = data$observed) predicted_correct <- test_numeric(x = data$predicted) @@ -65,7 +73,7 @@ test_forecast_type_is_sample <- function(data) { #' @description Checks type of the necessary columns. #' @inheritParams document_check_functions #' @return Returns TRUE if basic requirements are satisfied and FALSE otherwise -#' @keywords internal +#' @keywords internal_input_check test_forecast_type_is_point <- function(data) { observed_correct <- test_numeric(x = data$observed) predicted_correct <- test_numeric(x = data$predicted) @@ -77,7 +85,7 @@ test_forecast_type_is_point <- function(data) { #' @description Checks type of the necessary columns. #' @inheritParams document_check_functions #' @return Returns TRUE if basic requirements are satisfied and FALSE otherwise -#' @keywords internal +#' @keywords internal_input_check test_forecast_type_is_quantile <- function(data) { observed_correct <- test_numeric(x = data$observed) predicted_correct <- test_numeric(x = data$predicted) @@ -92,13 +100,10 @@ test_forecast_type_is_quantile <- function(data) { #' of observed or predicted values). The function checks whether the input is #' a factor, or else whether it is integer (or can be coerced to integer) or #' whether it's continuous. -#' #' @param x Input used to get the type. -#' #' @return Character vector of length one with either "classification", #' "integer", or "continuous" -#' -#' @keywords internal +#' @keywords internal_input_check get_type <- function(x) { if (is.factor(x)) { return("classification") @@ -121,15 +126,11 @@ get_type <- function(x) { #' @title Get metrics that were used for scoring -#' #' @description Internal helper function to get the metrics that were used #' to score forecasts. -#' @param score A data.table with an attribute `metric_names` -#' +#' @param scores A data.table with an attribute `metric_names` #' @return Character vector with the metrics that were used for scoring. -#' -#' @keywords internal - +#' @keywords internal_input_check get_metrics <- function(scores) { metric_names <- attr(scores, "metric_names") if (is.null(metric_names)) { @@ -144,22 +145,23 @@ get_metrics <- function(scores) { #' @title Get unit of a single forecast -#' #' @description Helper function to get the unit of a single forecast, i.e. #' the column names that define where a single forecast was made for. #' This just takes all columns that are available in the data and subtracts #' the columns that are protected, i.e. those returned by #' [get_protected_columns()] as well as the names of the metrics that were #' specified during scoring, if any. -#' -#' @inheritParams validate +#' @inheritParams validate_forecast #' @param check_conflict Whether or not to check whether there is a conflict -#' between a stored attribute and the inferred forecast unit. Defaults to FALSE. -#' +#' between a stored attribute and the inferred forecast unit. When you create +#' a forecast object, the forecast unit is stored as an attribute. If you +#' later change the columns of the data, the forecast unit as inferred from the +#' data might change compared to the stored attribute. Should this result in a +#' warning? Defaults to FALSE. #' @return A character vector with the column names that define the unit of #' a single forecast -#' -#' @keywords internal +#' @export +#' @keywords check-forecasts get_forecast_unit <- function(data, check_conflict = FALSE) { # check whether there is a conflict in the forecast_unit and if so warn protected_columns <- get_protected_columns(data) @@ -181,7 +183,7 @@ get_forecast_unit <- function(data, check_conflict = FALSE) { #' @description Helper function to get the names of all columns in a data frame #' that are protected columns. #' -#' @inheritParams validate +#' @inheritParams validate_forecast #' #' @return A character vector with the names of protected columns in the data. #' If data is `NULL` (default) then it returns a list of all columns that are @@ -248,7 +250,7 @@ get_duplicate_forecasts <- function(data, forecast_unit = NULL) { #' @title Get a list of all attributes of a scoringutils object #' -#' @param object A object of class `scoringutils_` +#' @param object A object of class `forecast_` #' #' @return A named list with the attributes of that object. #' @keywords internal diff --git a/R/metrics-binary.R b/R/metrics-binary.R index 4a0abed49..104a8bcbe 100644 --- a/R/metrics-binary.R +++ b/R/metrics-binary.R @@ -83,6 +83,6 @@ brier_score <- function(observed, predicted) { logs_binary <- function(observed, predicted) { assert_input_binary(observed, predicted) observed <- as.numeric(observed) - 1 - logs <- -log(ifelse(observed == 1, predicted, 1 - predicted)) + logs <- -log(1 - abs(observed - predicted)) return(logs) } diff --git a/R/metrics-quantile.R b/R/metrics-quantile.R index 50f729f15..14def7b18 100644 --- a/R/metrics-quantile.R +++ b/R/metrics-quantile.R @@ -96,6 +96,16 @@ #' `wis()`: a numeric vector with WIS values of size n (one per observation), #' or a list with separate entries if `separate_results` is `TRUE`. #' @export +#' @keywords metric +#' @examples +#' observed <- c(1, -15, 22) +#' predicted <- rbind( +#' c(-1, 0, 1, 2, 3), +#' c(-2, 1, 2, 2, 4), +#' c(-2, 0, 3, 3, 4) +#' ) +#' quantile <- c(0.1, 0.25, 0.5, 0.75, 0.9) +#' wis(observed, predicted, quantile) wis <- function(observed, predicted, quantile, @@ -119,25 +129,26 @@ wis <- function(observed, reformatted[, eval(cols) := do.call( interval_score, - list(observed = observed, - lower = lower, - upper = upper, - interval_range = range, - weigh = weigh, - separate_results = separate_results + list( + observed = observed, + lower = lower, + upper = upper, + interval_range = range, + weigh = weigh, + separate_results = separate_results ) )] - if (!count_median_twice) { - reformatted[, weight := ifelse(range == 0, 0.5, 1)] - } else { + if (count_median_twice) { reformatted[, weight := 1] + } else { + reformatted[, weight := ifelse(range == 0, 0.5, 1)] } # summarise results by forecast_id reformatted <- reformatted[ , lapply(.SD, weighted.mean, na.rm = na.rm, w = weight), - by = c("forecast_id"), + by = "forecast_id", .SDcols = colnames(reformatted) %like% paste(cols, collapse = "|") ] @@ -160,6 +171,7 @@ wis <- function(observed, #' `overprediction()`, `underprediction()` and `dispersion()` #' @export #' @rdname wis +#' @keywords metric dispersion <- function(observed, predicted, quantile, ...) { args <- list(...) args$separate_results <- TRUE @@ -173,6 +185,7 @@ dispersion <- function(observed, predicted, quantile, ...) { #' observation) #' @export #' @rdname wis +#' @keywords metric overprediction <- function(observed, predicted, quantile, ...) { args <- list(...) args$separate_results <- TRUE @@ -186,6 +199,7 @@ overprediction <- function(observed, predicted, quantile, ...) { #' observation) #' @export #' @rdname wis +#' @keywords metric underprediction <- function(observed, predicted, quantile, ...) { args <- list(...) args$separate_results <- TRUE @@ -209,6 +223,7 @@ underprediction <- function(observed, predicted, quantile, ...) { #' corresponding prediction interval and FALSE otherwise. #' @name interval_coverage #' @export +#' @keywords metric #' @examples #' observed <- c(1, -15, 22) #' predicted <- rbind( @@ -225,15 +240,14 @@ interval_coverage_quantile <- function(observed, predicted, quantile, range = 50 if (!all(necessary_quantiles %in% quantile)) { warning( "To compute the coverage for a range of ", range, "%, the quantiles ", - necessary_quantiles, " are required. Returning `NA`.") + necessary_quantiles, " are required. Returning `NA`." + ) return(NA) } r <- range reformatted <- quantile_to_interval(observed, predicted, quantile) reformatted <- reformatted[range %in% r] - reformatted[, coverage := ifelse( - observed >= lower & observed <= upper, TRUE, FALSE - )] + reformatted[, coverage := (observed >= lower) & (observed <= upper)] return(reformatted$coverage) } @@ -282,6 +296,7 @@ interval_coverage_quantile <- function(observed, predicted, quantile, range = 50 #' @return A numeric vector of length n with the coverage deviation for each #' forecast (comprising one or multiple prediction intervals). #' @export +#' @keywords metric #' @examples #' observed <- c(1, -15, 22) #' predicted <- rbind( @@ -290,34 +305,32 @@ interval_coverage_quantile <- function(observed, predicted, quantile, range = 50 #' c(-2, 0, 3, 3, 4) #' ) #' quantile <- c(0.1, 0.25, 0.5, 0.75, 0.9) -#' interval_coverage_deviation_quantile(observed, predicted, quantile) -interval_coverage_deviation_quantile <- function(observed, predicted, quantile) { +#' interval_coverage_dev_quantile(observed, predicted, quantile) +interval_coverage_dev_quantile <- function(observed, predicted, quantile) { assert_input_quantile(observed, predicted, quantile) # transform available quantiles into central interval ranges available_ranges <- unique(get_range_from_quantile(quantile)) # check if all necessary quantiles are available - necessary_quantiles <- unique(c( - (100 - available_ranges) / 2, - 100 - (100 - available_ranges) / 2) / 100 + necessary_quantiles <- unique( + c((100 - available_ranges) / 2, 100 - (100 - available_ranges) / 2) / 100 ) if (!all(necessary_quantiles %in% quantile)) { missing <- necessary_quantiles[!necessary_quantiles %in% quantile] warning( "To compute coverage deviation, all quantiles must form central ", "symmetric prediction intervals. Missing quantiles: ", - toString(missing), ". Returning `NA`.") + toString(missing), ". Returning `NA`." + ) return(NA) } reformatted <- quantile_to_interval(observed, predicted, quantile)[range != 0] - reformatted[, coverage := ifelse( - observed >= lower & observed <= upper, TRUE, FALSE - )] + reformatted[, coverage := (observed >= lower) & (observed <= upper)] reformatted[, coverage_deviation := coverage - range / 100] out <- reformatted[, .(coverage_deviation = mean(coverage_deviation)), - by = c("forecast_id")] + by = "forecast_id"] return(out$coverage_deviation) } @@ -358,17 +371,17 @@ interval_coverage_deviation_quantile <- function(observed, predicted, quantile) #' #' Bias can assume values between #' -1 and 1 and is 0 ideally (i.e. unbiased). +#' @param observed a single number representing the observed value #' @param predicted vector of length corresponding to the number of quantiles #' that holds predictions #' @param quantile vector of corresponding size with the quantile levels for #' which predictions were made. If this does not contain the median (0.5) then #' the median is imputed as being the mean of the two innermost quantiles. -#' @inheritParams bias_range #' @param na.rm logical. Should missing values be removed? #' @return scalar with the quantile bias for a single quantile prediction -#' @author Nikos Bosse \email{nikosbosse@@gmail.com} +#' @export +#' @keywords metric #' @examples -#' #' predicted <- c( #' 705.500, 1127.000, 4006.250, 4341.500, 4709.000, 4821.996, #' 5340.500, 5451.000, 5703.500, 6087.014, 6329.500, 6341.000, @@ -381,8 +394,6 @@ interval_coverage_deviation_quantile <- function(observed, predicted, quantile) #' observed <- 8062 #' #' bias_quantile(observed, predicted, quantile) -#' @export -#' @keywords metric bias_quantile <- function(observed, predicted, quantile, na.rm = TRUE) { assert_input_quantile(observed, predicted, quantile) n <- length(observed) @@ -390,8 +401,14 @@ bias_quantile <- function(observed, predicted, quantile, na.rm = TRUE) { if (is.null(dim(predicted))) { dim(predicted) <- c(n, N) } + if (!(0.5 %in% quantile)) { + message( + "Median not available, computing bias as mean of the two innermost ", + "quantiles in order to compute bias." + ) + } bias <- sapply(1:n, function(i) { - bias_quantile_single_vector(observed[i], predicted[i,], quantile, na.rm) + bias_quantile_single_vector(observed[i], predicted[i, ], quantile, na.rm) }) return(bias) } @@ -408,6 +425,7 @@ bias_quantile <- function(observed, predicted, quantile, na.rm = TRUE) { #' the median is imputed as being the mean of the two innermost quantiles. #' @inheritParams bias_quantile #' @return scalar with the quantile bias for a single quantile prediction +#' @keywords internal bias_quantile_single_vector <- function(observed, predicted, quantile, na.rm) { assert_number(observed) @@ -416,14 +434,14 @@ bias_quantile_single_vector <- function(observed, predicted, quantile, na.rm) { predicted_has_NAs <- anyNA(predicted) quantile_has_NAs <- anyNA(quantile) - if(any(predicted_has_NAs, quantile_has_NAs)) { - if (!na.rm) { - return(NA_real_) - } else { + if (any(predicted_has_NAs, quantile_has_NAs)) { + if (na.rm) { quantile <- quantile[!is.na(predicted)] predicted <- predicted[!is.na(predicted)] predicted <- predicted[!is.na(quantile)] quantile <- quantile[!is.na(quantile)] + } else { + return(NA_real_) } } @@ -437,10 +455,6 @@ bias_quantile_single_vector <- function(observed, predicted, quantile, na.rm) { median_prediction <- predicted[quantile == 0.5] } else { # if median is not available, compute as mean of two innermost quantile - message( - "Median not available, computing as mean of two innermost quantile", - " in order to compute bias." - ) median_prediction <- 0.5 * predicted[quantile == max(quantile[quantile < 0.5])] + 0.5 * predicted[quantile == min(quantile[quantile > 0.5])] @@ -517,7 +531,7 @@ ae_median_quantile <- function(observed, predicted, quantile) { #' @description #' Proper Scoring Rule to score quantile predictions. Smaller values are better. #' The quantile score is -#' closely related to the Interval score (see [interval_score()]) and is +#' closely related to the Interval score (see [wis()]) and is #' the quantile equivalent that works with single quantiles instead of #' central prediction intervals. #' @@ -616,12 +630,13 @@ wis_one_to_one <- function(observed, reformatted <- quantile_to_interval(observed, predicted, quantile) reformatted[, eval(cols) := do.call( interval_score, - list(observed = observed, - lower = lower, - upper = upper, - interval_range = range, - weigh = weigh, - separate_results = separate_results + list( + observed = observed, + lower = lower, + upper = upper, + interval_range = range, + weigh = weigh, + separate_results = separate_results ) )] @@ -665,7 +680,7 @@ wis_one_to_one <- function(observed, if (output == "matrix") { wis <- matrix(wis, nrow = n, ncol = N) if (separate_results) { - components <- lapply(components, function(x) matrix(x, nrow = n, ncol = N)) + components <- lapply(components, matrix, nrow = n, ncol = N) return(c(wis, components)) } else { return(wis) diff --git a/R/metrics-range.R b/R/metrics-range.R index 5263d8962..172380306 100644 --- a/R/metrics-range.R +++ b/R/metrics-range.R @@ -65,7 +65,7 @@ #' lower <- qnorm(alpha / 2, rnorm(30, mean = 1:30)) #' upper <- qnorm((1 - alpha / 2), rnorm(30, mean = 11:40)) #' -#' interval_score( +#' scoringutils:::interval_score( #' observed = observed, #' lower = lower, #' upper = upper, @@ -73,17 +73,18 @@ #' ) #' #' # gives a warning, as the interval_range should likely be 50 instead of 0.5 -#' interval_score(observed = 4, upper = 8, lower = 2, interval_range = 0.5) +#' scoringutils:::interval_score( +#' observed = 4, upper = 8, lower = 2, interval_range = 0.5 +#' ) #' #' # example with missing values and separate results -#' interval_score( +#' scoringutils:::interval_score( #' observed = c(observed, NA), #' lower = c(lower, NA), #' upper = c(NA, upper), #' separate_results = TRUE, #' interval_range = 90 #' ) -#' @export #' @keywords metric #' @references Strictly Proper Scoring Rules, Prediction,and Estimation, #' Tilmann Gneiting and Adrian E. Raftery, 2007, Journal of the American @@ -132,126 +133,3 @@ interval_score <- function(observed, return(score) } } - - -################################################################################ -# Metrics with a many-to-one relationship between input and score -################################################################################ - -#' @title Determines Bias of Quantile Forecasts based on the range of the -#' prediction intervals -#' -#' @description -#' Determines bias from quantile forecasts based on the range of the -#' prediction intervals. For an increasing number of quantiles this measure -#' converges against the sample based bias version for integer and continuous -#' forecasts. -#' -#' @details -#' For quantile forecasts, bias is measured as -#' -#' \deqn{ -#' B_t = (1 - 2 \cdot \max \{i | q_{t,i} \in Q_t \land q_{t,i} \leq x_t\}) -#' \mathbf{1}( x_t \leq q_{t, 0.5}) \\ -#' + (1 - 2 \cdot \min \{i | q_{t,i} \in Q_t \land q_{t,i} \geq x_t\}) -#' \mathbf{1}( x_t \geq q_{t, 0.5}), -#' }{ -#' B_t = (1 - 2 * max(i | q_{t,i} in Q_t and q_{t,i} <= x_t\)) -#' 1( x_t <= q_{t, 0.5}) + (1 - 2 * min(i | q_{t,i} in Q_t and q_{t,i} >= x_t)) -#' 1( x_t >= q_{t, 0.5}), -#' } -#' -#' where \eqn{Q_t} is the set of quantiles that form the predictive -#' distribution at time \eqn{t}. They represent our -#' belief about what the later observed value \eqn{x_t} will be. For -#' consistency, we define -#' \eqn{Q_t} such that it always includes the element -#' \eqn{q_{t, 0} = - \infty} and \eqn{q_{t,1} = \infty}. -#' \eqn{\mathbf{1}()}{1()} is the indicator function that is \eqn{1} if the -#' condition is satisfied and $0$ otherwise. In clearer terms, \eqn{B_t} is -#' defined as the maximum percentile rank for which the corresponding quantile -#' is still below the observed value, if the observed value is smaller than the -#' median of the predictive distribution. If the observed value is above the -#' median of the predictive distribution, then $B_t$ is the minimum percentile -#' rank for which the corresponding quantile is still larger than the true -#' value. If the observed value is exactly the median, both terms cancel out and -#' \eqn{B_t} is zero. For a large enough number of quantiles, the -#' percentile rank will equal the proportion of predictive samples below the -#' observed value, and this metric coincides with the one for -#' continuous forecasts. -#' -#' Bias can assume values between -#' -1 and 1 and is 0 ideally. -#' @param lower vector of length corresponding to the number of central -#' prediction intervals that holds predictions for the lower bounds of a -#' prediction interval -#' @param upper vector of length corresponding to the number of central -#' prediction intervals that holds predictions for the upper bounds of a -#' prediction interval -#' @param range vector of corresponding size with information about the width -#' of the central prediction interval -#' @param observed a single observed value -#' @return scalar with the quantile bias for a single quantile prediction -#' @seealso bias_quantile bias_sample -#' @author Nikos Bosse \email{nikosbosse@@gmail.com} -#' @examples -#' -#' lower <- c( -#' 6341.000, 6329.500, 6087.014, 5703.500, -#' 5451.000, 5340.500, 4821.996, 4709.000, -#' 4341.500, 4006.250, 1127.000, 705.500 -#' ) -#' -#' upper <- c( -#' 6341.000, 6352.500, 6594.986, 6978.500, -#' 7231.000, 7341.500, 7860.004, 7973.000, -#' 8340.500, 8675.750, 11555.000, 11976.500 -#' ) -#' -#' range <- c(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98) -#' -#' observed <- 8062 -#' -#' bias_range( -#' lower = lower, upper = upper, -#' range = range, observed = observed -#' ) -#' @export -#' @keywords metric - -bias_range <- function(lower, upper, range, observed) { - - if (anyNA(range)) { - if (is.na(range[1]) && !any(range[-1] == 0)) { - range[1] <- 0 - } - range <- range[!is.na(range)] - lower <- lower[!is.na(range)] - upper <- upper[!is.na(range)] - } - - if (length(range) > 1 && !all(diff(range) > 0)) { - stop("Range must be increasing") - } - - if (length(lower) != length(upper) || length(range) != length(lower)) { - stop("Inputs must have same length") - } - - check_quantiles(range, name = "range", range = c(0, 100)) - - # Convert range to quantiles - quantile <- c( - rev(abs(100 - range) / (2 * 100)), - abs(100 + range[range != 0]) / (2 * 100) - ) - - # Combine predictions - upper_without_median <- upper[range != 0] - predicted <- c(rev(lower), upper_without_median) - - # Call bias_quantile - bias <- bias_quantile(observed, predicted, quantile) - - return(bias) -} diff --git a/R/metrics-sample.R b/R/metrics-sample.R index 803fb4c4e..81d62e8fc 100644 --- a/R/metrics-sample.R +++ b/R/metrics-sample.R @@ -313,9 +313,7 @@ interval_coverage_sample <- function(observed, predicted, range = 50) { # this could call interval_coverage_quantile instead # ========================================================== interval_dt <- quantile_to_interval(quantile_dt, format = "wide") - interval_dt[, coverage := ifelse( - observed >= lower & observed <= upper, TRUE, FALSE - )] + interval_dt[, coverage := (observed >= lower) & (observed <= upper)] # ========================================================== return(interval_dt$coverage) } diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R index eab561adb..525d1bca6 100644 --- a/R/pairwise-comparisons.R +++ b/R/pairwise-comparisons.R @@ -66,10 +66,10 @@ pairwise_comparison <- function(scores, baseline = NULL, ...) { metric <- match.arg(metric, c("auto", available_metrics())) - if (!is.data.table(scores)) { - scores <- as.data.table(scores) - } else { + if (is.data.table(scores)) { scores <- copy(scores) + } else { + scores <- as.data.table(scores) } # determine metric automatically @@ -228,8 +228,8 @@ pairwise_comparison_one_group <- function(scores, # make result character instead of factor result[, `:=`( - "model" = as.character(model), - "compare_against" = as.character(compare_against) + model = as.character(model), + compare_against = as.character(compare_against) )] # calculate relative skill as geometric mean diff --git a/R/pit.R b/R/pit.R index ee5e09c7c..067db5123 100644 --- a/R/pit.R +++ b/R/pit.R @@ -125,10 +125,10 @@ pit_sample <- function(observed, # check data type ------------------------------------------------------------ # check whether continuous or integer - if (!isTRUE(all.equal(as.vector(predicted), as.integer(predicted)))) { - continuous_predictions <- TRUE - } else { + if (isTRUE(all.equal(as.vector(predicted), as.integer(predicted)))) { continuous_predictions <- FALSE + } else { + continuous_predictions <- TRUE } # calculate PIT-values ------------------------------------------------------- @@ -185,7 +185,7 @@ pit <- function(data, by, n_replicates = 100) { - data <- validate(data) + data <- as_forecast(data) data <- remove_na_observed_predicted(data) forecast_type <- get_forecast_type(data) @@ -209,7 +209,7 @@ pit <- function(data, value.var = "predicted" ) - pit <- data_wide[, .("pit_value" = pit_sample( + pit <- data_wide[, .(pit_value = pit_sample( observed = observed, predicted = as.matrix(.SD) )), diff --git a/R/plot.R b/R/plot.R index b35153051..abd889d46 100644 --- a/R/plot.R +++ b/R/plot.R @@ -47,10 +47,8 @@ plot_score_table <- function(scores, id_vars <- get_forecast_unit(scores) metrics <- get_metrics(scores) - scores <- delete_columns( - scores, - names(scores)[!(names(scores) %in% c(metrics, id_vars))] - ) + cols_to_delete <- names(scores)[!(names(scores) %in% c(metrics, id_vars))] + suppressWarnings(scores[, eval(cols_to_delete) := NULL]) # compute scaled values ------------------------------------------------------ # scaling is done in order to colour the different scores @@ -404,11 +402,7 @@ plot_predictions <- function(data, del_cols <- colnames(truth_data)[!(colnames(truth_data) %in% c(by, "observed", x))] - truth_data <- delete_columns( - truth_data, - del_cols, - make_unique = TRUE - ) + truth_data <- unique(suppressWarnings(truth_data[, eval(del_cols) := NULL])) # find out what type of predictions we have. convert sample based to # range data @@ -471,7 +465,7 @@ plot_predictions <- function(data, # it separately here to deal with the case when only the median is provided # (in which case ggdist::geom_lineribbon() will fail) if (0 %in% range) { - select_median <- (forecasts$range %in% 0 & forecasts$boundary == "lower") + select_median <- (forecasts$range == 0 & forecasts$boundary == "lower") median <- forecasts[select_median] if (nrow(median) > 0) { @@ -942,54 +936,58 @@ plot_pit <- function(pit, #' #' @description #' Visualise Where Forecasts Are Available -#' @inheritParams print.scoringutils_check -#' @param x an S3 object of class "scoringutils_available_forecasts" -#' as produced by [available_forecasts()] -#' @param yvar character vector of length one that denotes the name of the column +#' @param forecast_counts a data.table (or similar) with a column `count` +#' holding forecast counts, as produced by [get_forecast_counts()] +#' @param x character vector of length one that denotes the name of the column +#' to appear on the x-axis of the plot. +#' @param y character vector of length one that denotes the name of the column #' to appear on the y-axis of the plot. Default is "model". -#' @param xvar character vector of length one that denotes the name of the column -#' to appear on the x-axis of the plot. Default is "forecast_date". -#' @param make_xvar_factor logical (default is TRUE). Whether or not to convert +#' @param x_as_factor logical (default is TRUE). Whether or not to convert #' the variable on the x-axis to a factor. This has an effect e.g. if dates #' are shown on the x-axis. -#' @param show_numbers logical (default is `TRUE`) that indicates whether +#' @param show_counts logical (default is `TRUE`) that indicates whether #' or not to show the actual count numbers on the plot #' @return ggplot object with a plot of interval coverage #' @importFrom ggplot2 ggplot scale_colour_manual scale_fill_manual #' geom_tile scale_fill_gradient .data #' @importFrom data.table dcast .I .N +#' @importFrom checkmate assert_string assert_logical assert #' @export #' @examples #' library(ggplot2) -#' available_forecasts <- available_forecasts( +#' forecast_counts <- get_forecast_counts( #' example_quantile, by = c("model", "target_type", "target_end_date") #' ) -#' plot( -#' available_forecasts, xvar = "target_end_date", show_numbers = FALSE +#' plot_forecast_counts( +#' forecast_counts, x = "target_end_date", show_counts = FALSE #' ) + #' facet_wrap("target_type") -plot.scoringutils_available_forecasts <- function(x, - yvar = "model", - xvar = "forecast_date", - make_xvar_factor = TRUE, - show_numbers = TRUE, - ...) { - x <- as.data.table(x) - - if (make_xvar_factor) { - x[, eval(xvar) := as.factor(get(xvar))] +plot_forecast_counts <- function(forecast_counts, + x, + y = "model", + x_as_factor = TRUE, + show_counts = TRUE) { + + forecast_counts <- ensure_data.table(forecast_counts) + assert_string(y) + assert_string(x) + assert(check_columns_present(forecast_counts, c(y, x))) + assert_logical(x_as_factor) + assert_logical(show_counts) + + if (x_as_factor) { + forecast_counts[, eval(x) := as.factor(get(x))] } - setnames(x, old = "count", new = "Count") + setnames(forecast_counts, old = "count", new = "Count") plot <- ggplot( - x, - aes(y = .data[[yvar]], x = .data[[xvar]]) + forecast_counts, + aes(y = .data[[y]], x = .data[[x]]) ) + geom_tile(aes(fill = `Count`), - width = 0.97, height = 0.97 - ) + + width = 0.97, height = 0.97) + scale_fill_gradient( low = "grey95", high = "steelblue", na.value = "lightgrey" @@ -1002,54 +1000,14 @@ plot.scoringutils_available_forecasts <- function(x, ) ) + theme(panel.spacing = unit(2, "lines")) - - if (show_numbers) { + if (show_counts) { plot <- plot + geom_text(aes(label = `Count`)) } - return(plot) } -#' @title Visualise Where Forecasts Are Available `r lifecycle::badge("deprecated")` -#' -#' @description -#' Old version of [plot.scoringutils_available_forecasts()] for compatibility. -#' @inheritParams plot.scoringutils_available_forecasts -#' @param available_forecasts an S3 object of class "scoringutils_available_forecasts" -#' as produced by [available_forecasts()] -#' @param y character vector of length one that denotes the name of the column -#' to appear on the y-axis of the plot. Default is "model". -#' @param x character vector of length one that denotes the name of the column -#' to appear on the x-axis of the plot. Default is "forecast_date". -#' @param make_x_factor logical (default is TRUE). Whether or not to convert -#' the variable on the x-axis to a factor. This has an effect e.g. if dates -#' are shown on the x-axis. -#' @export -plot_avail_forecasts <- function(available_forecasts, - y = "model", - x = "forecast_date", - make_x_factor = TRUE, - show_numbers = TRUE) { - - lifecycle::deprecate_warn( - "1.2.2", "plot_avail_forecasts()", - "plot()" - ) - - plot.scoringutils_available_forecasts( - x = available_forecasts, - yvar = y, - xvar = x, - make_xvar_factor = make_x_factor, - show_numbers = show_numbers - ) -} - - - - #' @title Plot Correlation Between Metrics #' #' @description diff --git a/R/score.R b/R/score.R index 29230e6e0..5ca049fa7 100644 --- a/R/score.R +++ b/R/score.R @@ -1,94 +1,25 @@ #' @title Evaluate forecasts in a data.frame format -#' #' @description `score()` applies a selection of scoring metrics to a data.frame #' of forecasts. It is the workhorse of the `scoringutils` package. #' `score()` is a generic that dispatches to different methods depending on the -#' class of the input data. The default method is `score.default()`, which -#' validates the input, assigns as class based on the forecast type, and then -#' calls `score()` again to dispatch to the appropriate method. See below for -#' more information on how forecast types are determined. -#' -#' @details -#' **Forecast types and input format** -#' -#' Various different forecast types / forecast formats are supported. At the -#' moment, those are -#' - point forecasts -#' - binary forecasts ("soft binary classification") -#' - Probabilistic forecasts in a quantile-based format (a forecast is -#' represented as a set of predictive quantiles) -#' - Probabilistic forecasts in a sample-based format (a forecast is represented -#' as a set of predictive samples) -#' -#' Forecast types are determined based on the columns present in the input data. -#' -#' *Point forecasts* require a column `observed` of type numeric and a column -#' `predicted` of type numeric. -#' -#' *Binary forecasts* require a column `observed` of type factor with exactly -#' two levels and a column `predicted` of type numeric with probabilities, -#' corresponding to the probability that `observed` is equal to the second -#' factor level. See details [here][brier_score()] for more information. -#' -#' *Quantile-based forecasts* require a column `observed` of type numeric, -#' a column `predicted` of type numeric, and a column `quantile` of type numeric -#' with quantile-levels (between 0 and 1). -#' -#' *Sample-based forecasts* require a column `observed` of type numeric, -#' a column `predicted` of type numeric, and a column `sample_id` of type -#' numeric with sample indices. -#' -#' For more information see the vignettes and the example data -#' ([example_quantile], [example_continuous], [example_integer], -#' [example_point()], and [example_binary]). -#' -#' **Forecast unit** -#' -#' In order to score forecasts, `scoringutils` needs to know which of the rows -#' of the data belong together and jointly form a single forecasts. This is -#' easy e.g. for point forecast, where there is one row per forecast. For -#' quantile or sample-based forecasts, however, there are multiple rows that -#' belong to single forecast. -#' -#' The *forecast unit* or *unit of a single forecast* is then described by the -#' combination of columns that uniquely identify a single forecast. -#' For example, we could have forecasts made by different models in various -#' locations at different time points, each for several weeks into the future. -#' The forecast unit could then be described as -#' `forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")`. -#' `scoringutils` automatically tries to determine the unit of a single -#' forecast. It uses all existing columns for this, which means that no columns -#' must be present that are unrelated to the forecast unit. As a very simplistic -#' example, if you had an additional row, "even", that is one if the row number -#' is even and zero otherwise, then this would mess up scoring as `scoringutils` -#' then thinks that this column was relevant in defining the forecast unit. -#' -#' In order to avoid issues, we recommend using the function -#' [set_forecast_unit()] to determine the forecast unit manually. -#' The function simply drops unneeded columns, while making sure that all -#' necessary, 'protected columns' like "predicted" or "observed" are retained. -#' -#' **Validating inputs** -#' -#' We recommend that users validate their input prior to scoring using the -#' function [validate()] (though this will also be run internally by [score()]). -#' The function checks the input data and provides helpful information. -#' -#' -#' **Further help** +#' class of the input data. #' +#' We recommend that users call [as_forecast()] prior to calling `score()` to +#' validate the input data and convert it to a forecast object (though +#' `score.default()` will do this if it hasn't happened before). +#' See below for more information on forecast types and input formats. #' For additional help and examples, check out the [Getting Started #' Vignette](https://epiforecasts.io/scoringutils/articles/scoringutils.html) as #' well as the paper [Evaluating Forecasts with scoringutils in #' R](https://arxiv.org/abs/2205.07090). -#' +#' @inheritSection forecast_types Forecast types and input format +#' @inheritSection forecast_types Forecast unit #' @param data A data.frame or data.table with predicted and observed values. #' @param metrics A named list of scoring functions. Names will be used as #' column names in the output. See [metrics_point()], [metrics_binary()], #' `metrics_quantile()`, and [metrics_sample()] for more information on the #' default metrics used. #' @param ... additional arguments -#' #' @return A data.table with unsummarised scores. This will generally be #' one score per forecast (as defined by the unit of a single forecast). #' @@ -97,14 +28,12 @@ #' for individual quantiles. You can call [summarise_scores()]) on the #' unsummarised scores to obtain one score per forecast unit for quantile-based #' forecasts. -#' #' @importFrom data.table ':=' as.data.table -#' #' @examples #' library(magrittr) # pipe operator #' data.table::setDTthreads(1) # only needed to avoid issues on CRAN #' -#' validated <- validate(example_quantile) +#' validated <- as_forecast(example_quantile) #' score(validated) %>% #' summarise_scores(by = c("model", "target_type")) #' @@ -114,7 +43,7 @@ #' set_forecast_unit( #' c("location", "target_end_date", "target_type", "horizon", "model") #' ) %>% -#' validate() %>% +#' as_forecast() %>% #' score() #' #' # forecast formats with different metrics @@ -125,13 +54,11 @@ #' score(example_integer) #' score(example_continuous) #' } -#' #' @author Nikos Bosse \email{nikosbosse@@gmail.com} #' @references #' Bosse NI, Gruson H, Cori A, van Leeuwen E, Funk S, Abbott S #' (2022) Evaluating Forecasts with scoringutils in R. #' \doi{10.48550/arXiv.2205.07090} -#' #' @export score <- function(data, ...) { @@ -141,18 +68,20 @@ score <- function(data, ...) { #' @rdname score #' @export score.default <- function(data, ...) { - data <- validate(data) + assert(check_data_columns(data)) + forecast_type <- get_forecast_type(data) + data <- new_forecast(data, paste0("forecast_", forecast_type)) score(data, ...) } #' @rdname score #' @export -score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) { - data <- validate(data) +score.forecast_binary <- function(data, metrics = metrics_binary, ...) { + data <- validate_forecast(data) data <- remove_na_observed_predicted(data) metrics <- validate_metrics(metrics) - data <- apply_metrics( + data <- apply_rules( data, metrics, data$observed, data$predicted, ... ) @@ -167,12 +96,12 @@ score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) { #' @importFrom Metrics se ae ape #' @rdname score #' @export -score.scoringutils_point <- function(data, metrics = metrics_point, ...) { - data <- validate(data) +score.forecast_point <- function(data, metrics = metrics_point, ...) { + data <- validate_forecast(data) data <- remove_na_observed_predicted(data) metrics <- validate_metrics(metrics) - data <- apply_metrics( + data <- apply_rules( data, metrics, data$observed, data$predicted, ... ) @@ -184,8 +113,8 @@ score.scoringutils_point <- function(data, metrics = metrics_point, ...) { #' @rdname score #' @export -score.scoringutils_sample <- function(data, metrics = metrics_sample, ...) { - data <- validate(data) +score.forecast_sample <- function(data, metrics = metrics_sample, ...) { + data <- validate_forecast(data) data <- remove_na_observed_predicted(data) forecast_unit <- attr(data, "forecast_unit") metrics <- validate_metrics(metrics) @@ -206,7 +135,7 @@ score.scoringutils_sample <- function(data, metrics = metrics_sample, ...) { predicted <- do.call(rbind, data$predicted) data[, c("observed", "predicted", "scoringutils_N") := NULL] - data <- apply_metrics( + data <- apply_rules( data, metrics, observed, predicted, ... ) @@ -221,19 +150,21 @@ score.scoringutils_sample <- function(data, metrics = metrics_sample, ...) { #' @importFrom data.table `:=` as.data.table rbindlist %like% #' @rdname score #' @export -score.scoringutils_quantile <- function(data, metrics = metrics_quantile, ...) { - data <- validate(data) +score.forecast_quantile <- function(data, metrics = metrics_quantile, ...) { + data <- validate_forecast(data) data <- remove_na_observed_predicted(data) forecast_unit <- attr(data, "forecast_unit") metrics <- validate_metrics(metrics) # transpose the forecasts that belong to the same forecast unit # make sure the quantiles and predictions are ordered in the same way - d_transposed <- data[, .(predicted = list(predicted[order(quantile)]), - observed = unique(observed), - quantile = list(quantile[order(quantile)]), - scoringutils_quantile = toString(quantile[order(quantile)])), - by = forecast_unit] + d_transposed <- data[, .( + predicted = list(predicted[order(quantile)]), + observed = unique(observed), + quantile = list(sort(quantile, na.last = TRUE)), + scoringutils_quantile = toString(sort(quantile, na.last = TRUE)) + ), + by = forecast_unit] # split according to quantile lengths and do calculations for different # quantile lengths separately. The function `wis()` assumes that all @@ -245,9 +176,11 @@ score.scoringutils_quantile <- function(data, metrics = metrics_quantile, ...) { observed <- data$observed predicted <- do.call(rbind, data$predicted) quantile <- unlist(unique(data$quantile)) - data[, c("observed", "predicted", "quantile", "scoringutils_quantile") := NULL] + data[, c( + "observed", "predicted", "quantile", "scoringutils_quantile" + ) := NULL] - data <- apply_metrics( + data <- apply_rules( data, metrics, observed, predicted, quantile, ... ) @@ -260,17 +193,25 @@ score.scoringutils_quantile <- function(data, metrics = metrics_quantile, ...) { return(data[]) } -apply_metrics <- function(data, metrics, ...) { + +#' @title Apply A List Of Functions To A Data Table Of Forecasts +#' @description This helper function applies scoring rules (stored as a list of +#' functions) to a data table of forecasts. `apply_rules` is used within +#' `score()` to apply all scoring rules to the data. +#' Scoring rules are wrapped in [run_safely()] to catch errors and to make +#' sure that only arguments are passed to the scoring rule that are actually +#' accepted by it. +#' @inheritParams score +#' @return A data table with the forecasts and the calculated metrics +#' @keywords internal +apply_rules <- function(data, metrics, ...) { expr <- expression( data[, (metric_name) := do.call(run_safely, list(..., fun = fun))] ) lapply(seq_along(metrics), function(i, data, ...) { - metric_name <- names(metrics[i]) - fun <- metrics[[i]] + metric_name <- names(metrics[i]) # nolint + fun <- metrics[[i]] # nolint eval(expr) }, data, ...) return(data) } - - - diff --git a/R/summarise_scores.R b/R/summarise_scores.R index f71a0f7cc..5bda00557 100644 --- a/R/summarise_scores.R +++ b/R/summarise_scores.R @@ -114,8 +114,8 @@ summarise_scores <- function(scores, stored_attributes <- c( get_scoringutils_attributes(scores), list( - "scoringutils_by" = by, - "unsummarised_scores" = scores + scoringutils_by = by, + unsummarised_scores = scores ) ) @@ -181,6 +181,7 @@ summarize_scores <- summarise_scores #' returned. By default (`NULL`), relative skill will not be scaled with #' respect to a baseline model. #' @export +#' @keywords keyword scoring add_pairwise_comparison <- function(scores, by = NULL, relative_skill_metric = "auto", diff --git a/R/utils.R b/R/utils.R index 53a2d800e..8e5e8a9e1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -10,31 +10,6 @@ available_metrics <- function() { } -#' Safely delete Columns From a Data.table -#' -#' @description take a vector of column names and delete the columns if they -#' are present in the data.table -#' @param df A data.table or data.frame from which columns shall be deleted -#' @param cols_to_delete character vector with names of columns to be deleted -#' @param make_unique whether to make the data set unique after removing columns -#' @importFrom data.table as.data.table -#' @return A data.table -#' -#' @keywords internal -#' -delete_columns <- function(df, cols_to_delete, make_unique = FALSE) { - df <- data.table::as.data.table(df) - delete_columns <- names(df)[names(df) %in% cols_to_delete] - if (length(delete_columns) > 0) { - if (make_unique) { - df <- unique(df[, eval(delete_columns) := NULL]) - } else { - df <- df[, eval(delete_columns) := NULL] - } - } - return(df) -} - remove_na_observed_predicted <- function(data) { # remove rows where predicted or observed value are NA ----------------------- data <- data[!is.na(observed) & !is.na(predicted)] @@ -45,9 +20,6 @@ remove_na_observed_predicted <- function(data) { } - - - #' @title Collapse several messages to one #' #' @description Internal helper function to facilitate generating messages @@ -80,7 +52,7 @@ collapse_messages <- function(type = "messages", messages) { #' @export #' @keywords check-forecasts #' @examples -#' check <- validate(example_quantile) +#' check <- as_forecast(example_quantile) #' print(check) print.scoringutils_check <- function(x, ...) { cat("Your forecasts seem to be for a target of the following type:\n") @@ -170,37 +142,6 @@ strip_attributes <- function(object, attributes) { return(object) } -#' Remove scoringutils_ Class and Attributes -#' @description This function removes all classes that start with -#' "scoringutils_" and all attributes associated with scoringutils. -#' -#' @param object An object to remove scoringutils classes and attributes from -#' @return The object with scoringutils classes and attributes removed -#' @keywords internal -remove_scoringutils_class <- function(object) { - if (is.null(object)) { - return(NULL) - } - if (is.null(class(object))) { - return(object) - } - # check if "scoringutils_" is in name of any class - if (any(grepl("scoringutils_", class(object), fixed = TRUE))) { - stored_attributes <- get_scoringutils_attributes(object) - - # remove all classes that contain "scoringutils_" - class(object) <- class(object)[!grepl( - "scoringutils_", class(object), - fixed = TRUE - )] - - # remove all scoringutils attributes - object <- strip_attributes(object, names(stored_attributes)) - - return(object) - } - return(object) -} #' @title Run a function safely #' @description This is a wrapper function designed to run a function safely @@ -218,6 +159,7 @@ remove_scoringutils_class <- function(object) { #' @param fun A function to execute #' @return The result of `fun` or `NULL` if `fun` errors #' @export +#' @keywords scoring #' @examples #' f <- function(x) {x} #' run_safely(2, fun = f) @@ -262,11 +204,10 @@ run_safely <- function(..., fun) { #' @keywords internal #' @importFrom data.table copy is.data.table as.data.table ensure_data.table <- function(data) { - if (!is.data.table(data)) { - data <- as.data.table(data) - } else { + if (is.data.table(data)) { data <- copy(data) + } else { + data <- as.data.table(data) } return(data) } - diff --git a/R/utils_data_handling.R b/R/utils_data_handling.R index 1b1302dbf..cbc7a8b23 100644 --- a/R/utils_data_handling.R +++ b/R/utils_data_handling.R @@ -65,12 +65,14 @@ merge_pred_and_obs <- function(forecasts, observations, basenames_y <- sub(".y$", "", colnames_y) # see whether the column name as well as the content is the same - overlapping <- (as.list(combined[, ..colnames_x]) %in% as.list(combined[, ..colnames_y])) & basenames_x == basenames_y + content_x <- as.list(combined[, ..colnames_x]) + content_y <- as.list(combined[, ..colnames_y]) + overlapping <- (content_x %in% content_y) & (basenames_x == basenames_y) overlap_names <- colnames_x[overlapping] basenames_overlap <- sub(".x$", "", overlap_names) # delete overlapping columns - if (length(basenames_overlap > 0)) { + if (length(basenames_overlap) > 0) { combined[, paste0(basenames_overlap, ".x") := NULL] combined[, paste0(basenames_overlap, ".y") := NULL] } @@ -176,6 +178,8 @@ range_long_to_quantile <- function(data, #' and upper bounds of the 50% and 90% prediction intervals (corresponding to #' the 0.25 and 0.75 as well as the 0.05 and 0.095 quantiles). #' @param ... method arguments +#' @keywords data-handling +#' @export quantile_to_interval <- function(...) { UseMethod("quantile_to_interval") } @@ -199,6 +203,7 @@ quantile_to_interval <- function(...) { #' @importFrom data.table copy #' @export #' @rdname quantile_to_interval +#' @keywords data-handling quantile_to_interval.data.frame <- function(dt, format = "long", keep_quantile_col = FALSE, @@ -218,7 +223,7 @@ quantile_to_interval.data.frame <- function(dt, } if (format == "wide") { - delete_columns(dt, "quantile") + suppressWarnings(dt[, "quantile" := NULL]) dt <- dcast(dt, ... ~ boundary, value.var = "predicted") # if there are NA values in `predicted`, this introduces a column "NA" if ("NA" %in% colnames(dt) && all(is.na(dt[["NA"]]))) { @@ -241,6 +246,7 @@ quantile_to_interval.data.frame <- function(dt, #' `forecast_id` and `range`. #' @export #' @rdname quantile_to_interval +#' @keywords data-handling quantile_to_interval.numeric <- function(observed, predicted, quantile, diff --git a/R/validate.R b/R/validate.R index d283f7aa8..4a1be5f1f 100644 --- a/R/validate.R +++ b/R/validate.R @@ -1,55 +1,70 @@ -#' @title Validate input data -#' -#' @description -#' -#' The default method, [validate.default()] only runs basic input checks. -#' It's main purpose is to determine the forecast type (binary, point, -#' sample-based or quantile-based) based on the input data. It then constructs -#' an appropriate class and calls [validate()] again which dispatches to the -#' appropriate method. -#' -#' Methods for the different classes run [validate_general()], which performs -#' checks that are the same for all forecast types and then perform specific -#' checks for the specific forecast type. -#' -#' You can find more information about input formats in the vignette. -#' To summarise, the data should come in one of four different formats: -#' - A format for binary predictions (see [example_binary]) -#' - A format for point predictions (see [example_point]) -#' - A sample-based format for discrete or continuous predictions -#' (see [example_continuous] and [example_integer]) -#' - A quantile-based format (see [example_quantile]) +#' @title Create a `forecast` Object +#' @description Convert a data.frame or similar of forecasts into an object of +#' class `forecast_*` and validate it. #' +#' `as_forecast()` determines the forecast type (binary, point, sample-based or +#' quantile-based) from the input data (using the function +#' [get_forecast_type()]. It then constructs an object of the +#' appropriate class (`forecast_binary`, `forecast_point`, `forecast_sample`, or +#' `forecast_quantile`, using the function [new_forecast()]). +#' Lastly, it calls [as_forecast()] on the object to make sure it conforms with +#' the required input formats. #' @inheritParams score +#' @inheritSection forecast_types Forecast types and input format #' @return Depending on the forecast type, an object of class -#' `scoringutils_binary`, `scoringutils_point`, `scoringutils_sample` or -#' `scoringutils_quantile`. -#' @importFrom data.table ':=' is.data.table -#' @importFrom checkmate assert_data_frame +#' `forecast_binary`, `forecast_point`, `forecast_sample` or +#' `forecast_quantile`. #' @export -#' @keywords validate -validate <- function(data, ...) { - UseMethod("validate") +#' @keywords check-forecasts +#' @examples +#' as_forecast(example_binary) +#' as_forecast(example_quantile) +as_forecast <- function(data, ...) { + UseMethod("as_forecast") } -#' @rdname validate +#' @rdname as_forecast #' @export -validate.default <- function(data, ...) { +as_forecast.default <- function(data, ...) { assert(check_data_columns(data)) # find forecast type forecast_type <- get_forecast_type(data) # construct class - data <- new_scoringutils(data, paste0("scoringutils_", forecast_type)) + data <- new_forecast(data, paste0("forecast_", forecast_type)) # validate class - validate(data) + validate_forecast(data) } -#' @rdname validate + +#' @title Validate input data +#' +#' @description +#' Methods for the different classes run [validate_general()], which performs +#' checks that are the same for all forecast types and then perform specific +#' checks for the specific forecast type. +#' @inheritParams score +#' @inheritSection forecast_types Forecast types and input format +#' @return Depending on the forecast type, an object of class +#' `forecast_binary`, `forecast_point`, `forecast_sample` or +#' `forecast_quantile`. +#' @importFrom data.table ':=' is.data.table +#' @importFrom checkmate assert_data_frame #' @export -validate.scoringutils_binary <- function(data, ...) { +#' @keywords check-forecasts +#' @examples +#' forecast <- as_forecast(example_binary) +#' validate_forecast(forecast) +validate_forecast <- function(data, ...) { + UseMethod("validate_forecast") +} + + +#' @export +#' @keywords check-forecasts +validate_forecast.forecast_binary <- function(data, ...) { data <- validate_general(data) columns_correct <- test_columns_not_present(data, c("sample_id", "quantile")) @@ -67,9 +82,10 @@ validate.scoringutils_binary <- function(data, ...) { return(data[]) } -#' @rdname validate + #' @export -validate.scoringutils_point <- function(data, ...) { +#' @keywords check-forecasts +validate_forecast.forecast_point <- function(data, ...) { data <- validate_general(data) input_check <- check_input_point(data$observed, data$predicted) @@ -81,21 +97,24 @@ validate.scoringutils_point <- function(data, ...) { return(data[]) } -#' @rdname validate + #' @export -validate.scoringutils_quantile <- function(data, ...) { +#' @keywords check-forecasts +validate_forecast.forecast_quantile <- function(data, ...) { data <- validate_general(data) assert_numeric(data$quantile, lower = 0, upper = 1) return(data[]) } -#' @rdname validate + #' @export -validate.scoringutils_sample <- function(data, ...) { +#' @keywords check-forecasts +validate_forecast.forecast_sample <- function(data, ...) { data <- validate_general(data) return(data[]) } + #' @title Apply scoringutls input checks that are the same across forecast types #' #' @description @@ -107,13 +126,13 @@ validate.scoringutils_sample <- function(data, ...) { #' - checks there are no duplicate forecasts #' - if appropriate, checks the number of samples / quantiles is the same #' for all forecasts -#' @inheritParams available_forecasts +#' @inheritParams get_forecast_counts #' @return returns the input, with a few new attributes that hold additional #' information, messages and warnings #' @importFrom data.table ':=' is.data.table setattr #' @importFrom checkmate assert_data_table #' @export -#' @keywords validate +#' @keywords internal_input_check validate_general <- function(data) { # check that data is a data.table and that the columns look fine assert_data_table(data) @@ -158,12 +177,12 @@ validate_general <- function(data) { #' - makes sure that a column called `model` exists and if not creates one #' - assigns a class #' -#' @inheritParams available_forecasts +#' @inheritParams get_forecast_counts #' @param classname name of the class to be created #' @return An object of the class indicated by `classname` #' @export -#' @keywords validate -new_scoringutils <- function(data, classname) { +#' @keywords internal +new_forecast <- function(data, classname) { data <- as.data.table(data) data <- assure_model_column(data) class(data) <- c(classname, class(data)) @@ -186,7 +205,7 @@ new_scoringutils <- function(data, classname) { #' @return A named list of metrics, with those filtered out that are not #' valid functions #' @importFrom checkmate assert_list test_list check_function - +#' @keywords internal_input_check validate_metrics <- function(metrics) { assert_list(metrics, min.len = 1, names = "named") diff --git a/README.Rmd b/README.Rmd index 0c4c41223..0e4644d62 100644 --- a/README.Rmd +++ b/README.Rmd @@ -85,12 +85,12 @@ example_quantile %>% ### Scoring forecasts -Forecasts can be easily and quickly scored using the `score()` function. `score()` automatically tries to determine the `forecast_unit`, i.e. the set of columns that uniquely defines a single forecast, by taking all column names of the data into account. However, it is recommended to set the forecast unit manually using `set_forecast_unit()` as this may help to avoid errors, especially when scoringutils is used in automated pipelines. The function `set_forecast_unit()` will simply drop unneeded columns. To verify everything is in order, the function `validate()` should be used. The result of that check can then passed directly into `score()`. `score()` returns unsummarised scores, which in most cases is not what the user wants. Here we make use of additional functions from `scoringutils` to add empirical coverage-levels (`add_coverage()`), and scores relative to a baseline model (here chosen to be the EuroCOVIDhub-ensemble model). See the getting started vignette for more details. Finally we summarise these scores by model and target type. +Forecasts can be easily and quickly scored using the `score()` function. `score()` automatically tries to determine the `forecast_unit`, i.e. the set of columns that uniquely defines a single forecast, by taking all column names of the data into account. However, it is recommended to set the forecast unit manually using `set_forecast_unit()` as this may help to avoid errors, especially when scoringutils is used in automated pipelines. The function `set_forecast_unit()` will simply drop unneeded columns. To verify everything is in order, the function `validate_forecast()` should be used. The result of that check can then passed directly into `score()`. `score()` returns unsummarised scores, which in most cases is not what the user wants. Here we make use of additional functions from `scoringutils` to add empirical coverage-levels (`add_coverage()`), and scores relative to a baseline model (here chosen to be the EuroCOVIDhub-ensemble model). See the getting started vignette for more details. Finally we summarise these scores by model and target type. ```{r score-example} example_quantile %>% set_forecast_unit(c("location", "target_end_date", "target_type", "horizon", "model")) %>% - validate() %>% + as_forecast() %>% add_coverage() %>% score() %>% summarise_scores( diff --git a/README.md b/README.md index 0a5f93881..62867f902 100644 --- a/README.md +++ b/README.md @@ -116,19 +116,20 @@ column names of the data into account. However, it is recommended to set the forecast unit manually using `set_forecast_unit()` as this may help to avoid errors, especially when scoringutils is used in automated pipelines. The function `set_forecast_unit()` will simply drop unneeded -columns. To verify everything is in order, the function `validate()` -should be used. The result of that check can then passed directly into -`score()`. `score()` returns unsummarised scores, which in most cases is -not what the user wants. Here we make use of additional functions from -`scoringutils` to add empirical coverage-levels (`add_coverage()`), and -scores relative to a baseline model (here chosen to be the -EuroCOVIDhub-ensemble model). See the getting started vignette for more -details. Finally we summarise these scores by model and target type. +columns. To verify everything is in order, the function +`validate_forecast()` should be used. The result of that check can then +passed directly into `score()`. `score()` returns unsummarised scores, +which in most cases is not what the user wants. Here we make use of +additional functions from `scoringutils` to add empirical +coverage-levels (`add_coverage()`), and scores relative to a baseline +model (here chosen to be the EuroCOVIDhub-ensemble model). See the +getting started vignette for more details. Finally we summarise these +scores by model and target type. ``` r example_quantile %>% set_forecast_unit(c("location", "target_end_date", "target_type", "horizon", "model")) %>% - validate() %>% + as_forecast() %>% add_coverage() %>% score() %>% summarise_scores( diff --git a/_pkgdown.yml b/_pkgdown.yml index 8f21f55aa..ea4f1e2a0 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -21,14 +21,14 @@ reference: - title: Package documentation contents: - scoringutils - - title: Functions to check and analyse inputs + - title: Functions to analyse inputs and obtain useful information contents: - has_keyword("check-forecasts") - title: Functions for convenient forecast evaluation contents: - score - has_keyword("scoring") - - starts_with("score_") + - starts_with("score") - title: Lower-level scoring functions contents: - has_keyword("metric") @@ -37,9 +37,12 @@ reference: - has_keyword("data-handling") - title: Functions for plotting and data visualisation contents: - - starts_with("plot_") + - starts_with("plot") - has_keyword("plotting") - - title: Internal functions + - title: Internal input check functions + contents: + - has_keyword("internal_input_check") + - title: Misc internal functions contents: - has_keyword("internal") - title: Example data and information diff --git a/inst/create-list-available-forecasts.R b/inst/create-list-available-forecasts.R index 07105a7f4..d1ee6cc43 100644 --- a/inst/create-list-available-forecasts.R +++ b/inst/create-list-available-forecasts.R @@ -30,7 +30,7 @@ metrics_quantile <- list( "bias" = bias_quantile, "coverage_50" = interval_coverage_quantile, "coverage_90" = \(...) {run_safely(..., range = 90, fun = interval_coverage_quantile)}, - "coverage_deviation" = interval_coverage_deviation_quantile, + "coverage_deviation" = interval_coverage_dev_quantile, "ae_median" = ae_median_quantile ) usethis::use_data(metrics_quantile, overwrite = TRUE) diff --git a/inst/manuscript/R/00-standalone-Figure-replication.R b/inst/manuscript/R/00-standalone-Figure-replication.R index 3b1b66b9c..373f25ec4 100644 --- a/inst/manuscript/R/00-standalone-Figure-replication.R +++ b/inst/manuscript/R/00-standalone-Figure-replication.R @@ -537,7 +537,7 @@ p2 + p1 + p_true + available_forecasts(data = example_integer, by = c("model", "target_type", "forecast_date")) |> plot_available_forecasts(x = "forecast_date", - show_numbers = FALSE) + + show_counts = FALSE) + facet_wrap(~ target_type) + labs(y = "Model", x = "Forecast date") diff --git a/inst/manuscript/manuscript.Rmd b/inst/manuscript/manuscript.Rmd index e907defac..782dc468d 100644 --- a/inst/manuscript/manuscript.Rmd +++ b/inst/manuscript/manuscript.Rmd @@ -432,7 +432,7 @@ library("ggplot2") available_forecasts(data = example_integer, by = c("model", "target_type", "forecast_date")) |> plot_available_forecasts(x = "forecast_date", - show_numbers = FALSE) + + show_counts = FALSE) + facet_wrap(~ target_type) + labs(y = "Model", x = "Forecast date") ``` diff --git a/man/add_pairwise_comparison.Rd b/man/add_pairwise_comparison.Rd index 31777dbfc..75217069c 100644 --- a/man/add_pairwise_comparison.Rd +++ b/man/add_pairwise_comparison.Rd @@ -41,3 +41,5 @@ Relative skill will be calculated for the aggregation level specified in \code{by}. WRITE MORE INFO HERE. } +\keyword{keyword} +\keyword{scoring} diff --git a/man/apply_rules.Rd b/man/apply_rules.Rd new file mode 100644 index 000000000..f64ba562f --- /dev/null +++ b/man/apply_rules.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/score.R +\name{apply_rules} +\alias{apply_rules} +\title{Apply A List Of Functions To A Data Table Of Forecasts} +\usage{ +apply_rules(data, metrics, ...) +} +\arguments{ +\item{data}{A data.frame or data.table with predicted and observed values.} + +\item{metrics}{A named list of scoring functions. Names will be used as +column names in the output. See \code{\link[=metrics_point]{metrics_point()}}, \code{\link[=metrics_binary]{metrics_binary()}}, +\code{metrics_quantile()}, and \code{\link[=metrics_sample]{metrics_sample()}} for more information on the +default metrics used.} + +\item{...}{additional arguments} +} +\value{ +A data table with the forecasts and the calculated metrics +} +\description{ +This helper function applies scoring rules (stored as a list of +functions) to a data table of forecasts. \code{apply_rules} is used within +\code{score()} to apply all scoring rules to the data. +Scoring rules are wrapped in \code{\link[=run_safely]{run_safely()}} to catch errors and to make +sure that only arguments are passed to the scoring rule that are actually +accepted by it. +} +\keyword{internal} diff --git a/man/as_forecast.Rd b/man/as_forecast.Rd new file mode 100644 index 000000000..3704364a8 --- /dev/null +++ b/man/as_forecast.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validate.R +\name{as_forecast} +\alias{as_forecast} +\alias{as_forecast.default} +\title{Create a \code{forecast} Object} +\usage{ +as_forecast(data, ...) + +\method{as_forecast}{default}(data, ...) +} +\arguments{ +\item{data}{A data.frame or data.table with predicted and observed values.} + +\item{...}{additional arguments} +} +\value{ +Depending on the forecast type, an object of class +\code{forecast_binary}, \code{forecast_point}, \code{forecast_sample} or +\code{forecast_quantile}. +} +\description{ +Convert a data.frame or similar of forecasts into an object of +class \verb{forecast_*} and validate it. + +\code{as_forecast()} determines the forecast type (binary, point, sample-based or +quantile-based) from the input data (using the function +\code{\link[=get_forecast_type]{get_forecast_type()}}. It then constructs an object of the +appropriate class (\code{forecast_binary}, \code{forecast_point}, \code{forecast_sample}, or +\code{forecast_quantile}, using the function \code{\link[=new_forecast]{new_forecast()}}). +Lastly, it calls \code{\link[=as_forecast]{as_forecast()}} on the object to make sure it conforms with +the required input formats. +} +\section{Forecast types and input format}{ +Various different forecast types / forecast formats are supported. At the +moment, those are +\itemize{ +\item point forecasts +\item binary forecasts ("soft binary classification") +\item Probabilistic forecasts in a quantile-based format (a forecast is +represented as a set of predictive quantiles) +\item Probabilistic forecasts in a sample-based format (a forecast is represented +as a set of predictive samples) +} + +Forecast types are determined based on the columns present in the input data. + +\emph{Point forecasts} require a column \code{observed} of type numeric and a column +\code{predicted} of type numeric. + +\emph{Binary forecasts} require a column \code{observed} of type factor with exactly +two levels and a column \code{predicted} of type numeric with probabilities, +corresponding to the probability that \code{observed} is equal to the second +factor level. See details \link[=brier_score]{here} for more information. + +\emph{Quantile-based forecasts} require a column \code{observed} of type numeric, +a column \code{predicted} of type numeric, and a column \code{quantile} of type numeric +with quantile-levels (between 0 and 1). + +\emph{Sample-based forecasts} require a column \code{observed} of type numeric, +a column \code{predicted} of type numeric, and a column \code{sample_id} of type +numeric with sample indices. + +For more information see the vignettes and the example data +(\link{example_quantile}, \link{example_continuous}, \link{example_integer}, +\code{\link[=example_point]{example_point()}}, and \link{example_binary}). +} + +\examples{ +as_forecast(example_binary) +as_forecast(example_quantile) +} +\keyword{check-forecasts} diff --git a/man/assert_dims_ok_point.Rd b/man/assert_dims_ok_point.Rd new file mode 100644 index 000000000..e7534eb1b --- /dev/null +++ b/man/assert_dims_ok_point.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check-inputs-scoring-functions.R +\name{assert_dims_ok_point} +\alias{assert_dims_ok_point} +\title{Assert Inputs Have Matching Dimensions} +\usage{ +assert_dims_ok_point(observed, predicted) +} +\arguments{ +\item{observed}{Input to be checked. Should be a factor of length n with +exactly two levels, holding the observed values. +The highest factor level is assumed to be the reference level. This means +that \code{predicted} represents the probability that the observed value is equal +to the highest factor level.} + +\item{predicted}{Input to be checked. \code{predicted} should be a vector of +length n, holding probabilities. Alternatively, \code{predicted} can be a matrix +of size n x 1. Values represent the probability that +the corresponding value in \code{observed} will be equal to the highest +available factor level.} +} +\value{ +Returns NULL invisibly if the assertion was successful and throws an +error otherwise. +} +\description{ +Function assesses whether input dimensions match. In the +following, n is the number of observations / forecasts. Scalar values may +be repeated to match the length of the other input. +Allowed options are therefore +\itemize{ +\item \code{observed} is vector of length 1 or length n +\item \code{predicted} is +\itemize{ +\item a vector of of length 1 or length n +\item a matrix with n rows and 1 column +} +} +} +\keyword{internal_input_check} diff --git a/man/assert_equal_length.Rd b/man/assert_equal_length.Rd index 8b83b4c85..686209bf4 100644 --- a/man/assert_equal_length.Rd +++ b/man/assert_equal_length.Rd @@ -23,4 +23,4 @@ error otherwise. \description{ Check whether variables all have the same length } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/assert_input_binary.Rd b/man/assert_input_binary.Rd index 4ca8f7883..e1aae3621 100644 --- a/man/assert_input_binary.Rd +++ b/man/assert_input_binary.Rd @@ -14,7 +14,8 @@ that \code{predicted} represents the probability that the observed value is equa to the highest factor level.} \item{predicted}{Input to be checked. \code{predicted} should be a vector of -length n, holding probabilities. Values represent the probability that +length n, holding probabilities. Alternatively, \code{predicted} can be a matrix +of size n x 1. Values represent the probability that the corresponding value in \code{observed} will be equal to the highest available factor level.} } @@ -26,4 +27,4 @@ error otherwise. Function assesses whether the inputs correspond to the requirements for scoring binary forecasts. } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/assert_input_interval.Rd b/man/assert_input_interval.Rd index b8d2d93d0..4ab7025bc 100644 --- a/man/assert_input_interval.Rd +++ b/man/assert_input_interval.Rd @@ -28,4 +28,4 @@ error otherwise. Function assesses whether the inputs correspond to the requirements for scoring interval-based forecasts. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/assert_input_point.Rd b/man/assert_input_point.Rd index f2f9434a9..c2ea48b7c 100644 --- a/man/assert_input_point.Rd +++ b/man/assert_input_point.Rd @@ -21,4 +21,4 @@ error otherwise. Function assesses whether the inputs correspond to the requirements for scoring point forecasts. } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/assert_input_quantile.Rd b/man/assert_input_quantile.Rd index c9a499136..380cf5e6f 100644 --- a/man/assert_input_quantile.Rd +++ b/man/assert_input_quantile.Rd @@ -31,4 +31,4 @@ error otherwise. Function assesses whether the inputs correspond to the requirements for scoring quantile-based forecasts. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/assert_input_sample.Rd b/man/assert_input_sample.Rd index 027899adf..52071391a 100644 --- a/man/assert_input_sample.Rd +++ b/man/assert_input_sample.Rd @@ -24,4 +24,4 @@ error otherwise. Function assesses whether the inputs correspond to the requirements for scoring sample-based forecasts. } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/assert_not_null.Rd b/man/assert_not_null.Rd index 276615941..ce95cd10e 100644 --- a/man/assert_not_null.Rd +++ b/man/assert_not_null.Rd @@ -18,4 +18,4 @@ Check whether a certain variable is not \code{NULL} and return the name of that variable and the function call where the variable is missing. This function is a helper function that should only be called within other functions } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/assure_model_column.Rd b/man/assure_model_column.Rd index 456652df2..591588fea 100644 --- a/man/assure_model_column.Rd +++ b/man/assure_model_column.Rd @@ -6,6 +6,9 @@ \usage{ assure_model_column(data) } +\arguments{ +\item{data}{A data.frame or data.table with predicted and observed values.} +} \value{ The data.table with a column called \code{model} } @@ -13,4 +16,4 @@ The data.table with a column called \code{model} Check whether the data.table has a column called \code{model}. If not, a column called \code{model} is added with the value \verb{Unspecified model}. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/avail_forecasts.Rd b/man/avail_forecasts.Rd deleted file mode 100644 index 57d47c5cf..000000000 --- a/man/avail_forecasts.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/available_forecasts.R -\name{avail_forecasts} -\alias{avail_forecasts} -\title{Count Number of Available Forecasts \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}} -\usage{ -avail_forecasts(data, by = NULL, collapse = c("quantile", "sample")) -} -\arguments{ -\item{data}{A data.frame or data.table with predicted and observed values.} - -\item{by}{character vector or \code{NULL} (the default) that denotes the -categories over which the number of forecasts should be counted. -By default (\code{by = NULL}) this will be the unit of a single forecast (i.e. -all available columns (apart from a few "protected" columns such as -'predicted' and 'observed') plus "quantile" or "sample_id" where present).} - -\item{collapse}{character vector (default is \verb{c("quantile", "sample_id"}) -with names of categories for which the number of rows should be collapsed to -one when counting. For example, a single forecast is usually represented by a -set of several quantiles or samples and collapsing these to one makes sure -that a single forecast only gets counted once. Setting \code{collapse = c()} -would mean that all quantiles / samples would be counted as individual -forecasts.} -} -\value{ -A data.table with columns as specified in \code{by} and an additional -column "count" with the number of forecasts. -} -\description{ -Given a data set with forecasts, count the number of available forecasts -for arbitrary grouping (e.g. the number of forecasts per model, or the -number of forecasts per model and location). -This is useful to determine whether there are any missing forecasts. -} -\details{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Deprecated in 1.2.2. Use -\code{\link[=available_forecasts]{available_forecasts()}} instead. -} -\examples{ -data.table::setDTthreads(1) # only needed to avoid issues on CRAN - -available_forecasts(example_quantile, - by = c("model", "target_type") -) -} -\keyword{check-forecasts} diff --git a/man/bias_quantile.Rd b/man/bias_quantile.Rd index a5f4b6492..d7e781253 100644 --- a/man/bias_quantile.Rd +++ b/man/bias_quantile.Rd @@ -7,7 +7,7 @@ bias_quantile(observed, predicted, quantile, na.rm = TRUE) } \arguments{ -\item{observed}{a single observed value} +\item{observed}{a single number representing the observed value} \item{predicted}{vector of length corresponding to the number of quantiles that holds predictions} @@ -57,7 +57,6 @@ Bias can assume values between -1 and 1 and is 0 ideally (i.e. unbiased). } \examples{ - predicted <- c( 705.500, 1127.000, 4006.250, 4341.500, 4709.000, 4821.996, 5340.500, 5451.000, 5703.500, 6087.014, 6329.500, 6341.000, @@ -71,7 +70,4 @@ observed <- 8062 bias_quantile(observed, predicted, quantile) } -\author{ -Nikos Bosse \email{nikosbosse@gmail.com} -} \keyword{metric} diff --git a/man/bias_quantile_single_vector.Rd b/man/bias_quantile_single_vector.Rd index 26ac893b5..5d7ba030d 100644 --- a/man/bias_quantile_single_vector.Rd +++ b/man/bias_quantile_single_vector.Rd @@ -25,3 +25,4 @@ scalar with the quantile bias for a single quantile prediction Internal function to compute bias for a single observed value, a vector of predicted values and a vector of quantiles. } +\keyword{internal} diff --git a/man/bias_range.Rd b/man/bias_range.Rd deleted file mode 100644 index ba638a1b4..000000000 --- a/man/bias_range.Rd +++ /dev/null @@ -1,98 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/metrics-range.R -\name{bias_range} -\alias{bias_range} -\title{Determines Bias of Quantile Forecasts based on the range of the -prediction intervals} -\usage{ -bias_range(lower, upper, range, observed) -} -\arguments{ -\item{lower}{vector of length corresponding to the number of central -prediction intervals that holds predictions for the lower bounds of a -prediction interval} - -\item{upper}{vector of length corresponding to the number of central -prediction intervals that holds predictions for the upper bounds of a -prediction interval} - -\item{range}{vector of corresponding size with information about the width -of the central prediction interval} - -\item{observed}{a single observed value} -} -\value{ -scalar with the quantile bias for a single quantile prediction -} -\description{ -Determines bias from quantile forecasts based on the range of the -prediction intervals. For an increasing number of quantiles this measure -converges against the sample based bias version for integer and continuous -forecasts. -} -\details{ -For quantile forecasts, bias is measured as - -\deqn{ -B_t = (1 - 2 \cdot \max \{i | q_{t,i} \in Q_t \land q_{t,i} \leq x_t\}) -\mathbf{1}( x_t \leq q_{t, 0.5}) \\ -+ (1 - 2 \cdot \min \{i | q_{t,i} \in Q_t \land q_{t,i} \geq x_t\}) - \mathbf{1}( x_t \geq q_{t, 0.5}), -}{ -B_t = (1 - 2 * max(i | q_{t,i} in Q_t and q_{t,i} <= x_t\)) -1( x_t <= q_{t, 0.5}) + (1 - 2 * min(i | q_{t,i} in Q_t and q_{t,i} >= x_t)) - 1( x_t >= q_{t, 0.5}), -} - -where \eqn{Q_t} is the set of quantiles that form the predictive -distribution at time \eqn{t}. They represent our -belief about what the later observed value \eqn{x_t} will be. For -consistency, we define -\eqn{Q_t} such that it always includes the element -\eqn{q_{t, 0} = - \infty} and \eqn{q_{t,1} = \infty}. -\eqn{\mathbf{1}()}{1()} is the indicator function that is \eqn{1} if the -condition is satisfied and $0$ otherwise. In clearer terms, \eqn{B_t} is -defined as the maximum percentile rank for which the corresponding quantile -is still below the observed value, if the observed value is smaller than the -median of the predictive distribution. If the observed value is above the -median of the predictive distribution, then $B_t$ is the minimum percentile -rank for which the corresponding quantile is still larger than the true -value. If the observed value is exactly the median, both terms cancel out and -\eqn{B_t} is zero. For a large enough number of quantiles, the -percentile rank will equal the proportion of predictive samples below the -observed value, and this metric coincides with the one for -continuous forecasts. - -Bias can assume values between --1 and 1 and is 0 ideally. -} -\examples{ - -lower <- c( - 6341.000, 6329.500, 6087.014, 5703.500, - 5451.000, 5340.500, 4821.996, 4709.000, - 4341.500, 4006.250, 1127.000, 705.500 -) - -upper <- c( - 6341.000, 6352.500, 6594.986, 6978.500, - 7231.000, 7341.500, 7860.004, 7973.000, - 8340.500, 8675.750, 11555.000, 11976.500 -) - -range <- c(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98) - -observed <- 8062 - -bias_range( - lower = lower, upper = upper, - range = range, observed = observed -) -} -\seealso{ -bias_quantile bias_sample -} -\author{ -Nikos Bosse \email{nikosbosse@gmail.com} -} -\keyword{metric} diff --git a/man/check_attribute_conflict.Rd b/man/check_attribute_conflict.Rd index c01ac264e..588771565 100644 --- a/man/check_attribute_conflict.Rd +++ b/man/check_attribute_conflict.Rd @@ -24,4 +24,4 @@ an attribute \code{forecast_unit} is stored, but is different from the \code{forecast_unit} inferred from the data. The check is successful if the stored and the inferred value are the same. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/check_columns_present.Rd b/man/check_columns_present.Rd index cfe76f064..392839b07 100644 --- a/man/check_columns_present.Rd +++ b/man/check_columns_present.Rd @@ -20,4 +20,4 @@ The functions loops over the column names and checks whether they are present. If an issue is encountered, the function immediately stops and returns a message with the first issue encountered. } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/check_data_columns.Rd b/man/check_data_columns.Rd index 04fba0892..6115db266 100644 --- a/man/check_data_columns.Rd +++ b/man/check_data_columns.Rd @@ -18,4 +18,4 @@ Checks whether data is a data.frame, whether columns "observed" and "predicted" are present, and checks that only one of "quantile" and "sample_id" is present. } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/check_dims_ok_point.Rd b/man/check_dims_ok_point.Rd new file mode 100644 index 000000000..b0fb82ab8 --- /dev/null +++ b/man/check_dims_ok_point.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check-inputs-scoring-functions.R +\name{check_dims_ok_point} +\alias{check_dims_ok_point} +\title{Check Inputs Have Matching Dimensions} +\usage{ +check_dims_ok_point(observed, predicted) +} +\arguments{ +\item{observed}{Input to be checked. Should be a factor of length n with +exactly two levels, holding the observed values. +The highest factor level is assumed to be the reference level. This means +that \code{predicted} represents the probability that the observed value is equal +to the highest factor level.} + +\item{predicted}{Input to be checked. \code{predicted} should be a vector of +length n, holding probabilities. Alternatively, \code{predicted} can be a matrix +of size n x 1. Values represent the probability that +the corresponding value in \code{observed} will be equal to the highest +available factor level.} +} +\value{ +Returns TRUE if the check was successful and a string with an +error message otherwise +} +\description{ +Function assesses whether input dimensions match. In the +following, n is the number of observations / forecasts. Scalar values may +be repeated to match the length of the other input. +Allowed options are therefore +\itemize{ +\item \code{observed} is vector of length 1 or length n +\item \code{predicted} is +\itemize{ +\item a vector of of length 1 or length n +\item a matrix with n rows and 1 column +} +} +} +\keyword{internal_input_check} diff --git a/man/check_duplicates.Rd b/man/check_duplicates.Rd index e4b0918ee..dff552eb5 100644 --- a/man/check_duplicates.Rd +++ b/man/check_duplicates.Rd @@ -20,4 +20,4 @@ error message otherwise \description{ Runs \code{\link[=get_duplicate_forecasts]{get_duplicate_forecasts()}} and returns a message if an issue is encountered } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/check_has_attribute.Rd b/man/check_has_attribute.Rd index 339e0d6d0..5111aac38 100644 --- a/man/check_has_attribute.Rd +++ b/man/check_has_attribute.Rd @@ -18,4 +18,4 @@ error message otherwise \description{ Checks whether an object has an attribute } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/check_input_binary.Rd b/man/check_input_binary.Rd index 5b206f35b..86d4d2f9e 100644 --- a/man/check_input_binary.Rd +++ b/man/check_input_binary.Rd @@ -14,7 +14,8 @@ that \code{predicted} represents the probability that the observed value is equa to the highest factor level.} \item{predicted}{Input to be checked. \code{predicted} should be a vector of -length n, holding probabilities. Values represent the probability that +length n, holding probabilities. Alternatively, \code{predicted} can be a matrix +of size n x 1. Values represent the probability that the corresponding value in \code{observed} will be equal to the highest available factor level.} } @@ -26,4 +27,4 @@ error message otherwise Function assesses whether the inputs correspond to the requirements for scoring binary forecasts. } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/check_input_interval.Rd b/man/check_input_interval.Rd index faa9e4db5..7b68f92f5 100644 --- a/man/check_input_interval.Rd +++ b/man/check_input_interval.Rd @@ -28,4 +28,4 @@ error message otherwise Function assesses whether the inputs correspond to the requirements for scoring interval-based forecasts. } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/check_input_point.Rd b/man/check_input_point.Rd index 060b785b6..31e838db4 100644 --- a/man/check_input_point.Rd +++ b/man/check_input_point.Rd @@ -21,4 +21,4 @@ error message otherwise Function assesses whether the inputs correspond to the requirements for scoring point forecasts. } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/check_input_quantile.Rd b/man/check_input_quantile.Rd index a2315aa2e..e0a9877b1 100644 --- a/man/check_input_quantile.Rd +++ b/man/check_input_quantile.Rd @@ -28,4 +28,4 @@ error message otherwise Function assesses whether the inputs correspond to the requirements for scoring quantile-based forecasts. } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/check_input_sample.Rd b/man/check_input_sample.Rd index 607bebb5f..32724b8f7 100644 --- a/man/check_input_sample.Rd +++ b/man/check_input_sample.Rd @@ -24,4 +24,4 @@ error message otherwise Function assesses whether the inputs correspond to the requirements for scoring sample-based forecasts. } -\keyword{check-inputs} +\keyword{internal_input_check} diff --git a/man/check_no_NA_present.Rd b/man/check_no_NA_present.Rd index cf89ce468..bb742779d 100644 --- a/man/check_no_NA_present.Rd +++ b/man/check_no_NA_present.Rd @@ -20,4 +20,4 @@ Function checks whether any of the columns in a data.frame, as specified in \code{columns}, have NA values. If so, it returns a string with an error message, otherwise it returns TRUE. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/check_number_per_forecast.Rd b/man/check_number_per_forecast.Rd index 4d0a18432..f15ef1a54 100644 --- a/man/check_number_per_forecast.Rd +++ b/man/check_number_per_forecast.Rd @@ -20,4 +20,4 @@ Function checks the number of quantiles or samples per forecast. If the number of quantiles or samples is the same for all forecasts, it returns TRUE and a string with an error message otherwise. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/check_numeric_vector.Rd b/man/check_numeric_vector.Rd index 847bef940..1b736bc01 100644 --- a/man/check_numeric_vector.Rd +++ b/man/check_numeric_vector.Rd @@ -54,4 +54,4 @@ error message otherwise \description{ Helper function } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/check_quantiles.Rd b/man/check_quantiles.Rd index 7ddd0d2fc..3b3ae08b4 100644 --- a/man/check_quantiles.Rd +++ b/man/check_quantiles.Rd @@ -24,4 +24,4 @@ and contain no duplicates. This is used in \url{bias_range()} and \url{bias_quantile()} to provide informative errors to users. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/check_try.Rd b/man/check_try.Rd index 87f479f66..94574cf48 100644 --- a/man/check_try.Rd +++ b/man/check_try.Rd @@ -19,4 +19,4 @@ see whether assertions fail when checking inputs (i.e. to convert an \verb{assert_*()} statement into a check). If the expression fails, the error message is returned. If the expression succeeds, \code{TRUE} is returned. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/delete_columns.Rd b/man/delete_columns.Rd deleted file mode 100644 index 721e49be5..000000000 --- a/man/delete_columns.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{delete_columns} -\alias{delete_columns} -\title{Safely delete Columns From a Data.table} -\usage{ -delete_columns(df, cols_to_delete, make_unique = FALSE) -} -\arguments{ -\item{df}{A data.table or data.frame from which columns shall be deleted} - -\item{cols_to_delete}{character vector with names of columns to be deleted} - -\item{make_unique}{whether to make the data set unique after removing columns} -} -\value{ -A data.table -} -\description{ -take a vector of column names and delete the columns if they -are present in the data.table -} -\keyword{internal} diff --git a/man/document_assert_functions.Rd b/man/document_assert_functions.Rd index ee0dbc967..c8046bf67 100644 --- a/man/document_assert_functions.Rd +++ b/man/document_assert_functions.Rd @@ -10,3 +10,4 @@ error otherwise. \description{ Documentation template for check functions } +\keyword{internal} diff --git a/man/document_check_functions.Rd b/man/document_check_functions.Rd index 6f7f7f677..256db46f5 100644 --- a/man/document_check_functions.Rd +++ b/man/document_check_functions.Rd @@ -15,3 +15,4 @@ error message otherwise \description{ Documentation template for check functions } +\keyword{internal} diff --git a/man/document_score_data.Rd b/man/document_score_data.Rd index 9e30190d3..9b8b19558 100644 --- a/man/document_score_data.Rd +++ b/man/document_score_data.Rd @@ -12,3 +12,4 @@ specifications detailed in \code{\link[=score]{score()}}.} \description{ Documentation template for scoring input data } +\keyword{internal} diff --git a/man/document_test_functions.Rd b/man/document_test_functions.Rd index 620cb1989..68fe7c7a5 100644 --- a/man/document_test_functions.Rd +++ b/man/document_test_functions.Rd @@ -9,3 +9,4 @@ Returns TRUE if the check was successful and FALSE otherwise \description{ Documentation template for test functions } +\keyword{internal} diff --git a/man/example_integer.Rd b/man/example_integer.Rd index ebdc643af..c3c97183a 100644 --- a/man/example_integer.Rd +++ b/man/example_integer.Rd @@ -19,6 +19,9 @@ A data frame with 13,429 rows and 10 columns: \item{sample_id}{id for the corresponding sample} } } +\source{ +\url{https://github.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/commit/a42867b1ea152c57e25b04f9faa26cfd4bfd8fa6/} +} \usage{ example_integer } diff --git a/man/forecast_types.Rd b/man/forecast_types.Rd new file mode 100644 index 000000000..b0d825f76 --- /dev/null +++ b/man/forecast_types.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/documentation-templates.R +\name{forecast_types} +\alias{forecast_types} +\title{Documentation template for forecast types} +\description{ +Documentation template for forecast types +} +\section{Forecast types and input format}{ +Various different forecast types / forecast formats are supported. At the +moment, those are +\itemize{ +\item point forecasts +\item binary forecasts ("soft binary classification") +\item Probabilistic forecasts in a quantile-based format (a forecast is +represented as a set of predictive quantiles) +\item Probabilistic forecasts in a sample-based format (a forecast is represented +as a set of predictive samples) +} + +Forecast types are determined based on the columns present in the input data. + +\emph{Point forecasts} require a column \code{observed} of type numeric and a column +\code{predicted} of type numeric. + +\emph{Binary forecasts} require a column \code{observed} of type factor with exactly +two levels and a column \code{predicted} of type numeric with probabilities, +corresponding to the probability that \code{observed} is equal to the second +factor level. See details \link[=brier_score]{here} for more information. + +\emph{Quantile-based forecasts} require a column \code{observed} of type numeric, +a column \code{predicted} of type numeric, and a column \code{quantile} of type numeric +with quantile-levels (between 0 and 1). + +\emph{Sample-based forecasts} require a column \code{observed} of type numeric, +a column \code{predicted} of type numeric, and a column \code{sample_id} of type +numeric with sample indices. + +For more information see the vignettes and the example data +(\link{example_quantile}, \link{example_continuous}, \link{example_integer}, +\code{\link[=example_point]{example_point()}}, and \link{example_binary}). +} + +\section{Forecast unit}{ +In order to score forecasts, \code{scoringutils} needs to know which of the rows +of the data belong together and jointly form a single forecasts. This is +easy e.g. for point forecast, where there is one row per forecast. For +quantile or sample-based forecasts, however, there are multiple rows that +belong to single forecast. + +The \emph{forecast unit} or \emph{unit of a single forecast} is then described by the +combination of columns that uniquely identify a single forecast. +For example, we could have forecasts made by different models in various +locations at different time points, each for several weeks into the future. +The forecast unit could then be described as +\code{forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")}. +\code{scoringutils} automatically tries to determine the unit of a single +forecast. It uses all existing columns for this, which means that no columns +must be present that are unrelated to the forecast unit. As a very simplistic +example, if you had an additional row, "even", that is one if the row number +is even and zero otherwise, then this would mess up scoring as \code{scoringutils} +then thinks that this column was relevant in defining the forecast unit. + +In order to avoid issues, we recommend using the function +\code{\link[=set_forecast_unit]{set_forecast_unit()}} to determine the forecast unit manually. +The function simply drops unneeded columns, while making sure that all +necessary, 'protected columns' like "predicted" or "observed" are retained. +} + +\keyword{internal} diff --git a/man/available_forecasts.Rd b/man/get_forecast_counts.Rd similarity index 91% rename from man/available_forecasts.Rd rename to man/get_forecast_counts.Rd index 3c214a1e3..1ed71aac7 100644 --- a/man/available_forecasts.Rd +++ b/man/get_forecast_counts.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/available_forecasts.R -\name{available_forecasts} -\alias{available_forecasts} +\name{get_forecast_counts} +\alias{get_forecast_counts} \title{Count Number of Available Forecasts} \usage{ -available_forecasts(data, by = NULL, collapse = c("quantile", "sample_id")) +get_forecast_counts(data, by = NULL, collapse = c("quantile", "sample_id")) } \arguments{ \item{data}{A data.frame or data.table with predicted and observed values.} @@ -36,7 +36,7 @@ This is useful to determine whether there are any missing forecasts. \examples{ data.table::setDTthreads(1) # only needed to avoid issues on CRAN -available_forecasts(example_quantile, +get_forecast_counts(example_quantile, by = c("model", "target_type") ) } diff --git a/man/get_forecast_type.Rd b/man/get_forecast_type.Rd index a923baa80..27c4e08b2 100644 --- a/man/get_forecast_type.Rd +++ b/man/get_forecast_type.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/get_-functions.R \name{get_forecast_type} \alias{get_forecast_type} -\title{Infer the type of a forecast based on a data.frame} +\title{Infer Forecast Type} \usage{ get_forecast_type(data) } @@ -14,9 +14,20 @@ Character vector of length one with either "binary", "quantile", "sample" or "point". } \description{ -Internal helper function to get the type of the forecast. -Options are "sample-based", "quantile-based", "binary" or "point" forecast. -The function runs additional checks to make sure the data satisfies -requirements and throws an informative error if any issues are found. +Helper function to infer the forecast type based on a +data.frame or similar with predictions. Please check the vignettes to +learn more about forecast types. + +Possible forecast types are +\itemize{ +\item "sample-based" +\item "quantile-based" +\item "binary" +\item "point" forecast. } -\keyword{internal} + +The function runs additional checks to make sure the data satisfies the +requirements of the respective forecast type and throws an +informative error if any issues are found. +} +\keyword{check-forecasts} diff --git a/man/get_forecast_unit.Rd b/man/get_forecast_unit.Rd index 065fe94d4..bbe04d016 100644 --- a/man/get_forecast_unit.Rd +++ b/man/get_forecast_unit.Rd @@ -10,7 +10,11 @@ get_forecast_unit(data, check_conflict = FALSE) \item{data}{A data.frame or data.table with predicted and observed values.} \item{check_conflict}{Whether or not to check whether there is a conflict -between a stored attribute and the inferred forecast unit. Defaults to FALSE.} +between a stored attribute and the inferred forecast unit. When you create +a forecast object, the forecast unit is stored as an attribute. If you +later change the columns of the data, the forecast unit as inferred from the +data might change compared to the stored attribute. Should this result in a +warning? Defaults to FALSE.} } \value{ A character vector with the column names that define the unit of @@ -24,4 +28,4 @@ the columns that are protected, i.e. those returned by \code{\link[=get_protected_columns]{get_protected_columns()}} as well as the names of the metrics that were specified during scoring, if any. } -\keyword{internal} +\keyword{check-forecasts} diff --git a/man/get_metrics.Rd b/man/get_metrics.Rd index 438e6b8f5..aed3b5e81 100644 --- a/man/get_metrics.Rd +++ b/man/get_metrics.Rd @@ -7,7 +7,7 @@ get_metrics(scores) } \arguments{ -\item{score}{A data.table with an attribute \code{metric_names}} +\item{scores}{A data.table with an attribute \code{metric_names}} } \value{ Character vector with the metrics that were used for scoring. @@ -16,4 +16,4 @@ Character vector with the metrics that were used for scoring. Internal helper function to get the metrics that were used to score forecasts. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/get_scoringutils_attributes.Rd b/man/get_scoringutils_attributes.Rd index 82ce6dcbe..5f2770a6a 100644 --- a/man/get_scoringutils_attributes.Rd +++ b/man/get_scoringutils_attributes.Rd @@ -7,7 +7,7 @@ get_scoringutils_attributes(object) } \arguments{ -\item{object}{A object of class \code{scoringutils_}} +\item{object}{A object of class \code{forecast_}} } \value{ A named list with the attributes of that object. diff --git a/man/get_type.Rd b/man/get_type.Rd index bcce4b70a..596cf4c65 100644 --- a/man/get_type.Rd +++ b/man/get_type.Rd @@ -19,4 +19,4 @@ of observed or predicted values). The function checks whether the input is a factor, or else whether it is integer (or can be coerced to integer) or whether it's continuous. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/interval_coverage.Rd b/man/interval_coverage.Rd index 74256cc77..3da7973cc 100644 --- a/man/interval_coverage.Rd +++ b/man/interval_coverage.Rd @@ -52,3 +52,4 @@ quantile <- c(0.1, 0.25, 0.5, 0.75, 0.9) interval_coverage_quantile(observed, predicted, quantile) interval_coverage_sample(observed, predicted) } +\keyword{metric} diff --git a/man/interval_coverage_deviation_quantile.Rd b/man/interval_coverage_dev_quantile.Rd similarity index 92% rename from man/interval_coverage_deviation_quantile.Rd rename to man/interval_coverage_dev_quantile.Rd index 9a6029ec7..181c45c6b 100644 --- a/man/interval_coverage_deviation_quantile.Rd +++ b/man/interval_coverage_dev_quantile.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/metrics-quantile.R -\name{interval_coverage_deviation_quantile} -\alias{interval_coverage_deviation_quantile} +\name{interval_coverage_dev_quantile} +\alias{interval_coverage_dev_quantile} \title{Interval Coverage Deviation (For Quantile-Based Forecasts)} \usage{ -interval_coverage_deviation_quantile(observed, predicted, quantile) +interval_coverage_dev_quantile(observed, predicted, quantile) } \arguments{ \item{observed}{numeric vector of size n with the observed values} @@ -71,5 +71,6 @@ predicted <- rbind( c(-2, 0, 3, 3, 4) ) quantile <- c(0.1, 0.25, 0.5, 0.75, 0.9) -interval_coverage_deviation_quantile(observed, predicted, quantile) +interval_coverage_dev_quantile(observed, predicted, quantile) } +\keyword{metric} diff --git a/man/interval_score.Rd b/man/interval_score.Rd index 9c0f4c567..3616eabfb 100644 --- a/man/interval_score.Rd +++ b/man/interval_score.Rd @@ -82,7 +82,7 @@ alpha <- (100 - interval_range) / 100 lower <- qnorm(alpha / 2, rnorm(30, mean = 1:30)) upper <- qnorm((1 - alpha / 2), rnorm(30, mean = 11:40)) -interval_score( +scoringutils:::interval_score( observed = observed, lower = lower, upper = upper, @@ -90,10 +90,12 @@ interval_score( ) # gives a warning, as the interval_range should likely be 50 instead of 0.5 -interval_score(observed = 4, upper = 8, lower = 2, interval_range = 0.5) +scoringutils:::interval_score( + observed = 4, upper = 8, lower = 2, interval_range = 0.5 +) # example with missing values and separate results -interval_score( +scoringutils:::interval_score( observed = c(observed, NA), lower = c(lower, NA), upper = c(NA, upper), diff --git a/man/metrics_quantile.Rd b/man/metrics_quantile.Rd index ea444ee7e..d62cc1b3c 100644 --- a/man/metrics_quantile.Rd +++ b/man/metrics_quantile.Rd @@ -13,14 +13,16 @@ metrics_quantile \description{ A named list with functions: \itemize{ -\item "wis" = \code{\link[=wis]{wis()}} +\item "wis" = \link{wis} \item "overprediction" = \code{\link[=overprediction]{overprediction()}} \item "underprediction" = \code{\link[=underprediction]{underprediction()}} \item "dispersion" = \code{\link[=dispersion]{dispersion()}} \item "bias" = \code{\link[=bias_quantile]{bias_quantile()}} -\item "coverage_50" = \(...) {run_safely(..., range = 50, fun = \link[=interval_coverage_quantile]{interval_coverage_quantile})} -\item "coverage_90" = \(...) {run_safely(..., range = 90, fun = \link[=interval_coverage_quantile]{interval_coverage_quantile})} -\item "coverage_deviation" = \code{\link[=interval_coverage_deviation_quantile]{interval_coverage_deviation_quantile()}}, +\item "coverage_50" = \code{\link[=interval_coverage_quantile]{interval_coverage_quantile()}} +\item "coverage_90" = \(...) \{ +run_safely(..., range = 90, fun = \link{interval_coverage_quantile}) +\} +\item "coverage_deviation" = \code{\link[=interval_coverage_dev_quantile]{interval_coverage_dev_quantile()}}, \item "ae_median" = \code{\link[=ae_median_quantile]{ae_median_quantile()}} } } diff --git a/man/new_scoringutils.Rd b/man/new_forecast.Rd similarity index 85% rename from man/new_scoringutils.Rd rename to man/new_forecast.Rd index b2f83ff6a..772c5e1b4 100644 --- a/man/new_scoringutils.Rd +++ b/man/new_forecast.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/validate.R -\name{new_scoringutils} -\alias{new_scoringutils} +\name{new_forecast} +\alias{new_forecast} \title{Class constructor for scoringutils objects} \usage{ -new_scoringutils(data, classname) +new_forecast(data, classname) } \arguments{ \item{data}{A data.frame or data.table with predicted and observed values.} @@ -22,4 +22,4 @@ Construct a class based on a data.frame or similar. The constructor \item assigns a class } } -\keyword{validate} +\keyword{internal} diff --git a/man/plot.scoringutils_available_forecasts.Rd b/man/plot.scoringutils_available_forecasts.Rd deleted file mode 100644 index f11e0dc5d..000000000 --- a/man/plot.scoringutils_available_forecasts.Rd +++ /dev/null @@ -1,50 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot.R -\name{plot.scoringutils_available_forecasts} -\alias{plot.scoringutils_available_forecasts} -\title{Visualise Where Forecasts Are Available} -\usage{ -\method{plot}{scoringutils_available_forecasts}( - x, - yvar = "model", - xvar = "forecast_date", - make_xvar_factor = TRUE, - show_numbers = TRUE, - ... -) -} -\arguments{ -\item{x}{an S3 object of class "scoringutils_available_forecasts" -as produced by \code{\link[=available_forecasts]{available_forecasts()}}} - -\item{yvar}{character vector of length one that denotes the name of the column -to appear on the y-axis of the plot. Default is "model".} - -\item{xvar}{character vector of length one that denotes the name of the column -to appear on the x-axis of the plot. Default is "forecast_date".} - -\item{make_xvar_factor}{logical (default is TRUE). Whether or not to convert -the variable on the x-axis to a factor. This has an effect e.g. if dates -are shown on the x-axis.} - -\item{show_numbers}{logical (default is \code{TRUE}) that indicates whether -or not to show the actual count numbers on the plot} - -\item{...}{additional arguments (not used here)} -} -\value{ -ggplot object with a plot of interval coverage -} -\description{ -Visualise Where Forecasts Are Available -} -\examples{ -library(ggplot2) -available_forecasts <- available_forecasts( - example_quantile, by = c("model", "target_type", "target_end_date") -) -plot( - available_forecasts, xvar = "target_end_date", show_numbers = FALSE -) + - facet_wrap("target_type") -} diff --git a/man/plot_avail_forecasts.Rd b/man/plot_avail_forecasts.Rd deleted file mode 100644 index c10035012..000000000 --- a/man/plot_avail_forecasts.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot.R -\name{plot_avail_forecasts} -\alias{plot_avail_forecasts} -\title{Visualise Where Forecasts Are Available \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}} -\usage{ -plot_avail_forecasts( - available_forecasts, - y = "model", - x = "forecast_date", - make_x_factor = TRUE, - show_numbers = TRUE -) -} -\arguments{ -\item{available_forecasts}{an S3 object of class "scoringutils_available_forecasts" -as produced by \code{\link[=available_forecasts]{available_forecasts()}}} - -\item{y}{character vector of length one that denotes the name of the column -to appear on the y-axis of the plot. Default is "model".} - -\item{x}{character vector of length one that denotes the name of the column -to appear on the x-axis of the plot. Default is "forecast_date".} - -\item{make_x_factor}{logical (default is TRUE). Whether or not to convert -the variable on the x-axis to a factor. This has an effect e.g. if dates -are shown on the x-axis.} - -\item{show_numbers}{logical (default is \code{TRUE}) that indicates whether -or not to show the actual count numbers on the plot} -} -\description{ -Old version of \code{\link[=plot.scoringutils_available_forecasts]{plot.scoringutils_available_forecasts()}} for compatibility. -} diff --git a/man/plot_forecast_counts.Rd b/man/plot_forecast_counts.Rd new file mode 100644 index 000000000..c5285c9dd --- /dev/null +++ b/man/plot_forecast_counts.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.R +\name{plot_forecast_counts} +\alias{plot_forecast_counts} +\title{Visualise Where Forecasts Are Available} +\usage{ +plot_forecast_counts( + forecast_counts, + x, + y = "model", + x_as_factor = TRUE, + show_counts = TRUE +) +} +\arguments{ +\item{forecast_counts}{a data.table (or similar) with a column \code{count} +holding forecast counts, as produced by \code{\link[=get_forecast_counts]{get_forecast_counts()}}} + +\item{x}{character vector of length one that denotes the name of the column +to appear on the x-axis of the plot.} + +\item{y}{character vector of length one that denotes the name of the column +to appear on the y-axis of the plot. Default is "model".} + +\item{x_as_factor}{logical (default is TRUE). Whether or not to convert +the variable on the x-axis to a factor. This has an effect e.g. if dates +are shown on the x-axis.} + +\item{show_counts}{logical (default is \code{TRUE}) that indicates whether +or not to show the actual count numbers on the plot} +} +\value{ +ggplot object with a plot of interval coverage +} +\description{ +Visualise Where Forecasts Are Available +} +\examples{ +library(ggplot2) +forecast_counts <- get_forecast_counts( + example_quantile, by = c("model", "target_type", "target_end_date") +) +plot_forecast_counts( + forecast_counts, x = "target_end_date", show_counts = FALSE +) + + facet_wrap("target_type") +} diff --git a/man/print.scoringutils_check.Rd b/man/print.scoringutils_check.Rd index 4918d4c0b..f411e226c 100644 --- a/man/print.scoringutils_check.Rd +++ b/man/print.scoringutils_check.Rd @@ -17,7 +17,7 @@ Helper function that prints the output generated by \code{check_forecasts()} } \examples{ -check <- validate(example_quantile) +check <- as_forecast(example_quantile) print(check) } \keyword{check-forecasts} diff --git a/man/quantile_score.Rd b/man/quantile_score.Rd index 4f1c5b0b7..0cab0b160 100644 --- a/man/quantile_score.Rd +++ b/man/quantile_score.Rd @@ -29,7 +29,7 @@ vector with the scoring values \description{ Proper Scoring Rule to score quantile predictions. Smaller values are better. The quantile score is -closely related to the Interval score (see \code{\link[=interval_score]{interval_score()}}) and is +closely related to the Interval score (see \code{\link[=wis]{wis()}}) and is the quantile equivalent that works with single quantiles instead of central prediction intervals. } diff --git a/man/quantile_to_interval.Rd b/man/quantile_to_interval.Rd index e99cbadad..b61498eee 100644 --- a/man/quantile_to_interval.Rd +++ b/man/quantile_to_interval.Rd @@ -65,3 +65,4 @@ be characterised by one or multiple prediction intervals, e.g. the lower and upper bounds of the 50\% and 90\% prediction intervals (corresponding to the 0.25 and 0.75 as well as the 0.05 and 0.095 quantiles). } +\keyword{data-handling} diff --git a/man/remove_scoringutils_class.Rd b/man/remove_scoringutils_class.Rd deleted file mode 100644 index 9257d84dd..000000000 --- a/man/remove_scoringutils_class.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{remove_scoringutils_class} -\alias{remove_scoringutils_class} -\title{Remove scoringutils_ Class and Attributes} -\usage{ -remove_scoringutils_class(object) -} -\arguments{ -\item{object}{An object to remove scoringutils classes and attributes from} -} -\value{ -The object with scoringutils classes and attributes removed -} -\description{ -This function removes all classes that start with -"scoringutils_" and all attributes associated with scoringutils. -} -\keyword{internal} diff --git a/man/run_safely.Rd b/man/run_safely.Rd index 9e673b061..b8c45751f 100644 --- a/man/run_safely.Rd +++ b/man/run_safely.Rd @@ -33,3 +33,4 @@ run_safely(2, y = 3, fun = f) run_safely(fun = f) run_safely(y = 3, fun = f) } +\keyword{scoring} diff --git a/man/score.Rd b/man/score.Rd index c26fbb24c..d317ed64a 100644 --- a/man/score.Rd +++ b/man/score.Rd @@ -3,23 +3,23 @@ \name{score} \alias{score} \alias{score.default} -\alias{score.scoringutils_binary} -\alias{score.scoringutils_point} -\alias{score.scoringutils_sample} -\alias{score.scoringutils_quantile} +\alias{score.forecast_binary} +\alias{score.forecast_point} +\alias{score.forecast_sample} +\alias{score.forecast_quantile} \title{Evaluate forecasts in a data.frame format} \usage{ score(data, ...) \method{score}{default}(data, ...) -\method{score}{scoringutils_binary}(data, metrics = metrics_binary, ...) +\method{score}{forecast_binary}(data, metrics = metrics_binary, ...) -\method{score}{scoringutils_point}(data, metrics = metrics_point, ...) +\method{score}{forecast_point}(data, metrics = metrics_point, ...) -\method{score}{scoringutils_sample}(data, metrics = metrics_sample, ...) +\method{score}{forecast_sample}(data, metrics = metrics_sample, ...) -\method{score}{scoringutils_quantile}(data, metrics = metrics_quantile, ...) +\method{score}{forecast_quantile}(data, metrics = metrics_quantile, ...) } \arguments{ \item{data}{A data.frame or data.table with predicted and observed values.} @@ -45,14 +45,16 @@ forecasts. \code{score()} applies a selection of scoring metrics to a data.frame of forecasts. It is the workhorse of the \code{scoringutils} package. \code{score()} is a generic that dispatches to different methods depending on the -class of the input data. The default method is \code{score.default()}, which -validates the input, assigns as class based on the forecast type, and then -calls \code{score()} again to dispatch to the appropriate method. See below for -more information on how forecast types are determined. -} -\details{ -\strong{Forecast types and input format} +class of the input data. +We recommend that users call \code{\link[=as_forecast]{as_forecast()}} prior to calling \code{score()} to +validate the input data and convert it to a forecast object (though +\code{score.default()} will do this if it hasn't happened before). +See below for more information on forecast types and input formats. +For additional help and examples, check out the \href{https://epiforecasts.io/scoringutils/articles/scoringutils.html}{Getting Started Vignette} as +well as the paper \href{https://arxiv.org/abs/2205.07090}{Evaluating Forecasts with scoringutils in R}. +} +\section{Forecast types and input format}{ Various different forecast types / forecast formats are supported. At the moment, those are \itemize{ @@ -85,9 +87,9 @@ numeric with sample indices. For more information see the vignettes and the example data (\link{example_quantile}, \link{example_continuous}, \link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}). +} -\strong{Forecast unit} - +\section{Forecast unit}{ In order to score forecasts, \code{scoringutils} needs to know which of the rows of the data belong together and jointly form a single forecasts. This is easy e.g. for point forecast, where there is one row per forecast. For @@ -111,23 +113,13 @@ In order to avoid issues, we recommend using the function \code{\link[=set_forecast_unit]{set_forecast_unit()}} to determine the forecast unit manually. The function simply drops unneeded columns, while making sure that all necessary, 'protected columns' like "predicted" or "observed" are retained. - -\strong{Validating inputs} - -We recommend that users validate their input prior to scoring using the -function \code{\link[=validate]{validate()}} (though this will also be run internally by \code{\link[=score]{score()}}). -The function checks the input data and provides helpful information. - -\strong{Further help} - -For additional help and examples, check out the \href{https://epiforecasts.io/scoringutils/articles/scoringutils.html}{Getting Started Vignette} as -well as the paper \href{https://arxiv.org/abs/2205.07090}{Evaluating Forecasts with scoringutils in R}. } + \examples{ library(magrittr) # pipe operator data.table::setDTthreads(1) # only needed to avoid issues on CRAN -validated <- validate(example_quantile) +validated <- as_forecast(example_quantile) score(validated) \%>\% summarise_scores(by = c("model", "target_type")) @@ -137,7 +129,7 @@ example_quantile \%>\% set_forecast_unit( c("location", "target_end_date", "target_type", "horizon", "model") ) \%>\% - validate() \%>\% + as_forecast() \%>\% score() # forecast formats with different metrics @@ -148,7 +140,6 @@ score(example_point) score(example_integer) score(example_continuous) } - } \references{ Bosse NI, Gruson H, Cori A, van Leeuwen E, Funk S, Abbott S diff --git a/man/set_forecast_unit.Rd b/man/set_forecast_unit.Rd index 0f1dcc7d3..d1d5e5639 100644 --- a/man/set_forecast_unit.Rd +++ b/man/set_forecast_unit.Rd @@ -27,7 +27,7 @@ of a single forecast automatically by simply assuming that all column names are relevant to determine the forecast unit. This may lead to unexpected behaviour, so setting the forecast unit explicitly can help make the code easier to debug and easier to read. When used as part of a workflow, -\code{set_forecast_unit()} can be directly piped into \code{validate()} to +\code{set_forecast_unit()} can be directly piped into \code{as_forecast()} to check everything is in order. } \examples{ diff --git a/man/test_columns_not_present.Rd b/man/test_columns_not_present.Rd index f55fe25b1..5f7b71994 100644 --- a/man/test_columns_not_present.Rd +++ b/man/test_columns_not_present.Rd @@ -19,4 +19,4 @@ The function checks whether all column names are NOT present. If none of the columns are present, the function returns TRUE. If one or more columns are present, the function returns FALSE. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/test_columns_present.Rd b/man/test_columns_present.Rd index ed5076417..1931ca59c 100644 --- a/man/test_columns_present.Rd +++ b/man/test_columns_present.Rd @@ -19,4 +19,4 @@ The function checks whether all column names are present. If one or more columns are missing, the function returns FALSE. If all columns are present, the function returns TRUE. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/test_forecast_type_is_binary.Rd b/man/test_forecast_type_is_binary.Rd index de6b0501c..79c952e17 100644 --- a/man/test_forecast_type_is_binary.Rd +++ b/man/test_forecast_type_is_binary.Rd @@ -15,4 +15,4 @@ Returns TRUE if basic requirements are satisfied and FALSE otherwise \description{ Checks type of the necessary columns. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/test_forecast_type_is_point.Rd b/man/test_forecast_type_is_point.Rd index 00cb9dd2c..ad1c624fc 100644 --- a/man/test_forecast_type_is_point.Rd +++ b/man/test_forecast_type_is_point.Rd @@ -15,4 +15,4 @@ Returns TRUE if basic requirements are satisfied and FALSE otherwise \description{ Checks type of the necessary columns. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/test_forecast_type_is_quantile.Rd b/man/test_forecast_type_is_quantile.Rd index 214a86d32..7f77043f3 100644 --- a/man/test_forecast_type_is_quantile.Rd +++ b/man/test_forecast_type_is_quantile.Rd @@ -15,4 +15,4 @@ Returns TRUE if basic requirements are satisfied and FALSE otherwise \description{ Checks type of the necessary columns. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/test_forecast_type_is_sample.Rd b/man/test_forecast_type_is_sample.Rd index 4568ed93e..be7fea95c 100644 --- a/man/test_forecast_type_is_sample.Rd +++ b/man/test_forecast_type_is_sample.Rd @@ -15,4 +15,4 @@ Returns TRUE if basic requirements are satisfied and FALSE otherwise \description{ Checks type of the necessary columns. } -\keyword{internal} +\keyword{internal_input_check} diff --git a/man/validate.Rd b/man/validate.Rd deleted file mode 100644 index 88badf49a..000000000 --- a/man/validate.Rd +++ /dev/null @@ -1,55 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/validate.R -\name{validate} -\alias{validate} -\alias{validate.default} -\alias{validate.scoringutils_binary} -\alias{validate.scoringutils_point} -\alias{validate.scoringutils_quantile} -\alias{validate.scoringutils_sample} -\title{Validate input data} -\usage{ -validate(data, ...) - -\method{validate}{default}(data, ...) - -\method{validate}{scoringutils_binary}(data, ...) - -\method{validate}{scoringutils_point}(data, ...) - -\method{validate}{scoringutils_quantile}(data, ...) - -\method{validate}{scoringutils_sample}(data, ...) -} -\arguments{ -\item{data}{A data.frame or data.table with predicted and observed values.} - -\item{...}{additional arguments} -} -\value{ -Depending on the forecast type, an object of class -\code{scoringutils_binary}, \code{scoringutils_point}, \code{scoringutils_sample} or -\code{scoringutils_quantile}. -} -\description{ -The default method, \code{\link[=validate.default]{validate.default()}} only runs basic input checks. -It's main purpose is to determine the forecast type (binary, point, -sample-based or quantile-based) based on the input data. It then constructs -an appropriate class and calls \code{\link[=validate]{validate()}} again which dispatches to the -appropriate method. - -Methods for the different classes run \code{\link[=validate_general]{validate_general()}}, which performs -checks that are the same for all forecast types and then perform specific -checks for the specific forecast type. - -You can find more information about input formats in the vignette. -To summarise, the data should come in one of four different formats: -\itemize{ -\item A format for binary predictions (see \link{example_binary}) -\item A format for point predictions (see \link{example_point}) -\item A sample-based format for discrete or continuous predictions -(see \link{example_continuous} and \link{example_integer}) -\item A quantile-based format (see \link{example_quantile}) -} -} -\keyword{validate} diff --git a/man/validate_forecast.Rd b/man/validate_forecast.Rd new file mode 100644 index 000000000..542e84dd5 --- /dev/null +++ b/man/validate_forecast.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validate.R +\name{validate_forecast} +\alias{validate_forecast} +\title{Validate input data} +\usage{ +validate_forecast(data, ...) +} +\arguments{ +\item{data}{A data.frame or data.table with predicted and observed values.} + +\item{...}{additional arguments} +} +\value{ +Depending on the forecast type, an object of class +\code{forecast_binary}, \code{forecast_point}, \code{forecast_sample} or +\code{forecast_quantile}. +} +\description{ +Methods for the different classes run \code{\link[=validate_general]{validate_general()}}, which performs +checks that are the same for all forecast types and then perform specific +checks for the specific forecast type. +} +\section{Forecast types and input format}{ +Various different forecast types / forecast formats are supported. At the +moment, those are +\itemize{ +\item point forecasts +\item binary forecasts ("soft binary classification") +\item Probabilistic forecasts in a quantile-based format (a forecast is +represented as a set of predictive quantiles) +\item Probabilistic forecasts in a sample-based format (a forecast is represented +as a set of predictive samples) +} + +Forecast types are determined based on the columns present in the input data. + +\emph{Point forecasts} require a column \code{observed} of type numeric and a column +\code{predicted} of type numeric. + +\emph{Binary forecasts} require a column \code{observed} of type factor with exactly +two levels and a column \code{predicted} of type numeric with probabilities, +corresponding to the probability that \code{observed} is equal to the second +factor level. See details \link[=brier_score]{here} for more information. + +\emph{Quantile-based forecasts} require a column \code{observed} of type numeric, +a column \code{predicted} of type numeric, and a column \code{quantile} of type numeric +with quantile-levels (between 0 and 1). + +\emph{Sample-based forecasts} require a column \code{observed} of type numeric, +a column \code{predicted} of type numeric, and a column \code{sample_id} of type +numeric with sample indices. + +For more information see the vignettes and the example data +(\link{example_quantile}, \link{example_continuous}, \link{example_integer}, +\code{\link[=example_point]{example_point()}}, and \link{example_binary}). +} + +\examples{ +forecast <- as_forecast(example_binary) +validate_forecast(forecast) +} +\keyword{check-forecasts} diff --git a/man/validate_general.Rd b/man/validate_general.Rd index 548fa0322..e379c5a0f 100644 --- a/man/validate_general.Rd +++ b/man/validate_general.Rd @@ -25,4 +25,4 @@ forecast type. The function for all forecasts } } -\keyword{validate} +\keyword{internal_input_check} diff --git a/man/validate_metrics.Rd b/man/validate_metrics.Rd index 9ce9eda87..d373eaa58 100644 --- a/man/validate_metrics.Rd +++ b/man/validate_metrics.Rd @@ -21,3 +21,4 @@ of valid functions. The function is used in \code{\link[=score]{score()}} to make sure that all metrics are valid functions } +\keyword{internal_input_check} diff --git a/man/wis.Rd b/man/wis.Rd index 65f099757..eb749f309 100644 --- a/man/wis.Rd +++ b/man/wis.Rd @@ -141,3 +141,14 @@ can control the behvaviour with the \code{count_median_twice}-argument. WIS components can be computed individually using the functions \code{overprediction}, \code{underprediction}, and \code{dispersion.} } +\examples{ +observed <- c(1, -15, 22) +predicted <- rbind( + c(-1, 0, 1, 2, 3), + c(-2, 1, 2, 2, 4), + c(-2, 0, 3, 3, 4) +) +quantile <- c(0.1, 0.25, 0.5, 0.75, 0.9) +wis(observed, predicted, quantile) +} +\keyword{metric} diff --git a/tests/testthat/test-available_forecasts.R b/tests/testthat/test-available_forecasts.R index f8e0ad0c7..ffa9e4f9e 100644 --- a/tests/testthat/test-available_forecasts.R +++ b/tests/testthat/test-available_forecasts.R @@ -1,6 +1,6 @@ -test_that("available_forecasts() works as expected", { +test_that("get_forecast_counts() works as expected", { af <- suppressMessages( - available_forecasts(example_quantile, + get_forecast_counts(example_quantile, by = c("model", "target_type", "target_end_date") ) ) @@ -8,12 +8,12 @@ test_that("available_forecasts() works as expected", { expect_type(af$target_type, "character") expect_type(af$`count`, "integer") expect_equal(nrow(af[is.na(`count`)]), 0) - af <- available_forecasts(example_quantile, by = "model") + af <- get_forecast_counts(example_quantile, by = "model") expect_equal(nrow(af), 4) expect_equal(af$`count`, c(256, 256, 128, 247)) # Setting `collapse = c()` means that all quantiles and samples are counted - af <- available_forecasts( + af <- get_forecast_counts( example_quantile, by = "model", collapse = c() ) @@ -21,13 +21,13 @@ test_that("available_forecasts() works as expected", { expect_equal(af$`count`, c(5888, 5888, 2944, 5681)) # setting by = NULL, the default, results in by equal to forecast unit - af <- available_forecasts(example_quantile) + af <- get_forecast_counts(example_quantile) expect_equal(nrow(af), 50688) # check whether collapsing also works for model-based forecasts - af <- available_forecasts(example_integer, by = "model") + af <- get_forecast_counts(example_integer, by = "model") expect_equal(nrow(af), 4) - af <- available_forecasts(example_integer, by = "model", collapse = c()) + af <- get_forecast_counts(example_integer, by = "model", collapse = c()) expect_equal(af$count, c(10240, 10240, 5120, 9880)) }) diff --git a/tests/testthat/test-check_forecasts.R b/tests/testthat/test-check_forecasts.R index ece7ff644..004031d71 100644 --- a/tests/testthat/test-check_forecasts.R +++ b/tests/testthat/test-check_forecasts.R @@ -1,10 +1,10 @@ -test_that("validate() function works", { - check <- suppressMessages(validate(example_quantile)) - expect_s3_class(check, "scoringutils_quantile") +test_that("as_forecast() function works", { + check <- suppressMessages(as_forecast(example_quantile)) + expect_s3_class(check, "forecast_quantile") }) -test_that("validate() function has an error for empty data.frame", { - expect_error(suppressMessages(validate(data.frame()))) +test_that("as_forecast() function has an error for empty data.frame", { + expect_error(suppressMessages(as_forecast(data.frame()))) }) test_that("check_columns_present() works", { @@ -28,9 +28,9 @@ test_that("check_duplicates() works", { ) }) -# test_that("validate() function returns a message with NA in the data", { +# test_that("as_forecast() function returns a message with NA in the data", { # expect_message( -# { check <- validate(example_quantile) }, +# { check <- as_forecast(example_quantile) }, # "\\d+ values for `predicted` are NA" # ) # expect_match( @@ -39,47 +39,47 @@ test_that("check_duplicates() works", { # ) # }) -# test_that("validate() function returns messages with NA in the data", { +# test_that("as_forecast() function returns messages with NA in the data", { # example <- data.table::copy(example_quantile) # example[horizon == 2, observed := NA] -# check <- suppressMessages(validate(example)) +# check <- suppressMessages(as_forecast(example)) # # expect_equal(length(check$messages), 2) # }) -test_that("validate() function throws an error with duplicate forecasts", { +test_that("as_forecast() function throws an error with duplicate forecasts", { example <- rbind(example_quantile, example_quantile[1000:1010]) expect_error( - suppressMessages(suppressWarnings(validate(example))), + suppressMessages(suppressWarnings(as_forecast(example))), "Assertion on 'data' failed: There are instances with more than one forecast for the same target. This can't be right and needs to be resolved. Maybe you need to check the unit of a single forecast and add missing columns? Use the function get_duplicate_forecasts() to identify duplicate rows.", #nolint fixed = TRUE ) }) -test_that("validate() function creates a message when no model column is +test_that("as_forecast() function creates a message when no model column is present", { no_model <- data.table::copy(example_quantile[model == "EuroCOVIDhub-ensemble"])[, model := NULL][] expect_message( - suppressWarnings(validate(no_model)), + suppressWarnings(as_forecast(no_model)), "There is no column called `model` in the data.scoringutils assumes that all forecasts come from the same model") }) -test_that("validate() function throws an error when no predictions or observed values are present", { - expect_error(suppressMessages(suppressWarnings(validate( +test_that("as_forecast() function throws an error when no predictions or observed values are present", { + expect_error(suppressMessages(suppressWarnings(as_forecast( data.table::copy(example_quantile)[, predicted := NULL] ))), "Assertion on 'data' failed: Both columns `observed` and predicted` are needed.") - expect_error(suppressMessages(suppressWarnings(validate( + expect_error(suppressMessages(suppressWarnings(as_forecast( data.table::copy(example_quantile)[, observed := NULL] ))), "Assertion on 'data' failed: Both columns `observed` and predicted` are needed.") }) -# test_that("validate() function throws an error when no predictions or observed values are present", { -# expect_error(suppressMessages(suppressWarnings(validate( +# test_that("as_forecast() function throws an error when no predictions or observed values are present", { +# expect_error(suppressMessages(suppressWarnings(as_forecast( # data.table::copy(example_quantile)[, predicted := NA] # )))) # expect_error(suppressMessages(suppressWarnings(check_forecasts( @@ -87,14 +87,14 @@ test_that("validate() function throws an error when no predictions or observed v # )))) # }) -# test_that("validate() function throws an sample/quantile not present", { -# expect_error(suppressMessages(suppressWarnings(validate( +# test_that("as_forecast() function throws an sample/quantile not present", { +# expect_error(suppressMessages(suppressWarnings(as_forecast( # data.table::copy(example_quantile)[, quantile := NULL] # )))) # }) test_that("output of check_forecasts() is accepted as input to score()", { - check <- suppressMessages(validate(example_binary)) + check <- suppressMessages(as_forecast(example_binary)) expect_no_error( score_check <- score(check) ) diff --git a/tests/testthat/test-get_-functions.R b/tests/testthat/test-get_-functions.R index 0a499b243..5368270a1 100644 --- a/tests/testthat/test-get_-functions.R +++ b/tests/testthat/test-get_-functions.R @@ -1,3 +1,33 @@ +# ============================================================================== +# `get_forecast_unit()` +# ============================================================================== + +test_that("get_forecast_unit() works as expected", { + expect_equal( + get_forecast_unit(example_quantile), + c("location", "target_end_date", "target_type", "location_name", + "forecast_date", "model", "horizon") + ) + + expect_equal( + get_forecast_unit(scores_quantile), + c("location", "target_end_date", "target_type", "location_name", + "forecast_date", "model", "horizon") + ) + + data <- as_forecast(example_quantile) + ex <- data[, location := NULL] + expect_warning( + get_forecast_unit(ex, check_conflict = TRUE), + "Object has an attribute `forecast_unit`, but it looks different from what's expected based on the data. +Existing: forecast_date, horizon, location, location_name, model, target_end_date, target_type +Expected: forecast_date, horizon, location_name, model, target_end_date, target_type +Running `as_forecast()` again might solve the problem", +fixed = TRUE + ) +}) + + test_that("get_type() works as expected with vectors", { expect_equal(get_type(1:3), "integer") expect_equal(get_type(factor(1:2)), "classification") @@ -109,3 +139,39 @@ test_that("get_duplicate_forecasts() works as expected for point", { 22 ) }) + + +# ============================================================================== +# `get_forecast_type` +# ============================================================================== +test_that("get_forecast_type() works as expected", { + expect_equal(get_forecast_type(as.data.frame(example_quantile)), "quantile") + expect_equal(get_forecast_type(example_continuous), "sample") + expect_equal(get_forecast_type(example_integer), "sample") + expect_equal(get_forecast_type(example_binary), "binary") + expect_equal(get_forecast_type(example_point), "point") + + expect_error( + get_forecast_type(data.frame(x = 1:10)), + "Assertion on 'data' failed: Columns 'observed', 'predicted' not found in data.", + fixed = TRUE + ) + + df <- data.frame(observed = 1:10, predicted = factor(1:10)) + expect_error( + get_forecast_type(df), + "Checking `data`: input doesn't satisfy criteria for any forecast type. Are you missing a column `quantile` or `sample_id`? Please check the vignette for additional info.", + fixed = TRUE + ) + + data <- as_forecast(example_integer) + attr(data, "forecast_type") <- "binary" + expect_warning( + get_forecast_type(data), + "Object has an attribute `forecast_type`, but it looks different from what's expected based on the data. +Existing: binary +Expected: sample +Running `as_forecast()` again might solve the problem", + fixed = TRUE + ) +}) diff --git a/tests/testthat/test-metrics-binary.R b/tests/testthat/test-metrics-binary.R index 9c49e7050..fae7476c5 100644 --- a/tests/testthat/test-metrics-binary.R +++ b/tests/testthat/test-metrics-binary.R @@ -7,123 +7,181 @@ df <- data.table( id = 1:10 ) -# test input handling -test_that("function throws an error when missing observed or predicted", { - observed <- sample(c(0, 1), size = 10, replace = TRUE) - predicted <- replicate( - 20, - sample(c(0, 1), size = 10, replace = TRUE) - ) +observed_point <- rnorm(10) +predicted_point <- rnorm(10) - expect_error( - brier_score(predicted = predicted), - 'argument "observed" is missing, with no default' - ) +# ============================================================================== +# Test Input Checks - this also checks point inputs where functions are similar +# ============================================================================== +test_that("correct input works", { + expect_no_condition(assert_input_binary(observed, predicted)) + expect_no_condition(assert_input_point(observed_point, predicted_point)) - expect_error( - brier_score(observed = observed), - 'argument "predicted" is missing, with no default' + # observed is a single number and does not have the same length as predicted + expect_no_condition( + assert_input_binary(factor(1, levels = c(0, 1)), predicted) + ) + expect_no_condition( + assert_input_point(1, predicted_point) ) -}) - + # predicted is a single number and does not have the same length as observed + expect_no_condition(assert_input_binary(observed, predicted = 0.2)) + expect_no_condition(assert_input_point(observed_point, predicted = 0.2)) -test_that("function throws an error for wrong format of `observed`", { - observed <- factor(rpois(10, lambda = 1:10)) - predicted <- runif(10, min = 0, max = 1) + # predicted is a matrix with nrow equal to observed + expect_no_condition(assert_input_binary(observed, matrix(predicted))) + expect_no_condition(assert_input_point(observed_point, matrix(predicted_point))) +}) +# test input handling +test_that("function throws an error for wrong input formats", { + # observed value not as expected expect_error( - brier_score( - observed = observed, - predicted = predicted - ), - "Assertion on 'observed' failed: Must have exactly 2 levels." + assert_input_binary(observed = rnorm(10), predicted = predicted), + "Assertion on 'observed' failed: Must be of type 'factor', not 'double'." ) - - observed <- rnorm(10) expect_error( - brier_score( - observed = observed, - predicted = predicted - ), - "Assertion on 'observed' failed: Must be of type 'factor', not 'double'." + assert_input_binary(1:10, predicted), + "Assertion on 'observed' failed: Must be of type 'factor', not 'integer'." + ) + expect_error( + assert_input_binary(observed = observed, predicted = as.list(predicted)), + "Assertion on 'predicted' failed: Must be of type 'numeric', not 'list'." + ) + expect_error( + assert_input_point(observed = factor(rnorm(10)), predicted = predicted), + "Assertion on 'observed' failed: Must be of type 'numeric', not 'factor'." ) -}) - -test_that("function throws an error for wrong format of predictions", { - predicted <- runif(10, min = 0, max = 1) expect_error( - brier_score( - observed = observed, - predicted = as.list(predicted) - ), + assert_input_point(observed = observed_point, list(predicted_point)), "Assertion on 'predicted' failed: Must be of type 'numeric', not 'list'." ) - predicted <- runif(15, min = 0, max = 1) + # observed value has not 2 levels expect_error( - brier_score( - observed = observed, - predicted = predicted - ), - "`observed` and `predicted` need to be of same length when scoring binary forecasts", - # "Arguments to the following function call: 'brier_score(observed = observed, predicted = predicted)' should have the same length (or length one). Actual lengths: 10, 15", - fixed = TRUE + assert_input_binary(factor(1:10), predicted), + "Assertion on 'observed' failed: Must have exactly 2 levels." ) -}) -test_that("Input checking for binary forecasts works", { - # everything correct - expect_no_condition( - scoringutils:::assert_input_binary(observed, predicted) + # wrong length + expect_error( + assert_input_binary(observed = observed, predicted = runif(15, min = 0, max = 1)), + "`observed` and `predicted` must either be of length 1 or of equal length. Found 10 and 15", + fixed = TRUE + ) + expect_error( + assert_input_point(observed_point, runif(15, min = 0, max = 1)), + "Assertion on 'observed' failed: `observed` and `predicted` must either be of length 1 or of equal length. Found 10 and 15.", + fixed = TRUE ) # predicted > 1 expect_error( - scoringutils:::assert_input_binary(observed, predicted + 1), + assert_input_binary(observed, predicted + 1), "Assertion on 'predicted' failed: Element 1 is not <= 1." ) # predicted < 0 expect_error( - scoringutils:::assert_input_binary(observed, predicted - 1), + assert_input_binary(observed, predicted - 1), "Assertion on 'predicted' failed: Element 1 is not >= 0." ) - # observed value not factor + # predicted is a matrix with one row expect_error( - scoringutils:::assert_input_binary(1:10, predicted), - "Assertion on 'observed' failed: Must be of type 'factor', not 'integer'." + assert_input_binary(observed, predicted = matrix(0.2)), + "Assertion failed. One of the following must apply:\n * check_vector(predicted): Must be of type 'vector', not 'matrix'\n * check_matrix(predicted): Must have exactly 10 rows, but has 1 rows", + fixed = TRUE) + expect_error( + assert_input_point(observed_point, predicted = matrix(0.2)), + "Assertion failed. One of the following must apply:\n * check_vector(predicted): Must be of type 'vector', not 'matrix'\n * check_matrix(predicted): Must have exactly 10 rows, but has 1 rows", + fixed = TRUE) + + # predicted is a matrix with 2 rows + expect_error( + assert_input_binary(observed, matrix(rep(predicted, 2), ncol = 2)), + "Assertion failed. One of the following must apply:\n * check_vector(predicted): Must be of type 'vector', not 'matrix'\n * check_matrix(predicted): Must have exactly 1 cols, but has 2 cols", + fixed = TRUE ) +}) - # observed value has not 2 levels + +# ============================================================================== +# Test Binary Metrics +# ============================================================================== + +test_that("function throws an error when missing observed or predicted", { expect_error( - scoringutils:::assert_input_binary(factor(1:10), predicted), - "Assertion on 'observed' failed: Must have exactly 2 levels." + brier_score(predicted = predicted), + 'argument "observed" is missing, with no default' + ) + + expect_error( + brier_score(observed = observed), + 'argument "predicted" is missing, with no default' ) +}) +test_that("Brier score works with different inputs", { # observed is a single number and does not have the same length as predicted + expect_equal( + brier_score(factor(1, levels = c(0, 1)), predicted), + (1 - predicted)^2 + ) + + # predicted is a single number and does not have the same length as observed + expect_equal( + brier_score(observed, predicted = 0.2), + ifelse(observed == 1, (1 - 0.2)^2, (0.2)^2) + ) + + # predicted is a matrix with 1 row expect_error( - scoringutils:::assert_input_binary(factor(1), predicted), - "`observed` and `predicted` need to be of same length when scoring binary forecasts", + brier_score(observed, predicted = matrix(0.2)), + "Assertion failed. One of the following must apply:\n * check_vector(predicted): Must be of type 'vector', not 'matrix'\n * check_matrix(predicted): Must have exactly 10 rows, but has 1 rows", fixed = TRUE ) - # predicted is a matrix + # predicted is an array expect_error( - scoringutils:::assert_input_binary(observed, matrix(predicted)), - "Assertion on 'predicted' failed: Must be of type 'atomic vector', not 'matrix'." + brier_score(observed, predicted = array(0.2)), + "Assertion failed. One of the following must apply:\n * check_vector(predicted): Must be of type 'vector', not 'array'\n * check_matrix(predicted): Must be of type 'matrix', not 'array'", + fixed = TRUE ) }) + test_that("Binary metrics work within and outside of `score()`", { result <- score(df) expect_equal( brier_score(observed, predicted), result$brier_score ) + expect_equal( logs_binary(observed, predicted), result$log_score ) }) + +test_that("`logs_binary()` works as expected", { + # check against the function Metrics::ll + obs2 <- as.numeric(as.character(observed)) + expect_equal( + logs_binary(observed, predicted), + Metrics::ll(obs2, predicted) + ) + + # check this works for a single observed value + expect_equal( + logs_binary(observed[1], predicted), + Metrics::ll(obs2[1], predicted) + ) + + # check this works for a single predicted value + expect_equal( + logs_binary(observed, predicted[1]), + Metrics::ll(obs2, predicted[1]) + ) +}) diff --git a/tests/testthat/test-metrics-point.R b/tests/testthat/test-metrics-point.R index 2f64226df..54aeb623c 100644 --- a/tests/testthat/test-metrics-point.R +++ b/tests/testthat/test-metrics-point.R @@ -179,9 +179,9 @@ test_that("abs error is correct, point and median forecasts same", { observations = truth_scoringutils ) - data_scoringutils_point <- data_scoringutils[type == "point"][, quantile := NULL] + data_forecast_point <- data_scoringutils[type == "point"][, quantile := NULL] - eval <- score(data = data_scoringutils_point) + eval <- score(data = data_forecast_point) eval <- summarise_scores(eval, by = c( "location", "target_end_date", diff --git a/tests/testthat/test-metrics-quantile.R b/tests/testthat/test-metrics-quantile.R index 8dd6d6a22..b941a95aa 100644 --- a/tests/testthat/test-metrics-quantile.R +++ b/tests/testthat/test-metrics-quantile.R @@ -603,9 +603,9 @@ test_that("interval_coverage_quantile rejects wrong inputs", { # ============================================================================ # -# `interval_coverage_deviation_quantile` ===================================== # +# `interval_coverage_dev_quantile` ===================================== # # ============================================================================ # -test_that("interval_coverage_deviation_quantile works", { +test_that("interval_coverage_dev_quantile works", { existing_ranges <- unique(get_range_from_quantile(quantile)) expect_equal(existing_ranges, c(80, 50, 0)) @@ -614,7 +614,7 @@ test_that("interval_coverage_deviation_quantile works", { manual <- 0.5 * (cov_50 - 0.5) + 0.5 * (cov_80 - 0.8) expect_equal( - interval_coverage_deviation_quantile(observed, predicted, quantile), + interval_coverage_dev_quantile(observed, predicted, quantile), manual ) }) @@ -791,3 +791,17 @@ test_that("bias_quantile(): quantiles must be unique", { quantiles <- c(0.3, 0.5, 0.8, 0.9) expect_silent(bias_quantile(observed = 3, predicted, quantiles)) }) + +test_that("bias_quantile only produces one message", { + expect_message( + bias_quantile(observed, predicted[, -3], quantile[-3]), + "Median not available, computing bias as mean of the two innermost quantiles in order to compute bias." + ) +}) + +test_that("bias_quantile() works with point forecasts", { + predicted <- 1 + observed <- 1 + quantile <- 0.5 + expect_equal(bias_quantile(observed, predicted, quantile), 0) +}) diff --git a/tests/testthat/test-metrics-range.R b/tests/testthat/test-metrics-range.R deleted file mode 100644 index bd0290e9f..000000000 --- a/tests/testthat/test-metrics-range.R +++ /dev/null @@ -1,45 +0,0 @@ -test_that("bias_quantile() and bias_range() give the same result", { - predicted <- sort(rnorm(23)) - lower <- rev(predicted[1:12]) - upper <- predicted[12:23] - - range <- c(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98) - quantiles <- c(0.01, 0.025, seq(0.05, 0.95, 0.05), 0.975, 0.99) - observed <- rnorm(1) - - range_bias <- bias_range( - lower = lower, upper = upper, - range = range, observed = observed - ) - range_quantile <- bias_quantile( - observed = observed, - predicted = predicted, - quantile = quantiles - ) - expect_equal(range_bias, range_quantile) -}) - -test_that("bias_range() works with point forecasts", { - predicted <- 1 - observed <- 1 - range <- c(0) - - expect_equal(bias_range(predicted, predicted, range, observed), 0) -}) - -test_that("bias_range(): ranges must be between 0 and 100", { - lower <- 4:1 - upper <- 5:8 - - # Failing example - range <- c(-10, 0, 10, 20) - expect_error( - bias_range(lower, upper, range, observed = 3), - "range must be between 0 and 100" - ) - - # Passing counter example - range <- c(0, 10, 20, 30) - expect_silent(bias_range(lower, upper, range, observed = 3)) -}) - diff --git a/tests/testthat/test-plot_avail_forecasts.R b/tests/testthat/test-plot_avail_forecasts.R index 9f2efea6b..f131a6d7a 100644 --- a/tests/testthat/test-plot_avail_forecasts.R +++ b/tests/testthat/test-plot_avail_forecasts.R @@ -1,11 +1,10 @@ -test_that("plot_available_forecasts() works as expected", { - available_forecasts <- suppressMessages( - available_forecasts(example_quantile, - by = c("model", "target_type", "target_end_date") - ) +test_that("plot.forecast_counts() works as expected", { + available_forecasts <- get_forecast_counts( + example_quantile, + by = c("model", "target_type", "target_end_date") ) - p <- plot(available_forecasts, - xvar = "target_end_date", show_numbers = FALSE + p <- plot_forecast_counts(available_forecasts, + x = "target_end_date", show_counts = FALSE ) + facet_wrap("target_type") expect_s3_class(p, "ggplot") diff --git a/tests/testthat/test-plot_correlation.R b/tests/testthat/test-plot_correlation.R index be29faf3b..0b170b963 100644 --- a/tests/testthat/test-plot_correlation.R +++ b/tests/testthat/test-plot_correlation.R @@ -1,5 +1,5 @@ test_that("plot_correlation() works as expected", { - correlations <- correlation(summarise_scores(scores), digits = 2) + correlations <- correlation(summarise_scores(scores_quantile), digits = 2) p <- plot_correlation(correlations) expect_s3_class(p, "ggplot") skip_on_cran() diff --git a/tests/testthat/test-plot_interval_coverage.R b/tests/testthat/test-plot_interval_coverage.R index 0e885219f..5ff1fbf98 100644 --- a/tests/testthat/test-plot_interval_coverage.R +++ b/tests/testthat/test-plot_interval_coverage.R @@ -1,5 +1,5 @@ test_that("plot_interval_coverage() works as expected", { - coverage <- add_coverage(example_quantile) |> + coverage <- add_coverage(example_quantile) %>% summarise_scores(by = c("model", "range")) p <- plot_interval_coverage(coverage) expect_s3_class(p, "ggplot") diff --git a/tests/testthat/test-plot_quantile_coverage.R b/tests/testthat/test-plot_quantile_coverage.R index 060b9be26..1851ccb5c 100644 --- a/tests/testthat/test-plot_quantile_coverage.R +++ b/tests/testthat/test-plot_quantile_coverage.R @@ -1,5 +1,5 @@ test_that("plot_quantile_coverage() works as expected", { - coverage <- add_coverage(example_quantile) |> + coverage <- add_coverage(example_quantile) %>% summarise_scores(by = c("model", "quantile")) p <- plot_quantile_coverage(coverage) diff --git a/tests/testthat/test-plot_ranges.R b/tests/testthat/test-plot_ranges.R index b4dec124e..9a18cffe3 100644 --- a/tests/testthat/test-plot_ranges.R +++ b/tests/testthat/test-plot_ranges.R @@ -1,8 +1,8 @@ m <- modifyList(metrics_no_cov_no_ae, list("bias" = NULL)) sum_scores <- copy(example_quantile) %>% - .[, interval_range := scoringutils:::get_range_from_quantile(quantile)] |> - score(metrics = m) |> + .[, interval_range := scoringutils:::get_range_from_quantile(quantile)] %>% + score(metrics = m) %>% summarise_scores(by = c("model", "target_type", "interval_range")) sum_scores[, range := interval_range] diff --git a/tests/testthat/test-score.R b/tests/testthat/test-score.R index a67575984..ac6249528 100644 --- a/tests/testthat/test-score.R +++ b/tests/testthat/test-score.R @@ -38,7 +38,7 @@ test_that("function produces output for a binary case", { expect_true("brier_score" %in% names(eval)) }) -test_that("score.scoringutils_binary() errors with only NA values", { +test_that("score.forecast_binary() errors with only NA values", { only_nas <- copy(example_binary)[, predicted := NA_real_] expect_error( score(only_nas), @@ -156,7 +156,7 @@ test_that("Changing metrics names works", { }) -test_that("score.scoringutils_point() errors with only NA values", { +test_that("score.forecast_point() errors with only NA values", { only_nas <- copy(example_point)[, predicted := NA_real_] expect_error( score(only_nas), @@ -239,7 +239,7 @@ test_that("WIS is the same with other metrics omitted or included", { }) -test_that("score.scoringutils_quantile() errors with only NA values", { +test_that("score.forecast_quantile() errors with only NA values", { only_nas <- copy(example_quantile)[, predicted := NA_real_] expect_error( score(only_nas), @@ -277,16 +277,31 @@ test_that("function throws an error if data is missing", { expect_error(suppressMessages(score(data = NULL))) }) -# test_that( -# "score() can support a sample column when a quantile forecast is used", { -# ex <- example_quantile[!is.na(quantile)][1:200, ] -# ex <- rbind( -# data.table::copy(ex)[, sample_id := 1], -# ex[, sample_id := 2] -# ) -# scores <- suppressWarnings(score(ex)) -# expect_snapshot(summarise_scores( -# summarise_scores(scores, by = "model"), by = "model", -# fun = signif, digits = 2 -# )) -# }) +# ============================================================================= +# `apply_rules()` +# ============================================================================= + +test_that("apply_rules() works", { + + dt <- data.table::data.table(x = 1:10) + scoringutils:::apply_rules( + data = dt, metrics = list("test" = function(x) x + 1), + dt$x + ) + expect_equal(dt$test, 2:11) + + # additional named argument works + expect_no_condition( + scoringutils:::apply_rules( + data = dt, metrics = list("test" = function(x) x + 1), + dt$x, y = dt$test) + ) + + # additional unnamed argument does not work + + expect_warning( + scoringutils:::apply_rules( + data = dt, metrics = list("test" = function(x) x + 1), + dt$x, dt$test) + ) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 5d8b37c3f..1afc9df1e 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -104,7 +104,7 @@ test_that("run_safely() works as expected", { # }) # test_that("is_scoringutils_check() is working", { -# checked <- suppressMessages(validate(example_binary)) +# checked <- suppressMessages(validate_forecast(example_binary)) # expect_true(is_scoringutils_check(checked)) # # checked$cleaned_data <- NULL diff --git a/vignettes/metric-details.Rmd b/vignettes/metric-details.Rmd index 300a501fc..c2e0fab95 100644 --- a/vignettes/metric-details.Rmd +++ b/vignettes/metric-details.Rmd @@ -64,7 +64,7 @@ data[, 1:6] %>% ```{r, echo = FALSE, results = "asis"} data <- readRDS( system.file( - "metrics-overview", "metrics-detailed.rds", + "metrics-overview", "metrics-detailed.rds", package = "scoringutils" ) ) diff --git a/vignettes/scoring-forecasts-directly.Rmd b/vignettes/scoring-forecasts-directly.Rmd index 9272f5d27..bd956092a 100644 --- a/vignettes/scoring-forecasts-directly.Rmd +++ b/vignettes/scoring-forecasts-directly.Rmd @@ -17,7 +17,6 @@ knitr::opts_chunk$set( ) library(scoringutils) library(data.table) -library(ggplot2) ``` A variety of metrics and scoring rules can also be accessed directly through @@ -203,16 +202,13 @@ the Interval Score converges to the CRPS for increasing number of quantiles. ```{r} -observed <- rnorm(30, mean = 1:30) -interval_range <- 90 -alpha <- (100 - interval_range) / 100 -lower <- qnorm(alpha / 2, rnorm(30, mean = 1:30)) -upper <- qnorm((1 - alpha / 2), rnorm(30, mean = 11:40)) - -interval_score( - observed = observed, - lower = lower, - upper = upper, - interval_range = interval_range +observed <- c(1, -15, 22) +predicted <- rbind( + c(-1, 0, 1, 2, 3), + c(-2, 1, 2, 2, 4), + c(-2, 0, 3, 3, 4) ) +quantile <- c(0.1, 0.25, 0.5, 0.75, 0.9) + +wis(observed, predicted, quantile) ``` diff --git a/vignettes/scoringutils.Rmd b/vignettes/scoringutils.Rmd index 4ae09b2ac..e5e0e9aa8 100644 --- a/vignettes/scoringutils.Rmd +++ b/vignettes/scoringutils.Rmd @@ -35,7 +35,7 @@ Most of the time, the `score()` function will be able to do the entire evaluatio ```{r, echo=FALSE} requirements <- data.table( - "Format" = c( + Format = c( "quantile-based", "sample-based", "binary", "pairwise-comparisons" ), `Required columns` = c( @@ -60,29 +60,61 @@ set_forecast_unit( colnames() ``` +## Constructing a forecast object -## Checking the input data - -The function `validate()` can be used to check the input data. It gives a summary of what `scoringutils` thinks you are trying to achieve. It infers the type of the prediction target, the prediction format, and the unit of a single forecasts, gives an overview of the number of unique values per column (helpful for spotting missing data) and returns warnings or errors. +The function `as_forecast()` is be used to construct a forecast object and check the input data. It determines the forecast type, creates an object of the appropriate class (`forecast_binary`, `forecast_quantile` or `forecast_sample`), and validates the input data. Objects of class `forecast_*` have a print method that provides additional information. ```{r} head(example_quantile) ``` ```{r} -validate(example_quantile) +forecast_quantile <- example_quantile %>% + set_forecast_unit( + c("model", "location", "target_end_date", "forecast_date", + "target_type", "horizon") + ) %>% + as_forecast() + +forecast_quantile ``` If you are unsure what your input data should look like, have a look at the `example_quantile`, `example_integer`, `example_continuous` and `example_binary` data sets provided in the package. -The output of `validate()` can later be directly used as input to `score()` (otherwise, `score()` will just run `validate()` anyway internally). +The output of `as_forecast()` can later be directly used as input to `score()`. This is the recommended workflow (however, `score()` will run `as_forecast()` internally if this hasn't happened before). + +Note that in the above example some columns contain duplicated information with regards to the forecast unit, e.g. "location" and "location_name", and can be dropped. If we drop essential information, for example, the "target_type" column, we'll get an error informing us that the forecasts aren't uniquely identified any more. + +```{r, error=TRUE} +example_quantile %>% + set_forecast_unit( + c("location", "target_end_date", + "forecast_date", "model", "horizon") + ) %>% + as_forecast() +``` + +The function `get_duplicate_forecasts()` may help to investigate the issue. When filtering for only a single quantile of the EuroCOVIDhub-ensemble, we can see that there are indeed two forecasts for every date, location and horizon. + +```{r} +duplicates <- example_quantile %>% + set_forecast_unit( + c("location", "target_end_date", + "forecast_date", "model", "horizon") + ) %>% + get_duplicate_forecasts() + +duplicates[quantile == 0.5 & model == "EuroCOVIDhub-ensemble", ] %>% + head() +``` + ## Showing available forecasts -The function `available_forecasts()` may also be helpful to determine where forecasts are available. Using the `by` argument you can specify the level of summary. For example, to see how many forecasts there are per model and target_type, we can run +The function `get_forecast_counts()` may also be helpful to determine where forecasts are available. Using the `by` argument you can specify the level of summary. For example, to see how many forecasts there are per model and target_type, we can run ```{r} -available_forecasts(example_quantile, by = c("model", "target_type")) +get_forecast_counts(forecast_quantile, by = c("model", "target_type")) ``` We see that 'epiforecasts-EpiNow2' has some missing forecasts for the deaths forecast target and that UMass-MechBayes has no case forecasts. @@ -90,16 +122,16 @@ We see that 'epiforecasts-EpiNow2' has some missing forecasts for the deaths for This information can also be visualised using `plot()`: ```{r, fig.width=11, fig.height=6} -example_quantile %>% - available_forecasts(by = c("model", "forecast_date", "target_type")) %>% - plot() + +forecast_quantile %>% + get_forecast_counts(by = c("model", "forecast_date", "target_type")) %>% + plot_forecast_counts(x = "forecast_date") + facet_wrap(~ target_type) ``` You can also visualise forecasts directly using the `plot_predictions()` function: ```{r, fig.width = 9, fig.height = 6} -example_quantile %>% +forecast_quantile %>% make_NA( what = "truth", target_end_date >= "2021-07-15", @@ -121,82 +153,24 @@ example_quantile %>% Forecasts can easily be scored using the `score()` function. -For clarity, we suggest setting the forecast unit explicitly and you may also want to call `validate()` explicitly. +For clarity, we suggest setting the forecast unit explicitly and calling `as_forecast()` explicitly. ```{r} -scores <- example_quantile %>% - set_forecast_unit( - c("location", "target_end_date", "target_type", "location_name", - "forecast_date", "model", "horizon") - ) %>% - validate() %>% +scores <- forecast_quantile %>% score() -head(scores) -``` - -Note that in the above example some columns contain duplicated information with regards to the forecast unit, e.g. "location" and "location_name", and could be dropped. - -```{r} -example_quantile %>% - set_forecast_unit( - c("location", "target_end_date", "target_type", - "forecast_date", "model", "horizon") - ) %>% - validate() -``` - -If we drop essential information, for example, the "target_type" column, we'll get an error informing us that the forecasts aren't uniquely identified any more. - -```{r, error=TRUE} -example_quantile %>% - set_forecast_unit( - c("location", "target_end_date", - "forecast_date", "model", "horizon") - ) %>% - validate() -``` -The function `get_duplicate_forecasts()` may help to investigate the issue. When filtering for only median forecasts of the EuroCOVIDhub-ensemble, we can see that there are indeed two forecasts for every date, location and horizon. - -```{r} -duplicates <- example_quantile %>% - set_forecast_unit( - c("location", "target_end_date", - "forecast_date", "model", "horizon") - ) %>% - get_duplicate_forecasts() - -duplicates[quantile == 0.5 & model == "EuroCOVIDhub-ensemble", ] %>% - head() +print(scores, 2) ``` -The function `score()` returns unsumarised scores, which in most cases is not what the user wants. It returns a single score per forecast (as determined by the forecast unit). For forecasts in a quantile format, it returns one score per quantile. +The function `score()` returns unsumarised scores, which in most cases is not what the user wants. It returns a single score per forecast (as determined by the forecast unit). -A second function, `summarise_scores()` takes care of summarising these scores to the level specified by the user. The `by` argument can be used to define the level of summary. By default, `by = NULL` and the summary unit is assumed to be the same as the unit of a single forecast. For continuous forecasts, this means that nothing happens if `by` isn't specified. +A second function, `summarise_scores()` takes care of summarising these scores to the level specified by the user. The `by` argument can be used to define the level of summary. By default, `by = NULL` and the summary unit is assumed to be the same as the unit of a single forecast (meaning that nothing happens if `by` isn't specified). ```{r} scores <- score(example_continuous) all(scores == summarise_scores(scores), na.rm = TRUE) ``` -For quantile based forecasts, if `by = NULL`, then scores are summarised across quantiles and instead of one score per forecast_unit and quantile we only get one score per forecast unit. - -```{r} -scores <- example_quantile %>% - set_forecast_unit( - c("location", "target_end_date", "target_type", - "forecast_date", "model", "horizon") - ) %>% - validate() %>% - score() - -head(scores) - -scores %>% - summarise_scores() %>% - head() -``` - Through the `by` argument we can specify what unit of summary we want. We can also call `sumarise_scores()` multiple tines, e.g to round your outputs by specifying e.g. `signif()` as a summary function. ```{r} @@ -330,13 +304,13 @@ Relative scores for different models can be computed using pairwise comparisons, In `scoringutils`, pairwise comparisons can be made in two ways: Through the standalone function `pairwise_comparison()` or from within `summarise_scores()` which simply adds relative scores to an existing set of scores. ```{r} -example_quantile %>% +forecast_quantile %>% score() %>% pairwise_comparison(by = "model", baseline = "EuroCOVIDhub-baseline") ``` ```{r} -example_quantile %>% +forecast_quantile %>% score() %>% summarise_scores(by = "model") %>% add_pairwise_comparison(baseline = "EuroCOVIDhub-baseline") @@ -345,7 +319,7 @@ example_quantile %>% If using the `pairwise_comparison()` function, we can also visualise pairwise comparisons by showing the mean score ratios between models. By default, smaller values are better and the model we care about is showing on the y axis on the left, while the model against it is compared is shown on the x-axis on the bottom. In the example above, the EuroCOVIDhub-ensemble performs best (it only has values smaller 1), while the EuroCOVIDhub-baseline performs worst (and only has values larger than 1). For cases, the UMass-MechBayes model is of course excluded as there are no case forecasts available and therefore the set of overlapping forecasts is empty. ```{r, fig.width=9, fig.height=7} -example_quantile %>% +forecast_quantile %>% score() %>% pairwise_comparison(by = c("model", "target_type")) %>% plot_pairwise_comparison() + @@ -360,7 +334,7 @@ example_quantile %>% It may sometimes be interesting to see how different scores correlate with each other. We can examine this using the function `correlation()`. When dealing with quantile-based forecasts, it is important to call `summarise_scorees()` before `correlation()` to summarise over quantiles before computing correlations. ```{r} -example_quantile %>% +forecast_quantile %>% score() %>% summarise_scores() %>% correlation() @@ -369,7 +343,7 @@ example_quantile %>% Visualising correlations: ```{r} -example_quantile %>% +forecast_quantile %>% score() %>% summarise_scores() %>% correlation(digits = 2) %>% @@ -381,7 +355,7 @@ example_quantile %>% ```{r, eval = FALSE, include = FALSE} -example_quantile %>% +forecast_quantile %>% score() %>% summarise_scores(by = c("model", "range", "target_type")) %>% plot_ranges() +