diff --git a/R/class-forecast-binary.R b/R/class-forecast-binary.R index 3c69deed..2f1b3eb0 100644 --- a/R/class-forecast-binary.R +++ b/R/class-forecast-binary.R @@ -1,12 +1,35 @@ #' @title Create a `forecast` object for binary forecasts -#' @description -#' Create a `forecast` object for binary forecasts. See more information on -#' forecast types and expected input formats by calling `?`[as_forecast()]. -#' @export -#' @inheritParams as_forecast +#' @inherit as_forecast_doc_template params description +#' @details +#' # Required input +#' +#' The input needs to be a data.frame or similar with the following columns: +#' - `observed`: `factor` with exactly two levels representing the observed +#' values. The highest factor level is assumed to be the reference level. +#' This means that the corresponding value in `predicted` represents the +#' probability that the observed value is equal to the highest factor level. +#' - `predicted`: `numeric` with predicted probabilities, representing +#' the probability that the corresponding value in `observed` is equal to +#' the highest available factor level. +#' +#' For convenience, we recommend an additional column `model` holding the name +#' of the forecaster or model that produced a prediction, but this is not +#' strictly necessary. +#' +#' See the [example_binary] data set for an example. +#' @inheritSection forecast_types Forecast unit +#' @returns A `forecast` object of class `forecast_binary` #' @family functions to create forecast objects #' @importFrom cli cli_warn #' @keywords as_forecast +#' @export +#' @examples +#' as_forecast_binary( +#' example_binary, +#' predicted = "predicted", +#' forecast_unit = c("model", "target_type", "target_end_date", +#' "horizon", "location") +#' ) as_forecast_binary <- function(data, forecast_unit = NULL, observed = NULL, @@ -90,7 +113,7 @@ score.forecast_binary <- function(forecast, metrics = get_metrics(forecast), ... 
#' - "log_score" = [logs_binary()] #' @inheritSection illustration-input-metric-binary-point Input format #' @param x A forecast object (a validated data.table with predicted and -#' observed values, see [as_forecast()]). +#' observed values, see [as_forecast_binary()]). #' @param select A character vector of scoring rules to select from the list. If #' `select` is `NULL` (the default), all possible scoring rules are returned. #' @param exclude A character vector of scoring rules to exclude from the list. @@ -128,8 +151,8 @@ get_metrics.forecast_binary <- function(x, select = NULL, exclude = NULL, ...) { #' The data was created using the script create-example-data.R in the inst/ #' folder (or the top level folder in a compiled package). #' -#' @format An object of class `forecast_binary` (see [as_forecast()]) with the -#' following columns: +#' @format An object of class `forecast_binary` (see [as_forecast_binary()]) +#' with the following columns: #' \describe{ #' \item{location}{the country for which a prediction was made} #' \item{location_name}{name of the country for which a prediction was made} diff --git a/R/class-forecast-nominal.R b/R/class-forecast-nominal.R index 5f89aad4..e5e659b2 100644 --- a/R/class-forecast-nominal.R +++ b/R/class-forecast-nominal.R @@ -1,16 +1,45 @@ #' @title Create a `forecast` object for nominal forecasts -#' @description -#' Nominal forecasts are a form of categorical forecasts where the possible -#' outcomes that the observed values can assume are not ordered. In that sense, -#' Nominal forecasts represent a generalisation of binary forecasts. -#' @inheritParams as_forecast +#' @inherit as_forecast_doc_template params description +#' @details +#' Nominal forecasts are a form of categorical forecasts and represent a +#' generalisation of binary forecasts to multiple outcomes. The possible +#' outcomes that the observed values can assume are not ordered. 
+#' +#' # Required input +#' +#' The input needs to be a data.frame or similar with the following columns: +#' - `observed`: Column with observed values of type `factor` with N levels, +#' where N is the number of possible outcomes. +#' The levels of the factor represent the possible outcomes that +#' the observed values can assume. +#' - `predicted`: `numeric` column with predicted probabilities. The values +#' represent the probability that the observed value is equal to the factor +#' level denoted in `predicted_label`. Note that forecasts must be complete, +#' i.e. there must be a probability assigned to every possible outcome and +#' those probabilities must sum to one. +#' - `predicted_label`: `factor` with N levels, denoting the outcome that the +#' probabilities in `predicted` correspond to. +#' +#' For convenience, we recommend an additional column `model` holding the name +#' of the forecaster or model that produced a prediction, but this is not +#' strictly necessary. +#' +#' See the [example_nominal] data set for an example. +#' @inheritSection forecast_types Forecast unit #' @param predicted_label (optional) Name of the column in `data` that denotes #' the outcome to which a predicted probability corresponds to. -#' This column will be renamed to "predicted_label". Only applicable to -#' nominal forecasts. +#' This column will be renamed to "predicted_label". +#' @returns A `forecast` object of class `forecast_nominal` #' @family functions to create forecast objects #' @keywords as_forecast #' @export +#' @examples +#' as_forecast_nominal( +#' na.omit(example_nominal), +#' predicted = "predicted", +#' forecast_unit = c("model", "target_type", "target_end_date", +#' "horizon", "location") +#' ) as_forecast_nominal <- function(data, forecast_unit = NULL, observed = NULL, @@ -135,8 +164,8 @@ get_metrics.forecast_nominal <- function(x, select = NULL, exclude = NULL, ...) 
#' The data was created using the script create-example-data.R in the inst/ #' folder (or the top level folder in a compiled package). #' -#' @format An object of class `forecast_nominal` (see [as_forecast()]) with the -#' following columns: +#' @format An object of class `forecast_nominal` +#' (see [as_forecast_nominal()]) with the following columns: #' \describe{ #' \item{location}{the country for which a prediction was made} #' \item{target_end_date}{the date for which a prediction was made} diff --git a/R/class-forecast-point.R b/R/class-forecast-point.R index 15b54e3d..0dfc87ab 100644 --- a/R/class-forecast-point.R +++ b/R/class-forecast-point.R @@ -1,9 +1,19 @@ #' @title Create a `forecast` object for point forecasts -#' @description -#' Create a `forecast` object for point forecasts. See more information on -#' forecast types and expected input formats by calling `?`[as_forecast()]. -#' @inherit as_forecast params +#' @inherit as_forecast_doc_template params description +#' @details +#' # Required input +#' +#' The input needs to be a data.frame or similar with the following columns: +#' - `observed`: Column of type `numeric` with observed values. +#' - `predicted`: Column of type `numeric` with predicted values. +#' +#' For convenience, we recommend an additional column `model` holding the name +#' of the forecaster or model that produced a prediction, but this is not +#' strictly necessary. +#' +#' See the [example_point] data set for an example. #' @param ... Unused +#' @returns A `forecast` object of class `forecast_point` #' @family functions to create forecast objects #' @export #' @keywords as_forecast transform @@ -146,8 +156,8 @@ get_metrics.forecast_point <- function(x, select = NULL, exclude = NULL, ...) { #' The data was created using the script create-example-data.R in the inst/ #' folder (or the top level folder in a compiled package). 
#' -#' @format An object of class `forecast_point` (see [as_forecast()]) with the -#' following columns: +#' @format An object of class `forecast_point` (see [as_forecast_point()]) +#' with the following columns: #' \describe{ #' \item{location}{the country for which a prediction was made} #' \item{target_end_date}{the date for which a prediction was made} diff --git a/R/class-forecast-quantile.R b/R/class-forecast-quantile.R index 409a4532..caf301fa 100644 --- a/R/class-forecast-quantile.R +++ b/R/class-forecast-quantile.R @@ -1,12 +1,34 @@ #' @title Create a `forecast` object for quantile-based forecasts -#' @description -#' Create a `forecast` object for quantile-based forecasts. See more information -#' on forecast types and expected input formats by calling `?`[as_forecast()]. +#' @inherit as_forecast_doc_template params description +#' @details +#' # Required input +#' +#' The input needs to be a data.frame or similar with the following columns: +#' - `observed`: Column of type `numeric` with observed values. +#' - `predicted`: Column of type `numeric` with predicted values. Predicted +#' values represent quantiles of the predictive distribution. +#' - `quantile_level`: Column of type `numeric`, denoting the quantile level of +#' the corresponding predicted value. +#' Quantile levels must be between 0 and 1. +#' +#' For convenience, we recommend an additional column `model` holding the name +#' of the forecaster or model that produced a prediction, but this is not +#' strictly necessary. +#' +#' See the [example_quantile] data set for an example. +#' @inheritSection forecast_types Forecast unit #' @param ... 
Unused #' @family functions to create forecast objects -#' @inheritParams as_forecast +#' @returns A `forecast` object of class `forecast_quantile` #' @export #' @keywords as_forecast transform +#' @examples +#' as_forecast_quantile( +#' example_quantile, +#' predicted = "predicted", +#' forecast_unit = c("model", "target_type", "target_end_date", +#' "horizon", "location") +#' ) as_forecast_quantile <- function(data, ...) { UseMethod("as_forecast_quantile") } @@ -237,8 +259,8 @@ get_pit_histogram.forecast_quantile <- function(forecast, num_bins = NULL, #' The data was created using the script create-example-data.R in the inst/ #' folder (or the top level folder in a compiled package). #' -#' @format An object of class `forecast_quantile` (see [as_forecast()]) with the -#' following columns: +#' @format An object of class `forecast_quantile` +#' (see [as_forecast_quantile()]) with the following columns: #' \describe{ #' \item{location}{the country for which a prediction was made} #' \item{target_end_date}{the date for which a prediction was made} diff --git a/R/class-forecast-sample.R b/R/class-forecast-sample.R index a02a8038..352c139c 100644 --- a/R/class-forecast-sample.R +++ b/R/class-forecast-sample.R @@ -1,9 +1,26 @@ #' @title Create a `forecast` object for sample-based forecasts +#' @inherit as_forecast_doc_template params description +#' @details +#' # Required input +#' +#' The input needs to be a data.frame or similar with the following columns: +#' - `observed`: Column of type `numeric` with observed values. +#' - `predicted`: Column of type `numeric` with predicted values. Predicted +#' values represent random samples from the predictive distribution. +#' - `sample_id`: Column of any type with unique identifiers +#' (unique within a single forecast) for each sample. +#' +#' For convenience, we recommend an additional column `model` holding the name +#' of the forecaster or model that produced a prediction, but this is not +#' strictly necessary. 
+#' +#' See the [example_sample_continuous] and [example_sample_discrete] data sets +#' for examples. +#' @inheritSection forecast_types Forecast unit #' @param sample_id (optional) Name of the column in `data` that contains the -#' sample id. This column will be renamed to "sample_id". Only applicable to -#' sample-based forecasts. -#' @inheritParams as_forecast +#' sample id. This column will be renamed to "sample_id". #' @export +#' @returns A `forecast` object of class `forecast_sample` #' @family functions to create forecast objects #' @importFrom cli cli_warn #' @keywords as_forecast @@ -45,7 +62,7 @@ is_forecast_sample <- function(x) { #' @rdname as_forecast_quantile -#' @description +#' @details # Converting from `forecast_sample` to `forecast_quantile` #' When creating a `forecast_quantile` object from a `forecast_sample` object, #' the quantiles are estimated by computing empircal quantiles from the samples #' via [quantile()]. Note that empirical quantiles are a biased estimator for @@ -223,8 +240,8 @@ get_pit_histogram.forecast_sample <- function(forecast, num_bins = 10, #' The data was created using the script create-example-data.R in the inst/ #' folder (or the top level folder in a compiled package). #' -#' @format An object of class `forecast_sample` (see [as_forecast()]) with the -#' following columns: +#' @format An object of class `forecast_sample` (see [as_forecast_sample()]) +#' with the following columns: #' \describe{ #' \item{location}{the country for which a prediction was made} #' \item{target_end_date}{the date for which a prediction was made} @@ -251,8 +268,8 @@ get_pit_histogram.forecast_sample <- function(forecast, num_bins = 10, #' The data was created using the script create-example-data.R in the inst/ #' folder (or the top level folder in a compiled package). 
#' -#' @format An object of class `forecast_sample` (see [as_forecast()]) with the -#' following columns: +#' @format An object of class `forecast_sample` (see [as_forecast_sample()]) +#' with the following columns: #' \describe{ #' \item{location}{the country for which a prediction was made} #' \item{target_end_date}{the date for which a prediction was made} diff --git a/R/class-forecast.R b/R/class-forecast.R index d74828a2..43a66adb 100644 --- a/R/class-forecast.R +++ b/R/class-forecast.R @@ -1,70 +1,9 @@ -#' @title General information on creating a `forecast` object -#' -#' @description -#' There are several `as_forecast_()` functions to process and validate -#' a data.frame (or similar) or similar with forecasts and observations. If -#' the input passes all input checks, those functions will be converted -#' to a `forecast` object. A forecast object is a `data.table` with a -#' class `forecast` and an additional class that depends on the forecast type. -#' Every forecast type has its own `as_forecast_()` function. -#' See the details section below for more information -#' on the expected input formats. -#' -#' The `as_forecast_()` functions give users some control over how their -#' data is parsed. -#' Using the arguments `observed`, `predicted`, etc. users can rename -#' existing columns of their input data to match the required columns for a -#' forecast object. Using the argument `forecast_unit`, users can specify the -#' the columns that uniquely identify a single forecast (and remove the others, -#' see docs for the internal [set_forecast_unit()] for details). -#' -#' The following functions are available: -#' - [as_forecast_point()] -#' - [as_forecast_binary()] -#' - [as_forecast_sample()] -#' - [as_forecast_quantile()] -#' -#' @param data A data.frame (or similar) with predicted and observed values. -#' See the details section of [as_forecast()] for additional information -#' on required input formats. 
-#' @param forecast_unit (optional) Name of the columns in `data` (after -#' any renaming of columns) that denote the unit of a -#' single forecast. See [get_forecast_unit()] for details. -#' If `NULL` (the default), all columns that are not required columns are -#' assumed to form the unit of a single forecast. If specified, all columns -#' that are not part of the forecast unit (or required columns) will be removed. -#' @param observed (optional) Name of the column in `data` that contains the -#' observed values. This column will be renamed to "observed". -#' @param predicted (optional) Name of the column in `data` that contains the -#' predicted values. This column will be renamed to "predicted". -#' @inheritSection forecast_types Forecast types and input formats -#' @inheritSection forecast_types Forecast unit -#' @return -#' Depending on the forecast type, an object of the following class will be -#' returned: -#' - `forecast_binary` for binary forecasts -#' - `forecast_point` for point forecasts -#' - `forecast_sample` for sample-based forecasts -#' - `forecast_quantile` for quantile-based forecasts -#' @keywords as_forecast -#' @family functions to create forecast objects -#' @examples -#' as_forecast_binary(example_binary) -#' as_forecast_quantile( -#' example_quantile, -#' forecast_unit = c("model", "target_type", "target_end_date", -#' "horizon", "location") -#' ) -#' @name as_forecast -NULL - - #' Common functionality for `as_forecast_` functions #' @details This function splits out part of the functionality of #' `as_forecast_` that is the same for all `as_forecast_` functions. #' It renames the required columns, where appropriate, and sets the forecast #' unit. -#' @inheritParams as_forecast +#' @inheritParams as_forecast_doc_template #' @keywords as_forecast as_forecast_generic <- function(data, forecast_unit = NULL, @@ -98,9 +37,13 @@ as_forecast_generic <- function(data, #' #' @description #' Assert that an object is a forecast object (i.e. 
a `data.table` with a class -#' `forecast` and an additional class `forecast_*` corresponding to the forecast -#' type). -#' @inheritParams as_forecast +#' `forecast` and an additional class `forecast_` corresponding to the +#' forecast type). +#' +#' See the corresponding `assert_forecast_` functions for more details on +#' the required input formats. +#' +#' @inheritParams as_forecast_doc_template #' @inheritParams score #' @param forecast_type (optional) The forecast type you expect the forecasts #' to have. If the forecast type as determined by `scoringutils` based on the @@ -108,7 +51,6 @@ as_forecast_generic <- function(data, #' default), the forecast type will be inferred from the data. #' @param verbose Logical. If `FALSE` (default is `TRUE`), no messages and #' warnings will be created. -#' @inheritSection forecast_types Forecast types and input formats #' @return #' Returns `NULL` invisibly. #' @importFrom data.table ':=' is.data.table @@ -277,7 +219,7 @@ clean_forecast <- function(forecast, copy = FALSE, na.omit = FALSE) { #' - coerces the data into a data.table #' - assigns a class #' -#' @inheritParams as_forecast +#' @inheritParams as_forecast_doc_template #' @param classname name of the class to be created #' @returns An object of the class indicated by `classname` #' @export @@ -293,11 +235,10 @@ new_forecast <- function(data, classname) { #' @title Test whether an object is a forecast object #' #' @description -#' Test whether an object is a forecast object (see [as_forecast()] for more -#' information). +#' Test whether an object is a forecast object. #' -#' You can test for a specific `forecast_*` class using the appropriate -#' `is_forecast_*` function. +#' You can test for a specific `forecast_` class using the appropriate +#' `is_forecast_` function. #' #' @param x An R object. #' @return @@ -409,8 +350,7 @@ tail.forecast <- function(x, ...) { #' including "Forecast type", "Score columns", #' "Forecast unit". 
#' -#' @param x A forecast object (a validated data.table with predicted and -#' observed values, see [as_forecast()]). +#' @param x A forecast object #' @param ... Additional arguments for [print()]. #' @returns Returns `x` invisibly. #' @importFrom cli cli_inform cli_warn col_blue cli_text diff --git a/R/documentation-templates.R b/R/documentation-templates.R index 7b652b1b..77fb9d9f 100644 --- a/R/documentation-templates.R +++ b/R/documentation-templates.R @@ -1,57 +1,37 @@ -#' @title Documentation template for forecast types -#' -#' @details # Forecast types and input formats -#' -#' Various different forecast types / forecast formats are supported. At the -#' moment, those are: -#' - point forecasts -#' - binary forecasts ("soft binary classification") -#' - nominal forecasts ("soft classification with multiple unordered classes") -#' - Probabilistic forecasts in a quantile-based format (a forecast is -#' represented as a set of predictive quantiles) -#' - Probabilistic forecasts in a sample-based format (a forecast is represented -#' as a set of predictive samples) -#' -#' Forecast types are determined based on the columns present in the input data. -#' Here is an overview of the required format for each forecast type: -#' \if{html}{ -#' \out{
<div style="text-align: left">} -#' \figure{required-inputs.png}{options: style="width:750px;max-width:100\%;"} -#' \out{</div>
} -#' } -#' \if{latex}{ -#' \figure{required-inputs.png} -#' } -#' -#' *All forecast types* require a data.frame or similar with columns `observed` -#' `predicted`, and `model`. -#' -#' *Point forecasts* require a column `observed` of type numeric and a column -#' `predicted` of type numeric. -#' -#' *Binary forecasts* require a column `observed` of type factor with exactly -#' two levels and a column `predicted` of type numeric with probabilities, -#' corresponding to the probability that `observed` is equal to the second -#' factor level. See details [here][brier_score()] for more information. -#' -#' *Nominal forecasts* require a column `observed` of type factor with N levels, -#' (where N is the number of possible outcomes), a column `predicted` of type -#' numeric with probabilities (which sum to one across all possible outcomes), -#' and a column `predicted_label` of type factor with N levels, denoting the -#' outcome for which a probability is given. Forecasts must be complete, i.e. -#' there must be a probability assigned to every possible outcome. +#' @title General information on creating a `forecast` object #' -#' *Quantile-based forecasts* require a column `observed` of type numeric, -#' a column `predicted` of type numeric, and a column `quantile_level` of type -#' numeric with quantile-levels (between 0 and 1). +#' @description +#' Process and validate a data.frame (or similar) with forecasts +#' and observations. If the input passes all input checks, it will +#' be converted to a `forecast` object. A forecast object is a `data.table` with +#' a class `forecast` and an additional class that depends on the forecast type. #' -#' *Sample-based forecasts* require a column `observed` of type numeric, -#' a column `predicted` of type numeric, and a column `sample_id` of type -#' numeric with sample indices. +#' The arguments `observed`, `predicted`, etc.
make it possible to rename +#' existing columns of the input data to match the required columns for a +#' forecast object. Using the argument `forecast_unit`, you can specify +#' the columns that uniquely identify a single forecast (thereby removing +#' other, unneeded columns; see section "Forecast unit" below for details). #' -#' For more information see the vignettes and the example data -#' ([example_quantile], [example_sample_continuous], [example_sample_discrete], -#' [example_point()], [example_binary], and [example_nominal]). +#' @param data A data.frame (or similar) with predicted and observed values. +#' See the details section for additional information +#' on the required input format. +#' @param forecast_unit (optional) Name of the columns in `data` (after +#' any renaming of columns) that denote the unit of a +#' single forecast. See [get_forecast_unit()] for details. +#' If `NULL` (the default), all columns that are not required columns are +#' assumed to form the unit of a single forecast. If specified, all columns +#' that are not part of the forecast unit (or required columns) will be removed. +#' @param observed (optional) Name of the column in `data` that contains the +#' observed values. This column will be renamed to "observed". +#' @param predicted (optional) Name of the column in `data` that contains the +#' predicted values. This column will be renamed to "predicted". +#' @inheritSection forecast_types Forecast unit +#' @keywords as_forecast +#' @name as_forecast_doc_template +NULL + + +#' @title Documentation template for forecast types #' #' @details # Forecast unit #' @@ -75,9 +55,9 @@ #' then thinks that this column was relevant in defining the forecast unit. #' #' In order to avoid issues, we recommend setting the forecast unit explicitly, -#' usually through the `forecast_unit` argument in the [as_forecast()] -#' functions. 
This will drop unneeded columns, while making sure that all -#' necessary, 'protected columns' like "predicted" or "observed" are retained. +#' using the `forecast_unit` argument. This will simply drop unneeded columns, +#' while making sure that all necessary, 'protected columns' like "predicted" +#' or "observed" are retained. #' #' @name forecast_types #' @keywords internal @@ -150,3 +130,17 @@ NULL #' @name illustration-input-metric-quantile #' @keywords internal NULL + +#' Illustration of required inputs for nominal forecasts +#' @details # Input format +#' \if{html}{ +#' \out{
<div style="text-align: left">} +#' \figure{metrics-nominal.png}{options: style="width:750px;max-width:100\%;"} +#' \out{</div>
} +#' } +#' \if{latex}{ +#' \figure{metrics-nominal.png} +#' } +#' @name illustration-input-metric-nominal +#' @keywords internal +NULL diff --git a/R/forecast-unit.R b/R/forecast-unit.R index ce38b242..f71655b0 100644 --- a/R/forecast-unit.R +++ b/R/forecast-unit.R @@ -6,8 +6,8 @@ #' This simple function keeps the columns specified in `forecast_unit` (plus #' additional protected columns, e.g. for observed values, predictions or #' quantile levels) and removes duplicate rows. `set_forecast_unit()` will -#' mainly be called when constructing a `forecast` object (see [as_forecast()]) -#' via the `forecast_unit` argument there. +#' mainly be called when constructing a `forecast` object +#' via the `forecast_unit` argument in `as_forecast_`. #' #' If not done explicitly, `scoringutils` attempts to determine the unit #' of a single forecast automatically by simply assuming that all column names @@ -15,7 +15,7 @@ #' behaviour, so setting the forecast unit explicitly can help make the code #' easier to debug and easier to read. #' -#' @inheritParams as_forecast +#' @inheritParams as_forecast_doc_template #' @param forecast_unit Character vector with the names of the columns that #' uniquely identify a single forecast. #' @importFrom cli cli_warn @@ -47,7 +47,7 @@ set_forecast_unit <- function(data, forecast_unit) { #' the columns that are protected, i.e. those returned by #' [get_protected_columns()] as well as the names of the metrics that were #' specified during scoring, if any. 
-#' @inheritParams as_forecast +#' @inheritParams as_forecast_doc_template #' @inheritSection forecast_types Forecast unit #' @return #' A character vector with the column names that define the unit of diff --git a/R/get-coverage.R b/R/get-coverage.R index c7a7c053..e2b0c90e 100644 --- a/R/get-coverage.R +++ b/R/get-coverage.R @@ -2,7 +2,7 @@ #' #' @description #' For a validated forecast object in a quantile-based format -#' (see [as_forecast()] for more information), this function computes: +#' (see [as_forecast_quantile()] for more information), this function computes: #' - interval coverage of central prediction intervals #' - quantile coverage for predictive quantiles #' - the deviation between desired and actual coverage (both for interval and diff --git a/R/get-duplicate-forecasts.R b/R/get-duplicate-forecasts.R index a1721fa1..17a8b3b8 100644 --- a/R/get-duplicate-forecasts.R +++ b/R/get-duplicate-forecasts.R @@ -5,7 +5,7 @@ #' instances where there is more than one forecast for the same prediction #' target. #' -#' @inheritParams as_forecast +#' @inheritParams as_forecast_doc_template #' @param counts Should the output show the number of duplicates per forecast #' unit instead of the individual duplicated rows? Default is `FALSE`. #' @returns A data.frame with all rows for which a duplicate forecast was found diff --git a/R/get-forecast-type.R b/R/get-forecast-type.R index 5d2eb5d1..63b5f23f 100644 --- a/R/get-forecast-type.R +++ b/R/get-forecast-type.R @@ -18,7 +18,7 @@ get_forecast_type <- function(forecast) { #' Assert that forecast type is as expected -#' @param data A forecast object (see [as_forecast()]). +#' @param data A forecast object. 
#' @param actual The actual forecast type of the data #' @param desired The desired forecast type of the data #' @inherit document_assert_functions return diff --git a/R/get-protected-columns.R b/R/get-protected-columns.R index e617672d..53c245da 100644 --- a/R/get-protected-columns.R +++ b/R/get-protected-columns.R @@ -3,7 +3,7 @@ #' @description Helper function to get the names of all columns in a data frame #' that are protected columns. #' -#' @inheritParams as_forecast +#' @inheritParams as_forecast_doc_template #' #' @return #' A character vector with the names of protected columns in the data. diff --git a/R/helper-quantile-interval-range.R b/R/helper-quantile-interval-range.R index 3d943a51..e7cf0fba 100644 --- a/R/helper-quantile-interval-range.R +++ b/R/helper-quantile-interval-range.R @@ -40,7 +40,7 @@ quantile_to_interval <- function(...) { #' @param forecast A data.table with forecasts in a quantile-based format (see -#' [as_forecast()]). +#' [as_forecast_quantile()]). #' @param format The format of the output. Either "long" or "wide". If "long" #' (the default), there will be a column `boundary` (with values either #' "upper" or "lower" and a column `interval_range` that contains the range of diff --git a/R/metrics-nominal.R b/R/metrics-nominal.R index 8c9d792e..3a2d7652 100644 --- a/R/metrics-nominal.R +++ b/R/metrics-nominal.R @@ -79,6 +79,7 @@ assert_input_nominal <- function(observed, predicted, predicted_label) { #' @param predicted_label A factor of length N, denoting the outcome that the #' probabilities in `predicted` correspond to. 
#' @returns A numeric vector of size n with log scores +#' @inheritSection illustration-input-metric-nominal Input format #' @importFrom methods hasArg #' @export #' @keywords metric diff --git a/R/metrics.R b/R/metrics.R index 4713711b..8a0bc28c 100644 --- a/R/metrics.R +++ b/R/metrics.R @@ -42,10 +42,10 @@ select_metrics <- function(metrics, select = NULL, exclude = NULL) { #' Get metrics #' #' @description -#' Generic function to to obtain default metrics availble for scoring or metrics +#' Generic function to obtain default metrics available for scoring or metrics #' that were used for scoring. #' -#' - If called on `forecast` object it returns a list of functions that can be +#' - If called on a `forecast` object it returns a list of functions that can be #' used for scoring. #' - If called on a `scores` object (see [score()]), it returns a character vector #' with the names of the metrics that were used for scoring. @@ -56,9 +56,6 @@ select_metrics <- function(metrics, select = NULL, exclude = NULL) { #' #' @param x A `forecast` or `scores` object. #' @param ... Additional arguments passed to the method. -#' @details -#' See [as_forecast()] for more information on `forecast` objects and [score()] -#' for more information on `scores` objects. #' #' @family get_metrics functions #' @keywords handle-metrics diff --git a/R/score.R b/R/score.R index ff783996..00300d31 100644 --- a/R/score.R +++ b/R/score.R @@ -1,19 +1,21 @@ #' @title Evaluate forecasts #' @description `score()` applies a selection of scoring metrics to a forecast -#' object (a data.table with forecasts and observations) (see [as_forecast()]). +#' object. #' `score()` is a generic that dispatches to different methods depending on the #' class of the input data. #' -#' See the *Forecast types and input formats* section for more information on -#' forecast types and input formats. +#' See [as_forecast_binary()], [as_forecast_quantile()] etc. for information on +#' how to create a forecast object. 
+#' +#' See [get_forecast_unit()] for more information on the concept of a forecast +#' unit. +#' #' For additional help and examples, check out the [Getting Started #' Vignette](https://epiforecasts.io/scoringutils/articles/scoringutils.html) as #' well as the paper [Evaluating Forecasts with scoringutils in #' R](https://arxiv.org/abs/2205.07090). -#' @inheritSection forecast_types Forecast types and input formats -#' @inheritSection forecast_types Forecast unit #' @param forecast A forecast object (a validated data.table with predicted and -#' observed values, see [as_forecast()]). +#' observed values). #' @param metrics A named list of scoring functions. Names will be used as #' column names in the output. See [get_metrics()] for more information on the #' default metrics used. See the *Customising metrics* section below for diff --git a/inst/manuscript/manuscript.Rmd b/inst/manuscript/manuscript.Rmd index 5ecc7025..40470d6b 100644 --- a/inst/manuscript/manuscript.Rmd +++ b/inst/manuscript/manuscript.Rmd @@ -162,7 +162,7 @@ Forecasts differ in the exact prediction task and in how the forecaster chooses - "Point" denotes a forecast for a continuous or discrete outcome variable that is represented by a single number. - "Binary" denotes a probability forecast for a binary (yes/no) outcome variable. This is sometimes also called "soft binary classification". - "Nominal" denotes a probability forecast for a variable where the outcome can assume one of multiple unordered classes. This represents a generalisation of binary forecasts to multiple possible outcomes. -- "Quantile" or "quantile-based" is used to denote a probabilistic forecast for a continuous or discrete outcome variable, with the forecast distribution represented by a set of predictive quantiles. 
While a single quantile would already satisfy the requirements for a quantile-based forecast, most scoring rules expect a set of quantiles which are symmetric around the median (thus forming the lower and upper bounds of central "prediction intervals") and will return `NA` if this is not the case. +- "Quantile" or "quantile-based" is used to denote a probabilistic forecast for a continuous or discrete outcome variable, with the forecast distribution represented by a set of predictive quantiles. While a single quantile would already satisfy the requirements for a quantile-based forecast, most scoring rules expect a set of quantiles which are symmetric around the median (thus forming the lower and upper bounds of central "prediction intervals") and will error (or return `NA` if `na.rm = TRUE`) if this is not the case. - "Sample" or "sample-based" is used to denote a probabilistic forecast for a continuous or discrete outcome variable, with the forecast represented by a finite set of samples drawn from the predictive distribution. A single sample technically suffices, but would lead to very imprecise results. \begin{table}[h] @@ -207,7 +207,7 @@ The starting point for working with \pkg{scoringutils} is usually a \code{data.f Table \ref{tab:input-score} shows the expected input format for each forecast type. -The package contains example data for each forecast type, which can serve as an orientation for the correct formats. The example data sets are exported as `example_point` and `example_binary`, `example_nominal`, `example_quantile`, `example_sample_continuous`, and `example_sample_discrete`. For illustrative purposes, the example data also contains some rows with only observations and no corresponding predictions. All example data in the package use a column called `model` to denote the name of the model/forecaster that generated the forecast. This is also the default in some function, but does not reflect a hard requirement. 
Input formats for the scoring rules that can be called directly follow the same convention, with inputs expected to be vectors or matrices. +The package contains example data for each forecast type, which can serve as an orientation for the correct formats. The example data sets are exported as `example_point` and `example_binary`, `example_nominal`, `example_quantile`, `example_sample_continuous`, and `example_sample_discrete`. For illustrative purposes, the example data also contains some rows with only observations and no corresponding predictions. All example data in the package use a column called `model` to denote the name of the model/forecaster that generated the forecast. This is also the default in some functions, but does not reflect a hard requirement. Input formats for the scoring rules that can be called directly follow the same convention, with inputs expected to be vectors or matrices. ### The unit of a single forecast @@ -246,13 +246,13 @@ The argument `forecast_unit` allows the user to manually set the unit of a singl ## Diagnostic helper functions -Various helper functions are available to diagnose and fix issues with the input data. The most important one is `print()`. Once a forecast object has successfully been created, diagnostic information will automatically be added to the output when printing a forecast object. This information includes the forecast type, the forecast unit, and additional information in case the object fails validations. +Various helper functions are available to diagnose and fix issues with the input data. A simple one is the `print()` method for forecast objects. Once a forecast object has successfully been created, the forecast type and the forecast unit will automatically be added to the output when printing. ```{r} print(forecast_quantile, 2) ``` -Internally, the print method calls the functions \fct{get\_forecast\_type}, \fct{get\_forecast\_unit} and \fct{assert\_forecast}. 
\fct{get\_forecast\_type} and \fct{get\_forecast\_unit} work on either an unvalidated \code{data.frame} (or similar) or on an already validated forecast object. They return the forecast type and the forecast unit, respectively, as inferred from the input data. \fct{assert\_forecast} asserts that an existing forecast object passes all validations and returns `invisble(NULL)` if the forecast object is valid (and otherwise errors). \fct{validate\_forecast} is similar to \fct{assert\_forecast}, but returns the forecast object in case of success instead of `invisble(NULL)`, meaning that it can be used in a pipe. +Internally, the print method calls the functions \fct{get\_forecast\_type} and \fct{get\_forecast\_unit}. Both functions can also be accessed independently. \fct{get\_forecast\_type} and \fct{get\_forecast\_unit} work on either an unvalidated \code{data.frame} (or similar) or on an already validated forecast object. They return the forecast type and the forecast unit, respectively, as inferred from the input data. \fct{assert\_forecast} asserts that an existing forecast object passes all validations and returns `invisible(NULL)` if the forecast object is valid (and otherwise errors). One common issue that causes transformation to a `forecast` object to fail are "duplicates" in the data. \pkg{scoringutils} strictly requires that there be only one forecast per forecast unit and only one predicted value per quantile level or sample id within a single forecast. Duplicates usually occur if the forecast unit is misspecified. For example, if we removed the column `target_type` from the example data, we would now have two forecasts (one for cases and one for deaths of COVID-19) that appear to have the same forecast unit (since the information that distinguished between case and death forecasts is no longer there). The function \fct{get\_duplicate\_forecasts} returns duplicate rows for the user to inspect.
To remedy the issue, the user needs to add additional columns that uniquely identify a single forecast. @@ -654,10 +654,6 @@ The following section gives an overview of how \pkg{scoringutils} constructs for \fct{as\_forecast\_...} (optionally) renames existing columns to conform with the requirements for forecast objects, (optionally) sets the forecast unit, constructs the class and validates the input. The process is illustrated in Figure \ref{fig:flowchart-validation}. -```{r flowchart-validation, echo = FALSE, fig.pos = "!h", out.width="100%", fig.cap= "Illustration of the process of creating a `forecast` object.", fig.show="hold"} -include_graphics("output/flowchart-create-object.png") -``` - \clearpage \section{Comparing different calibration plots} diff --git a/inst/manuscript/manuscript.pdf b/inst/manuscript/manuscript.pdf index 314c140e..ae6999ef 100644 Binary files a/inst/manuscript/manuscript.pdf and b/inst/manuscript/manuscript.pdf differ diff --git a/inst/manuscript/manuscript.tex b/inst/manuscript/manuscript.tex index 55f29219..fefffc4d 100644 --- a/inst/manuscript/manuscript.tex +++ b/inst/manuscript/manuscript.tex @@ -322,8 +322,9 @@ \subsection{Input formats and types of While a single quantile would already satisfy the requirements for a quantile-based forecast, most scoring rules expect a set of quantiles which are symmetric around the median (thus forming the lower and - upper bounds of central ``prediction intervals'') and will return - \texttt{NA} if this is not the case. + upper bounds of central ``prediction intervals'') and will error (or + return \texttt{NA} if \texttt{na.rm\ =\ TRUE}) if this is not the + case. \item ``Sample'' or ``sample-based'' is used to denote a probabilistic forecast for a continuous or discrete outcome variable, with the @@ -392,7 +393,7 @@ \subsection{Input formats and types of example data also contains some rows with only observations and no corresponding predictions. 
All example data in the package use a column called \texttt{model} to denote the name of the model/forecaster that -generated the forecast. This is also the default in some function, but +generated the forecast. This is also the default in some functions, but does not reflect a hard requirement. Input formats for the scoring rules that can be called directly follow the same convention, with inputs expected to be vectors or matrices. @@ -486,11 +487,10 @@ \subsection{Diagnostic helper functions}\label{diagnostic-helper-functions} Various helper functions are available to diagnose and fix issues with -the input data. The most important one is \texttt{print()}. Once a -forecast object has successfully been created, diagnostic information -will automatically be added to the output when printing a forecast -object. This information includes the forecast type, the forecast unit, -and additional information in case the object fails validations. +the input data. A simple one is the \texttt{print()} method for forecast +objects. Once a forecast object has successfully been created, the +forecast type and the forecast unit will automatically be added to the +output when printing. \begin{CodeChunk} \begin{CodeInput} @@ -524,18 +524,15 @@ \subsection{Diagnostic helper \end{CodeChunk} Internally, the print method calls the functions -\code{get\_forecast\_type()}, \code{get\_forecast\_unit()} and -\code{assert\_forecast()}. \code{get\_forecast\_type()} and -\code{get\_forecast\_unit()} work on either an unvalidated -\code{data.frame} (or similar) or on an already validated forecast -object. They return the forecast type and the forecast unit, -respectively, as inferred from the input data. \code{assert\_forecast()} -asserts that an existing forecast object passes all validations and -returns \texttt{invisble(NULL)} if the forecast object is valid (and -otherwise errors). 
\code{validate\_forecast()} is similar to -\code{assert\_forecast()}, but returns the forecast object in case of -success instead of \texttt{invisble(NULL)}, meaning that it can be used -in a pipe. +\code{get\_forecast\_type()} and \code{get\_forecast\_unit()}. Both +functions can also be accessed independently. +\code{get\_forecast\_type()} and \code{get\_forecast\_unit()} work on +either an unvalidated \code{data.frame} (or similar) or on an already +validated forecast object. They return the forecast type and the +forecast unit, respectively, as inferred from the input data. +\code{assert\_forecast()} asserts that an existing forecast object +passes all validations and returns \texttt{invisible(NULL)} if the +forecast object is valid (and otherwise errors). One common issue that causes transformation to a \texttt{forecast} object to fail are ``duplicates'' in the data. \pkg{scoringutils} @@ -737,9 +734,11 @@ \subsubsection{Probabilistic calibration and PIT \begin{CodeInput} R> example_sample_continuous |> + as_forecast_sample() |> -+ get_pit(by = c("model", "target_type")) |> -+ plot_pit() + -+ facet_grid(target_type ~ model) ++ get_pit_histogram(by = c("model", "target_type")) |> ++ ggplot(aes(x = mid, y = density)) + ++ geom_col() + ++ facet_grid(target_type ~ model) + ++ labs(x = "Quantile", "Density") \end{CodeInput} \begin{figure}[!h] @@ -989,7 +988,7 @@ \subsection{Adding relative skill scores based on pairwise \begin{CodeChunk} \begin{figure}[!h] -{\centering \includegraphics[width=1\linewidth]{output/pairwise-comparisons} +{\centering \includegraphics[width=1\linewidth]{../../man/figures/pairwise-illustration} } @@ -1401,17 +1400,6 @@ \section{Constructing and validating forecast objects} the forecast unit, constructs the class and validates the input. The process is illustrated in Figure \ref{fig:flowchart-validation}.
-\begin{CodeChunk} -\begin{figure}[!h] - -{\centering \includegraphics[width=1\linewidth]{output/flowchart-create-object} - -} - -\caption[Illustration of the process of creating a `forecast` object]{Illustration of the process of creating a `forecast` object.}\label{fig:flowchart-validation} -\end{figure} -\end{CodeChunk} - \clearpage \section{Comparing different calibration plots} @@ -1423,19 +1411,19 @@ \section{Comparing different calibration plots} \begin{CodeChunk} \begin{CodeOutput} - observed id predicted sample_id model - - 1: 0.6286418 1 1.16131695 1 Pred: N(0, 1) - 2: 0.6286418 1 -0.99315186 2 Pred: N(0, 1) - 3: 0.6286418 1 0.34728150 3 Pred: N(0, 1) - 4: 0.6286418 1 -0.04181622 4 Pred: N(0, 1) - 5: 0.6286418 1 0.50687585 5 Pred: N(0, 1) - --- -15999996: 0.1889872 2000 0.16620035 1996 Pred: N(0, 0.5) -15999997: 0.1889872 2000 -0.11813551 1997 Pred: N(0, 0.5) -15999998: 0.1889872 2000 0.07934558 1998 Pred: N(0, 0.5) -15999999: 0.1889872 2000 1.21359187 1999 Pred: N(0, 0.5) -16000000: 0.1889872 2000 -0.18941563 2000 Pred: N(0, 0.5) + observed id predicted sample_id model + + 1: -1.0981862 1 2.16551331 1 Pred: N(0, 1) + 2: -1.0981862 1 -1.72018902 2 Pred: N(0, 1) + 3: -1.0981862 1 0.83321201 3 Pred: N(0, 1) + 4: -1.0981862 1 0.28800142 4 Pred: N(0, 1) + 5: -1.0981862 1 -0.12326236 5 Pred: N(0, 1) + --- +15999996: -0.6612465 2000 -0.27061649 1996 Pred: N(0, 0.5) +15999997: -0.6612465 2000 0.06780212 1997 Pred: N(0, 0.5) +15999998: -0.6612465 2000 0.05126678 1998 Pred: N(0, 0.5) +15999999: -0.6612465 2000 0.70752951 1999 Pred: N(0, 0.5) +16000000: -0.6612465 2000 0.34493113 2000 Pred: N(0, 0.5) \end{CodeOutput} \begin{figure}[!h] diff --git a/man/apply_metrics.Rd b/man/apply_metrics.Rd index d43b4849..28a941aa 100644 --- a/man/apply_metrics.Rd +++ b/man/apply_metrics.Rd @@ -8,7 +8,7 @@ apply_metrics(forecast, metrics, ...) 
} \arguments{ \item{forecast}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values).} \item{metrics}{A named list of scoring functions. Names will be used as column names in the output. See \code{\link[=get_metrics]{get_metrics()}} for more information on the diff --git a/man/as_forecast.Rd b/man/as_forecast.Rd deleted file mode 100644 index 8e115215..00000000 --- a/man/as_forecast.Rd +++ /dev/null @@ -1,158 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/class-forecast.R -\name{as_forecast} -\alias{as_forecast} -\title{General information on creating a \code{forecast} object} -\arguments{ -\item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} - -\item{forecast_unit}{(optional) Name of the columns in \code{data} (after -any renaming of columns) that denote the unit of a -single forecast. See \code{\link[=get_forecast_unit]{get_forecast_unit()}} for details. -If \code{NULL} (the default), all columns that are not required columns are -assumed to form the unit of a single forecast. If specified, all columns -that are not part of the forecast unit (or required columns) will be removed.} - -\item{observed}{(optional) Name of the column in \code{data} that contains the -observed values. This column will be renamed to "observed".} - -\item{predicted}{(optional) Name of the column in \code{data} that contains the -predicted values. 
This column will be renamed to "predicted".} -} -\value{ -Depending on the forecast type, an object of the following class will be -returned: -\itemize{ -\item \code{forecast_binary} for binary forecasts -\item \code{forecast_point} for point forecasts -\item \code{forecast_sample} for sample-based forecasts -\item \code{forecast_quantile} for quantile-based forecasts -} -} -\description{ -There are several \verb{as_forecast_()} functions to process and validate -a data.frame (or similar) or similar with forecasts and observations. If -the input passes all input checks, those functions will be converted -to a \code{forecast} object. A forecast object is a \code{data.table} with a -class \code{forecast} and an additional class that depends on the forecast type. -Every forecast type has its own \verb{as_forecast_()} function. -See the details section below for more information -on the expected input formats. - -The \verb{as_forecast_()} functions give users some control over how their -data is parsed. -Using the arguments \code{observed}, \code{predicted}, etc. users can rename -existing columns of their input data to match the required columns for a -forecast object. Using the argument \code{forecast_unit}, users can specify the -the columns that uniquely identify a single forecast (and remove the others, -see docs for the internal \code{\link[=set_forecast_unit]{set_forecast_unit()}} for details). - -The following functions are available: -\itemize{ -\item \code{\link[=as_forecast_point]{as_forecast_point()}} -\item \code{\link[=as_forecast_binary]{as_forecast_binary()}} -\item \code{\link[=as_forecast_sample]{as_forecast_sample()}} -\item \code{\link[=as_forecast_quantile]{as_forecast_quantile()}} -} -} -\section{Forecast types and input formats}{ -Various different forecast types / forecast formats are supported. 
At the -moment, those are: -\itemize{ -\item point forecasts -\item binary forecasts ("soft binary classification") -\item nominal forecasts ("soft classification with multiple unordered classes") -\item Probabilistic forecasts in a quantile-based format (a forecast is -represented as a set of predictive quantiles) -\item Probabilistic forecasts in a sample-based format (a forecast is represented -as a set of predictive samples) -} - -Forecast types are determined based on the columns present in the input data. -Here is an overview of the required format for each forecast type: -\if{html}{ - \out{
} - \figure{required-inputs.png}{options: style="width:750px;max-width:100\%;"} - \out{
} -} -\if{latex}{ - \figure{required-inputs.png} -} - -\emph{All forecast types} require a data.frame or similar with columns \code{observed} -\code{predicted}, and \code{model}. - -\emph{Point forecasts} require a column \code{observed} of type numeric and a column -\code{predicted} of type numeric. - -\emph{Binary forecasts} require a column \code{observed} of type factor with exactly -two levels and a column \code{predicted} of type numeric with probabilities, -corresponding to the probability that \code{observed} is equal to the second -factor level. See details \link[=brier_score]{here} for more information. - -\emph{Nominal forecasts} require a column \code{observed} of type factor with N levels, -(where N is the number of possible outcomes), a column \code{predicted} of type -numeric with probabilities (which sum to one across all possible outcomes), -and a column \code{predicted_label} of type factor with N levels, denoting the -outcome for which a probability is given. Forecasts must be complete, i.e. -there must be a probability assigned to every possible outcome. - -\emph{Quantile-based forecasts} require a column \code{observed} of type numeric, -a column \code{predicted} of type numeric, and a column \code{quantile_level} of type -numeric with quantile-levels (between 0 and 1). - -\emph{Sample-based forecasts} require a column \code{observed} of type numeric, -a column \code{predicted} of type numeric, and a column \code{sample_id} of type -numeric with sample indices. - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_sample_continuous}, \link{example_sample_discrete}, -\code{\link[=example_point]{example_point()}}, \link{example_binary}, and \link{example_nominal}). -} - -\section{Forecast unit}{ -In order to score forecasts, \code{scoringutils} needs to know which of the rows -of the data belong together and jointly form a single forecasts. This is -easy e.g. 
for point forecast, where there is one row per forecast. For -quantile or sample-based forecasts, however, there are multiple rows that -belong to a single forecast. - -The \emph{forecast unit} or \emph{unit of a single forecast} is then described by the -combination of columns that uniquely identify a single forecast. -For example, we could have forecasts made by different models in various -locations at different time points, each for several weeks into the future. -The forecast unit could then be described as -\code{forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")}. -\code{scoringutils} automatically tries to determine the unit of a single -forecast. It uses all existing columns for this, which means that no columns -must be present that are unrelated to the forecast unit. As a very simplistic -example, if you had an additional row, "even", that is one if the row number -is even and zero otherwise, then this would mess up scoring as \code{scoringutils} -then thinks that this column was relevant in defining the forecast unit. - -In order to avoid issues, we recommend setting the forecast unit explicitly, -usually through the \code{forecast_unit} argument in the \code{\link[=as_forecast]{as_forecast()}} -functions. This will drop unneeded columns, while making sure that all -necessary, 'protected columns' like "predicted" or "observed" are retained. 
-} - -\examples{ -as_forecast_binary(example_binary) -as_forecast_quantile( - example_quantile, - forecast_unit = c("model", "target_type", "target_end_date", - "horizon", "location") -) -} -\seealso{ -Other functions to create forecast objects: -\code{\link{as_forecast_binary}()}, -\code{\link{as_forecast_nominal}()}, -\code{\link{as_forecast_point}()}, -\code{\link{as_forecast_quantile}()}, -\code{\link{as_forecast_sample}()} -} -\concept{functions to create forecast objects} -\keyword{as_forecast} diff --git a/man/as_forecast_binary.Rd b/man/as_forecast_binary.Rd index 4e11a0e1..388c4aa7 100644 --- a/man/as_forecast_binary.Rd +++ b/man/as_forecast_binary.Rd @@ -13,8 +13,8 @@ as_forecast_binary( } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of for additional information +on the required input format.} \item{forecast_unit}{(optional) Name of the columns in \code{data} (after any renaming of columns) that denote the unit of a @@ -29,13 +29,76 @@ observed values. This column will be renamed to "observed".} \item{predicted}{(optional) Name of the column in \code{data} that contains the predicted values. This column will be renamed to "predicted".} } +\value{ +A \code{forecast} object of class \code{forecast_binary} +} \description{ -Create a \code{forecast} object for binary forecasts. See more information on -forecast types and expected input formats by calling \verb{?}\code{\link[=as_forecast]{as_forecast()}}. +Process and validate a data.frame (or similar) or similar with forecasts +and observations. If the input passes all input checks, those functions will +be converted to a \code{forecast} object. A forecast object is a \code{data.table} with +a class \code{forecast} and an additional class that depends on the forecast type. 
+ +The arguments \code{observed}, \code{predicted}, etc. make it possible to rename +existing columns of the input data to match the required columns for a +forecast object. Using the argument \code{forecast_unit}, you can specify +the columns that uniquely identify a single forecast (and thereby removing +other, unneeded columns. See section "Forecast Unit" below for details). +} +\section{Required input}{ +The input needs to be a data.frame or similar with the following columns: +\itemize{ +\item \code{observed}: \code{factor} with exactly two levels representing the observed +values. The highest factor level is assumed to be the reference level. +This means that corresponding value in \code{predicted} represent the +probability that the observed value is equal to the highest factor level. +\item \code{predicted}: \code{numeric} with predicted probabilities, representing +the probability that the corresponding value in \code{observed} is equal to +the highest available factor level. +} + +For convenience, we recommend an additional column \code{model} holding the name +of the forecaster or model that produced a prediction, but this is not +strictly necessary. + +See the \link{example_binary} data set for an example. +} + +\section{Forecast unit}{ +In order to score forecasts, \code{scoringutils} needs to know which of the rows +of the data belong together and jointly form a single forecasts. This is +easy e.g. for point forecast, where there is one row per forecast. For +quantile or sample-based forecasts, however, there are multiple rows that +belong to a single forecast. + +The \emph{forecast unit} or \emph{unit of a single forecast} is then described by the +combination of columns that uniquely identify a single forecast. +For example, we could have forecasts made by different models in various +locations at different time points, each for several weeks into the future. 
+The forecast unit could then be described as +\code{forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")}. +\code{scoringutils} automatically tries to determine the unit of a single +forecast. It uses all existing columns for this, which means that no columns +must be present that are unrelated to the forecast unit. As a very simplistic +example, if you had an additional row, "even", that is one if the row number +is even and zero otherwise, then this would mess up scoring as \code{scoringutils} +then thinks that this column was relevant in defining the forecast unit. + +In order to avoid issues, we recommend setting the forecast unit explicitly, +using the \code{forecast_unit} argument. This will simply drop unneeded columns, +while making sure that all necessary, 'protected columns' like "predicted" +or "observed" are retained. +} + +\examples{ +as_forecast_binary( + example_binary, + predicted = "predicted", + forecast_unit = c("model", "target_type", "target_end_date", + "horizon", "location") +) } \seealso{ Other functions to create forecast objects: -\code{\link{as_forecast}}, \code{\link{as_forecast_nominal}()}, \code{\link{as_forecast_point}()}, \code{\link{as_forecast_quantile}()}, diff --git a/man/as_forecast_doc_template.Rd b/man/as_forecast_doc_template.Rd new file mode 100644 index 00000000..69e784d2 --- /dev/null +++ b/man/as_forecast_doc_template.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/documentation-templates.R +\name{as_forecast_doc_template} +\alias{as_forecast_doc_template} +\title{General information on creating a \code{forecast} object} +\arguments{ +\item{data}{A data.frame (or similar) with predicted and observed values. +See the details section of for additional information +on the required input format.} + +\item{forecast_unit}{(optional) Name of the columns in \code{data} (after +any renaming of columns) that denote the unit of a +single forecast. 
See \code{\link[=get_forecast_unit]{get_forecast_unit()}} for details. +If \code{NULL} (the default), all columns that are not required columns are +assumed to form the unit of a single forecast. If specified, all columns +that are not part of the forecast unit (or required columns) will be removed.} + +\item{observed}{(optional) Name of the column in \code{data} that contains the +observed values. This column will be renamed to "observed".} + +\item{predicted}{(optional) Name of the column in \code{data} that contains the +predicted values. This column will be renamed to "predicted".} +} +\description{ +Process and validate a data.frame (or similar) or similar with forecasts +and observations. If the input passes all input checks, those functions will +be converted to a \code{forecast} object. A forecast object is a \code{data.table} with +a class \code{forecast} and an additional class that depends on the forecast type. + +The arguments \code{observed}, \code{predicted}, etc. make it possible to rename +existing columns of the input data to match the required columns for a +forecast object. Using the argument \code{forecast_unit}, you can specify +the columns that uniquely identify a single forecast (and thereby removing +other, unneeded columns. See section "Forecast Unit" below for details). +} +\section{Forecast unit}{ +In order to score forecasts, \code{scoringutils} needs to know which of the rows +of the data belong together and jointly form a single forecasts. This is +easy e.g. for point forecast, where there is one row per forecast. For +quantile or sample-based forecasts, however, there are multiple rows that +belong to a single forecast. + +The \emph{forecast unit} or \emph{unit of a single forecast} is then described by the +combination of columns that uniquely identify a single forecast. +For example, we could have forecasts made by different models in various +locations at different time points, each for several weeks into the future. 
+The forecast unit could then be described as +\code{forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")}. +\code{scoringutils} automatically tries to determine the unit of a single +forecast. It uses all existing columns for this, which means that no columns +must be present that are unrelated to the forecast unit. As a very simplistic +example, if you had an additional row, "even", that is one if the row number +is even and zero otherwise, then this would mess up scoring as \code{scoringutils} +then thinks that this column was relevant in defining the forecast unit. + +In order to avoid issues, we recommend setting the forecast unit explicitly, +using the \code{forecast_unit} argument. This will simply drop unneeded columns, +while making sure that all necessary, 'protected columns' like "predicted" +or "observed" are retained. +} + +\keyword{as_forecast} diff --git a/man/as_forecast_generic.Rd b/man/as_forecast_generic.Rd index 744f0774..9cb52e5b 100644 --- a/man/as_forecast_generic.Rd +++ b/man/as_forecast_generic.Rd @@ -13,8 +13,8 @@ as_forecast_generic( } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of for additional information +on the required input format.} \item{forecast_unit}{(optional) Name of the columns in \code{data} (after any renaming of columns) that denote the unit of a diff --git a/man/as_forecast_nominal.Rd b/man/as_forecast_nominal.Rd index fc240f60..362e6b17 100644 --- a/man/as_forecast_nominal.Rd +++ b/man/as_forecast_nominal.Rd @@ -14,8 +14,8 @@ as_forecast_nominal( } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. 
-See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of for additional information +on the required input format.} \item{forecast_unit}{(optional) Name of the columns in \code{data} (after any renaming of columns) that denote the unit of a @@ -32,17 +32,87 @@ predicted values. This column will be renamed to "predicted".} \item{predicted_label}{(optional) Name of the column in \code{data} that denotes the outcome to which a predicted probability corresponds to. -This column will be renamed to "predicted_label". Only applicable to -nominal forecasts.} +This column will be renamed to "predicted_label".} +} +\value{ +A \code{forecast} object of class \code{forecast_nominal} } \description{ -Nominal forecasts are a form of categorical forecasts where the possible -outcomes that the observed values can assume are not ordered. In that sense, -Nominal forecasts represent a generalisation of binary forecasts. +Process and validate a data.frame (or similar) or similar with forecasts +and observations. If the input passes all input checks, those functions will +be converted to a \code{forecast} object. A forecast object is a \code{data.table} with +a class \code{forecast} and an additional class that depends on the forecast type. + +The arguments \code{observed}, \code{predicted}, etc. make it possible to rename +existing columns of the input data to match the required columns for a +forecast object. Using the argument \code{forecast_unit}, you can specify +the columns that uniquely identify a single forecast (and thereby removing +other, unneeded columns. See section "Forecast Unit" below for details). +} +\details{ +Nominal forecasts are a form of categorical forecasts and represent a +generalisation of binary forecasts to multiple outcomes. The possible +outcomes that the observed values can assume are not ordered. 
+} +\section{Required input}{ +The input needs to be a data.frame or similar with the following columns: +\itemize{ +\item \code{observed}: Column with observed values of type \code{factor} with N levels, +where N is the number of possible outcomes. +The levels of the factor represent the possible outcomes that +the observed values can assume. +\item \code{predicted}: \code{numeric} column with predicted probabilities. The values +represent the probability that the observed value is equal to the factor +level denoted in \code{predicted_label}. Note that forecasts must be complete, +i.e. there must be a probability assigned to every possible outcome and +those probabilities must sum to one. +\item \code{predicted_label}: \code{factor} with N levels, denoting the outcome that the +probabilities in \code{predicted} correspond to. +} + +For convenience, we recommend an additional column \code{model} holding the name +of the forecaster or model that produced a prediction, but this is not +strictly necessary. + +See the \link{example_nominal} data set for an example. +} + +\section{Forecast unit}{ +In order to score forecasts, \code{scoringutils} needs to know which of the rows +of the data belong together and jointly form a single forecast. This is +easy e.g. for point forecasts, where there is one row per forecast. For +quantile or sample-based forecasts, however, there are multiple rows that +belong to a single forecast. + +The \emph{forecast unit} or \emph{unit of a single forecast} is then described by the +combination of columns that uniquely identify a single forecast. +For example, we could have forecasts made by different models in various +locations at different time points, each for several weeks into the future. +The forecast unit could then be described as +\code{forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")}. +\code{scoringutils} automatically tries to determine the unit of a single +forecast.
It uses all existing columns for this, which means that no columns +must be present that are unrelated to the forecast unit. As a very simplistic +example, if you had an additional row, "even", that is one if the row number +is even and zero otherwise, then this would mess up scoring as \code{scoringutils} +then thinks that this column was relevant in defining the forecast unit. + +In order to avoid issues, we recommend setting the forecast unit explicitly, +using the \code{forecast_unit} argument. This will simply drop unneeded columns, +while making sure that all necessary, 'protected columns' like "predicted" +or "observed" are retained. +} + +\examples{ +as_forecast_nominal( + na.omit(example_nominal), + predicted = "predicted", + forecast_unit = c("model", "target_type", "target_end_date", + "horizon", "location") +) } \seealso{ Other functions to create forecast objects: -\code{\link{as_forecast}}, \code{\link{as_forecast_binary}()}, \code{\link{as_forecast_point}()}, \code{\link{as_forecast_quantile}()}, diff --git a/man/as_forecast_point.Rd b/man/as_forecast_point.Rd index 7ab9fae9..9d9ac2bd 100644 --- a/man/as_forecast_point.Rd +++ b/man/as_forecast_point.Rd @@ -21,8 +21,8 @@ as_forecast_point(data, ...) } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of for additional information +on the required input format.} \item{...}{Unused} @@ -39,16 +39,29 @@ observed values. This column will be renamed to "observed".} \item{predicted}{(optional) Name of the column in \code{data} that contains the predicted values. This column will be renamed to "predicted".} } +\value{ +A \code{forecast} object of class \code{forecast_point} +} \description{ -Create a \code{forecast} object for point forecasts. 
See more information on -forecast types and expected input formats by calling \verb{?}\code{\link[=as_forecast]{as_forecast()}}. - When converting a \code{forecast_quantile} object into a \code{forecast_point} object, the 0.5 quantile is extracted and returned as the point forecast. } +\section{Required input}{ +The input needs to be a data.frame or similar with the following columns: +\itemize{ +\item \code{observed}: Column of type \code{numeric} with observed values. +\item \code{predicted}: Column of type \code{numeric} with predicted values. +} + +For convenience, we recommend an additional column \code{model} holding the name +of the forecaster or model that produced a prediction, but this is not +strictly necessary. + +See the \link{example_point} data set for an example. +} + \seealso{ Other functions to create forecast objects: -\code{\link{as_forecast}}, \code{\link{as_forecast_binary}()}, \code{\link{as_forecast_nominal}()}, \code{\link{as_forecast_quantile}()}, diff --git a/man/as_forecast_quantile.Rd b/man/as_forecast_quantile.Rd index 13f88a51..c5cd8c5e 100644 --- a/man/as_forecast_quantile.Rd +++ b/man/as_forecast_quantile.Rd @@ -27,8 +27,8 @@ as_forecast_quantile(data, ...) } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of for additional information +on the required input format.} \item{...}{Unused} @@ -56,19 +56,83 @@ quantiles will be computed. Corresponds to the \code{probs} argument in \item{type}{Type argument passed down to the quantile function. For more information, see \code{\link[=quantile]{quantile()}}.} } +\value{ +A \code{forecast} object of class \code{forecast_quantile} +} \description{ -Create a \code{forecast} object for quantile-based forecasts. 
See more information -on forecast types and expected input formats by calling \verb{?}\code{\link[=as_forecast]{as_forecast()}}. +Process and validate a data.frame (or similar) with forecasts +and observations. If the input passes all input checks, the input will +be converted to a \code{forecast} object. A forecast object is a \code{data.table} with +a class \code{forecast} and an additional class that depends on the forecast type. + +The arguments \code{observed}, \code{predicted}, etc. make it possible to rename +existing columns of the input data to match the required columns for a +forecast object. Using the argument \code{forecast_unit}, you can specify +the columns that uniquely identify a single forecast (thereby removing +other, unneeded columns). See section "Forecast unit" below for details. +} +\section{Required input}{ +The input needs to be a data.frame or similar with the following columns: +\itemize{ +\item \code{observed}: Column of type \code{numeric} with observed values. +\item \code{predicted}: Column of type \code{numeric} with predicted values. Predicted +values represent quantiles of the predictive distribution. +\item \code{quantile_level}: Column of type \code{numeric}, denoting the quantile level of +the corresponding predicted value. +Quantile levels must be between 0 and 1. +} + +For convenience, we recommend an additional column \code{model} holding the name +of the forecaster or model that produced a prediction, but this is not +strictly necessary. + +See the \link{example_quantile} data set for an example. +} +\section{Converting from \code{forecast_sample} to \code{forecast_quantile}}{ When creating a \code{forecast_quantile} object from a \code{forecast_sample} object, the quantiles are estimated by computing empirical quantiles from the samples via \code{\link[=quantile]{quantile()}}.
Note that empirical quantiles are a biased estimator for the true quantiles in particular in the tails of the distribution and when the number of available samples is low. } + +\section{Forecast unit}{ +In order to score forecasts, \code{scoringutils} needs to know which of the rows +of the data belong together and jointly form a single forecasts. This is +easy e.g. for point forecast, where there is one row per forecast. For +quantile or sample-based forecasts, however, there are multiple rows that +belong to a single forecast. + +The \emph{forecast unit} or \emph{unit of a single forecast} is then described by the +combination of columns that uniquely identify a single forecast. +For example, we could have forecasts made by different models in various +locations at different time points, each for several weeks into the future. +The forecast unit could then be described as +\code{forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")}. +\code{scoringutils} automatically tries to determine the unit of a single +forecast. It uses all existing columns for this, which means that no columns +must be present that are unrelated to the forecast unit. As a very simplistic +example, if you had an additional row, "even", that is one if the row number +is even and zero otherwise, then this would mess up scoring as \code{scoringutils} +then thinks that this column was relevant in defining the forecast unit. + +In order to avoid issues, we recommend setting the forecast unit explicitly, +using the \code{forecast_unit} argument. This will simply drop unneeded columns, +while making sure that all necessary, 'protected columns' like "predicted" +or "observed" are retained. 
+} + +\examples{ +as_forecast_quantile( + example_quantile, + predicted = "predicted", + forecast_unit = c("model", "target_type", "target_end_date", + "horizon", "location") +) +} \seealso{ Other functions to create forecast objects: -\code{\link{as_forecast}}, \code{\link{as_forecast_binary}()}, \code{\link{as_forecast_nominal}()}, \code{\link{as_forecast_point}()}, diff --git a/man/as_forecast_sample.Rd b/man/as_forecast_sample.Rd index f298061d..01a052e7 100644 --- a/man/as_forecast_sample.Rd +++ b/man/as_forecast_sample.Rd @@ -14,8 +14,8 @@ as_forecast_sample( } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of for additional information +on the required input format.} \item{forecast_unit}{(optional) Name of the columns in \code{data} (after any renaming of columns) that denote the unit of a @@ -31,15 +31,69 @@ observed values. This column will be renamed to "observed".} predicted values. This column will be renamed to "predicted".} \item{sample_id}{(optional) Name of the column in \code{data} that contains the -sample id. This column will be renamed to "sample_id". Only applicable to -sample-based forecasts.} +sample id. This column will be renamed to "sample_id".} +} +\value{ +A \code{forecast} object of class \code{forecast_sample} } \description{ -Create a \code{forecast} object for sample-based forecasts +Process and validate a data.frame (or similar) or similar with forecasts +and observations. If the input passes all input checks, those functions will +be converted to a \code{forecast} object. A forecast object is a \code{data.table} with +a class \code{forecast} and an additional class that depends on the forecast type. + +The arguments \code{observed}, \code{predicted}, etc. 
make it possible to rename +existing columns of the input data to match the required columns for a +forecast object. Using the argument \code{forecast_unit}, you can specify +the columns that uniquely identify a single forecast (thereby removing +other, unneeded columns). See section "Forecast unit" below for details. +} +\section{Required input}{ +The input needs to be a data.frame or similar with the following columns: +\itemize{ +\item \code{observed}: Column of type \code{numeric} with observed values. +\item \code{predicted}: Column of type \code{numeric} with predicted values. Predicted +values represent random samples from the predictive distribution. +\item \code{sample_id}: Column of any type with unique identifiers +(unique within a single forecast) for each sample. +} + +For convenience, we recommend an additional column \code{model} holding the name +of the forecaster or model that produced a prediction, but this is not +strictly necessary. + +See the \link{example_sample_continuous} and \link{example_sample_discrete} data sets +for an example. } + +\section{Forecast unit}{ +In order to score forecasts, \code{scoringutils} needs to know which of the rows +of the data belong together and jointly form a single forecast. This is +easy e.g. for point forecasts, where there is one row per forecast. For +quantile or sample-based forecasts, however, there are multiple rows that +belong to a single forecast. + +The \emph{forecast unit} or \emph{unit of a single forecast} is then described by the +combination of columns that uniquely identify a single forecast. +For example, we could have forecasts made by different models in various +locations at different time points, each for several weeks into the future. +The forecast unit could then be described as +\code{forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")}. +\code{scoringutils} automatically tries to determine the unit of a single +forecast.
It uses all existing columns for this, which means that no columns +must be present that are unrelated to the forecast unit. As a very simplistic +example, if you had an additional row, "even", that is one if the row number +is even and zero otherwise, then this would mess up scoring as \code{scoringutils} +then thinks that this column was relevant in defining the forecast unit. + +In order to avoid issues, we recommend setting the forecast unit explicitly, +using the \code{forecast_unit} argument. This will simply drop unneeded columns, +while making sure that all necessary, 'protected columns' like "predicted" +or "observed" are retained. +} + \seealso{ Other functions to create forecast objects: -\code{\link{as_forecast}}, \code{\link{as_forecast_binary}()}, \code{\link{as_forecast_nominal}()}, \code{\link{as_forecast_point}()}, diff --git a/man/assert_forecast.Rd b/man/assert_forecast.Rd index 0b73a05e..3f13bcc1 100644 --- a/man/assert_forecast.Rd +++ b/man/assert_forecast.Rd @@ -25,7 +25,7 @@ assert_forecast(forecast, forecast_type = NULL, verbose = TRUE, ...) } \arguments{ \item{forecast}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values).} \item{forecast_type}{(optional) The forecast type you expect the forecasts to have. If the forecast type as determined by \code{scoringutils} based on the @@ -45,64 +45,12 @@ Returns \code{NULL} invisibly. } \description{ Assert that an object is a forecast object (i.e. a \code{data.table} with a class -\code{forecast} and an additional class \verb{forecast_*} corresponding to the forecast -type). -} -\section{Forecast types and input formats}{ -Various different forecast types / forecast formats are supported. 
At the -moment, those are: -\itemize{ -\item point forecasts -\item binary forecasts ("soft binary classification") -\item nominal forecasts ("soft classification with multiple unordered classes") -\item Probabilistic forecasts in a quantile-based format (a forecast is -represented as a set of predictive quantiles) -\item Probabilistic forecasts in a sample-based format (a forecast is represented -as a set of predictive samples) -} - -Forecast types are determined based on the columns present in the input data. -Here is an overview of the required format for each forecast type: -\if{html}{ - \out{
} - \figure{required-inputs.png}{options: style="width:750px;max-width:100\%;"} - \out{
} -} -\if{latex}{ - \figure{required-inputs.png} -} - -\emph{All forecast types} require a data.frame or similar with columns \code{observed} -\code{predicted}, and \code{model}. +\code{forecast} and an additional class \verb{forecast_<type>} corresponding to the +forecast type). -\emph{Point forecasts} require a column \code{observed} of type numeric and a column -\code{predicted} of type numeric. - -\emph{Binary forecasts} require a column \code{observed} of type factor with exactly -two levels and a column \code{predicted} of type numeric with probabilities, -corresponding to the probability that \code{observed} is equal to the second -factor level. See details \link[=brier_score]{here} for more information. - -\emph{Nominal forecasts} require a column \code{observed} of type factor with N levels, -(where N is the number of possible outcomes), a column \code{predicted} of type -numeric with probabilities (which sum to one across all possible outcomes), -and a column \code{predicted_label} of type factor with N levels, denoting the -outcome for which a probability is given. Forecasts must be complete, i.e. -there must be a probability assigned to every possible outcome. - -\emph{Quantile-based forecasts} require a column \code{observed} of type numeric, -a column \code{predicted} of type numeric, and a column \code{quantile_level} of type -numeric with quantile-levels (between 0 and 1). - -\emph{Sample-based forecasts} require a column \code{observed} of type numeric, -a column \code{predicted} of type numeric, and a column \code{sample_id} of type -numeric with sample indices. - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_sample_continuous}, \link{example_sample_discrete}, -\code{\link[=example_point]{example_point()}}, \link{example_binary}, and \link{example_nominal}). +See the corresponding \verb{assert_forecast_<type>} functions for more details on +the required input formats.
} - \examples{ forecast <- as_forecast_binary(example_binary) assert_forecast(forecast) diff --git a/man/assert_forecast_type.Rd b/man/assert_forecast_type.Rd index 7667303a..b3074823 100644 --- a/man/assert_forecast_type.Rd +++ b/man/assert_forecast_type.Rd @@ -7,7 +7,7 @@ assert_forecast_type(data, actual = get_forecast_type(data), desired = NULL) } \arguments{ -\item{data}{A forecast object (see \code{\link[=as_forecast]{as_forecast()}}).} +\item{data}{A forecast object.} \item{actual}{The actual forecast type of the data} diff --git a/man/check_duplicates.Rd b/man/check_duplicates.Rd index 30c1d4c9..38c6da11 100644 --- a/man/check_duplicates.Rd +++ b/man/check_duplicates.Rd @@ -8,8 +8,8 @@ check_duplicates(data) } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of for additional information +on the required input format.} } \value{ Returns TRUE if the check was successful and a string with an diff --git a/man/clean_forecast.Rd b/man/clean_forecast.Rd index 431f72a5..0cf20615 100644 --- a/man/clean_forecast.Rd +++ b/man/clean_forecast.Rd @@ -8,7 +8,7 @@ clean_forecast(forecast, copy = FALSE, na.omit = FALSE) } \arguments{ \item{forecast}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values).} \item{copy}{Logical, default is \code{FALSE}. 
If \code{TRUE}, a copy of the input data is created.} diff --git a/man/example_binary.Rd b/man/example_binary.Rd index dfe83e81..86a1fb47 100644 --- a/man/example_binary.Rd +++ b/man/example_binary.Rd @@ -5,8 +5,8 @@ \alias{example_binary} \title{Binary forecast example data} \format{ -An object of class \code{forecast_binary} (see \code{\link[=as_forecast]{as_forecast()}}) with the -following columns: +An object of class \code{forecast_binary} (see \code{\link[=as_forecast_binary]{as_forecast_binary()}}) +with the following columns: \describe{ \item{location}{the country for which a prediction was made} \item{location_name}{name of the country for which a prediction was made} diff --git a/man/example_nominal.Rd b/man/example_nominal.Rd index 71d18635..f42f958c 100644 --- a/man/example_nominal.Rd +++ b/man/example_nominal.Rd @@ -5,8 +5,8 @@ \alias{example_nominal} \title{Nominal example data} \format{ -An object of class \code{forecast_nominal} (see \code{\link[=as_forecast]{as_forecast()}}) with the -following columns: +An object of class \code{forecast_nominal} +(see \code{\link[=as_forecast_nominal]{as_forecast_nominal()}}) with the following columns: \describe{ \item{location}{the country for which a prediction was made} \item{target_end_date}{the date for which a prediction was made} diff --git a/man/example_point.Rd b/man/example_point.Rd index a6f325e1..5b3d22c1 100644 --- a/man/example_point.Rd +++ b/man/example_point.Rd @@ -5,8 +5,8 @@ \alias{example_point} \title{Point forecast example data} \format{ -An object of class \code{forecast_point} (see \code{\link[=as_forecast]{as_forecast()}}) with the -following columns: +An object of class \code{forecast_point} (see \code{\link[=as_forecast_point]{as_forecast_point()}}) +with the following columns: \describe{ \item{location}{the country for which a prediction was made} \item{target_end_date}{the date for which a prediction was made} diff --git a/man/example_quantile.Rd b/man/example_quantile.Rd index 
d16a04d1..b4d3e242 100644 --- a/man/example_quantile.Rd +++ b/man/example_quantile.Rd @@ -5,8 +5,8 @@ \alias{example_quantile} \title{Quantile example data} \format{ -An object of class \code{forecast_quantile} (see \code{\link[=as_forecast]{as_forecast()}}) with the -following columns: +An object of class \code{forecast_quantile} +(see \code{\link[=as_forecast_quantile]{as_forecast_quantile()}}) with the following columns: \describe{ \item{location}{the country for which a prediction was made} \item{target_end_date}{the date for which a prediction was made} diff --git a/man/example_sample_continuous.Rd b/man/example_sample_continuous.Rd index 3a447f3c..5d8fe86e 100644 --- a/man/example_sample_continuous.Rd +++ b/man/example_sample_continuous.Rd @@ -5,8 +5,8 @@ \alias{example_sample_continuous} \title{Continuous forecast example data} \format{ -An object of class \code{forecast_sample} (see \code{\link[=as_forecast]{as_forecast()}}) with the -following columns: +An object of class \code{forecast_sample} (see \code{\link[=as_forecast_sample]{as_forecast_sample()}}) +with the following columns: \describe{ \item{location}{the country for which a prediction was made} \item{target_end_date}{the date for which a prediction was made} diff --git a/man/example_sample_discrete.Rd b/man/example_sample_discrete.Rd index 040e9390..9ee74994 100644 --- a/man/example_sample_discrete.Rd +++ b/man/example_sample_discrete.Rd @@ -5,8 +5,8 @@ \alias{example_sample_discrete} \title{Discrete forecast example data} \format{ -An object of class \code{forecast_sample} (see \code{\link[=as_forecast]{as_forecast()}}) with the -following columns: +An object of class \code{forecast_sample} (see \code{\link[=as_forecast_sample]{as_forecast_sample()}}) +with the following columns: \describe{ \item{location}{the country for which a prediction was made} \item{target_end_date}{the date for which a prediction was made} diff --git a/man/figures/metrics-nominal.png b/man/figures/metrics-nominal.png new 
file mode 100644 index 00000000..9991d6d0 Binary files /dev/null and b/man/figures/metrics-nominal.png differ diff --git a/man/forecast_types.Rd b/man/forecast_types.Rd index c299fcbf..dab78d5a 100644 --- a/man/forecast_types.Rd +++ b/man/forecast_types.Rd @@ -6,61 +6,6 @@ \description{ Documentation template for forecast types } -\section{Forecast types and input formats}{ -Various different forecast types / forecast formats are supported. At the -moment, those are: -\itemize{ -\item point forecasts -\item binary forecasts ("soft binary classification") -\item nominal forecasts ("soft classification with multiple unordered classes") -\item Probabilistic forecasts in a quantile-based format (a forecast is -represented as a set of predictive quantiles) -\item Probabilistic forecasts in a sample-based format (a forecast is represented -as a set of predictive samples) -} - -Forecast types are determined based on the columns present in the input data. -Here is an overview of the required format for each forecast type: -\if{html}{ - \out{
} - \figure{required-inputs.png}{options: style="width:750px;max-width:100\%;"} - \out{
} -} -\if{latex}{ - \figure{required-inputs.png} -} - -\emph{All forecast types} require a data.frame or similar with columns \code{observed} -\code{predicted}, and \code{model}. - -\emph{Point forecasts} require a column \code{observed} of type numeric and a column -\code{predicted} of type numeric. - -\emph{Binary forecasts} require a column \code{observed} of type factor with exactly -two levels and a column \code{predicted} of type numeric with probabilities, -corresponding to the probability that \code{observed} is equal to the second -factor level. See details \link[=brier_score]{here} for more information. - -\emph{Nominal forecasts} require a column \code{observed} of type factor with N levels, -(where N is the number of possible outcomes), a column \code{predicted} of type -numeric with probabilities (which sum to one across all possible outcomes), -and a column \code{predicted_label} of type factor with N levels, denoting the -outcome for which a probability is given. Forecasts must be complete, i.e. -there must be a probability assigned to every possible outcome. - -\emph{Quantile-based forecasts} require a column \code{observed} of type numeric, -a column \code{predicted} of type numeric, and a column \code{quantile_level} of type -numeric with quantile-levels (between 0 and 1). - -\emph{Sample-based forecasts} require a column \code{observed} of type numeric, -a column \code{predicted} of type numeric, and a column \code{sample_id} of type -numeric with sample indices. - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_sample_continuous}, \link{example_sample_discrete}, -\code{\link[=example_point]{example_point()}}, \link{example_binary}, and \link{example_nominal}). -} - \section{Forecast unit}{ In order to score forecasts, \code{scoringutils} needs to know which of the rows of the data belong together and jointly form a single forecasts. 
This is @@ -82,9 +27,9 @@ is even and zero otherwise, then this would mess up scoring as \code{scoringutil then thinks that this column was relevant in defining the forecast unit. In order to avoid issues, we recommend setting the forecast unit explicitly, -usually through the \code{forecast_unit} argument in the \code{\link[=as_forecast]{as_forecast()}} -functions. This will drop unneeded columns, while making sure that all -necessary, 'protected columns' like "predicted" or "observed" are retained. +using the \code{forecast_unit} argument. This will simply drop unneeded columns, +while making sure that all necessary, 'protected columns' like "predicted" +or "observed" are retained. } \keyword{internal} diff --git a/man/get_coverage.Rd b/man/get_coverage.Rd index 0dcdb036..f9bf580d 100644 --- a/man/get_coverage.Rd +++ b/man/get_coverage.Rd @@ -8,7 +8,7 @@ get_coverage(forecast, by = "model") } \arguments{ \item{forecast}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values).} \item{by}{character vector that denotes the level of grouping for which the coverage values should be computed. 
By default (\code{"model"}), one coverage @@ -24,7 +24,7 @@ a data.table with columns "interval_coverage", } \description{ For a validated forecast object in a quantile-based format -(see \code{\link[=as_forecast]{as_forecast()}} for more information), this function computes: +(see \code{\link[=as_forecast_quantile]{as_forecast_quantile()}} for more information), this function computes: \itemize{ \item interval coverage of central prediction intervals \item quantile coverage for predictive quantiles diff --git a/man/get_duplicate_forecasts.Rd b/man/get_duplicate_forecasts.Rd index b69f67fe..893492f6 100644 --- a/man/get_duplicate_forecasts.Rd +++ b/man/get_duplicate_forecasts.Rd @@ -8,8 +8,8 @@ get_duplicate_forecasts(data, forecast_unit = NULL, counts = FALSE) } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of for additional information +on the required input format.} \item{forecast_unit}{(optional) Name of the columns in \code{data} (after any renaming of columns) that denote the unit of a diff --git a/man/get_forecast_counts.Rd b/man/get_forecast_counts.Rd index 6a0cc74c..a528c146 100644 --- a/man/get_forecast_counts.Rd +++ b/man/get_forecast_counts.Rd @@ -12,7 +12,7 @@ get_forecast_counts( } \arguments{ \item{forecast}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values).} \item{by}{character vector or \code{NULL} (the default) that denotes the categories over which the number of forecasts should be counted. 
diff --git a/man/get_forecast_type.Rd b/man/get_forecast_type.Rd index 43f2e05c..237c1623 100644 --- a/man/get_forecast_type.Rd +++ b/man/get_forecast_type.Rd @@ -8,7 +8,7 @@ get_forecast_type(forecast) } \arguments{ \item{forecast}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values).} } \value{ Character vector of length one with the forecast type. diff --git a/man/get_forecast_unit.Rd b/man/get_forecast_unit.Rd index cf2348e8..942628b8 100644 --- a/man/get_forecast_unit.Rd +++ b/man/get_forecast_unit.Rd @@ -8,8 +8,8 @@ get_forecast_unit(data) } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of for additional information +on the required input format.} } \value{ A character vector with the column names that define the unit of @@ -44,9 +44,9 @@ is even and zero otherwise, then this would mess up scoring as \code{scoringutil then thinks that this column was relevant in defining the forecast unit. In order to avoid issues, we recommend setting the forecast unit explicitly, -usually through the \code{forecast_unit} argument in the \code{\link[=as_forecast]{as_forecast()}} -functions. This will drop unneeded columns, while making sure that all -necessary, 'protected columns' like "predicted" or "observed" are retained. +using the \code{forecast_unit} argument. This will simply drop unneeded columns, +while making sure that all necessary, 'protected columns' like "predicted" +or "observed" are retained. } \keyword{diagnose-inputs} diff --git a/man/get_metrics.Rd b/man/get_metrics.Rd index 9c3f2192..efed2db9 100644 --- a/man/get_metrics.Rd +++ b/man/get_metrics.Rd @@ -12,10 +12,10 @@ get_metrics(x, ...) 
\item{...}{Additional arguments passed to the method.} } \description{ -Generic function to to obtain default metrics availble for scoring or metrics +Generic function to obtain default metrics available for scoring or metrics that were used for scoring. \itemize{ -\item If called on \code{forecast} object it returns a list of functions that can be +\item If called on a \code{forecast} object it returns a list of functions that can be used for scoring. \item If called on a \code{scores} object (see \code{\link[=score]{score()}}), it returns a character vector with the names of the metrics that were used for scoring. @@ -25,10 +25,6 @@ See the documentation for the actual methods in the \verb{See Also} section belo for more details. Alternatively call \verb{?get_metrics.<forecast_type>} or \code{?get_metrics.scores}. } -\details{ -See \code{\link[=as_forecast]{as_forecast()}} for more information on \code{forecast} objects and \code{\link[=score]{score()}} -for more information on \code{scores} objects. -} \seealso{ Other get_metrics functions: \code{\link{get_metrics.forecast_binary}()}, diff --git a/man/get_metrics.forecast_binary.Rd b/man/get_metrics.forecast_binary.Rd index 98de45ae..a9e2ab3a 100644 --- a/man/get_metrics.forecast_binary.Rd +++ b/man/get_metrics.forecast_binary.Rd @@ -8,7 +8,7 @@ } \arguments{ \item{x}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values, see \code{\link[=as_forecast_binary]{as_forecast_binary()}}).} \item{select}{A character vector of scoring rules to select from the list.
If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} diff --git a/man/get_metrics.forecast_nominal.Rd b/man/get_metrics.forecast_nominal.Rd index 45a35d08..bacd05cc 100644 --- a/man/get_metrics.forecast_nominal.Rd +++ b/man/get_metrics.forecast_nominal.Rd @@ -8,7 +8,7 @@ } \arguments{ \item{x}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values, see \code{\link[=as_forecast_nominal]{as_forecast_nominal()}}).} \item{select}{A character vector of scoring rules to select from the list. If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} diff --git a/man/get_metrics.forecast_point.Rd b/man/get_metrics.forecast_point.Rd index df83ce61..38255ab1 100644 --- a/man/get_metrics.forecast_point.Rd +++ b/man/get_metrics.forecast_point.Rd @@ -8,7 +8,7 @@ } \arguments{ \item{x}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values, see \code{\link[=as_forecast_point]{as_forecast_point()}}).} \item{select}{A character vector of scoring rules to select from the list. If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} diff --git a/man/get_metrics.forecast_quantile.Rd b/man/get_metrics.forecast_quantile.Rd index 10197406..58396865 100644 --- a/man/get_metrics.forecast_quantile.Rd +++ b/man/get_metrics.forecast_quantile.Rd @@ -8,7 +8,7 @@ } \arguments{ \item{x}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values, see \code{\link[=as_forecast_quantile]{as_forecast_quantile()}}).} \item{select}{A character vector of scoring rules to select from the list. 
If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} diff --git a/man/get_metrics.forecast_sample.Rd b/man/get_metrics.forecast_sample.Rd index 42272489..2a94caa9 100644 --- a/man/get_metrics.forecast_sample.Rd +++ b/man/get_metrics.forecast_sample.Rd @@ -8,7 +8,7 @@ } \arguments{ \item{x}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values, see \code{\link[=as_forecast_sample]{as_forecast_sample()}}).} \item{select}{A character vector of scoring rules to select from the list. If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} diff --git a/man/get_pit_histogram.Rd b/man/get_pit_histogram.Rd index 074aa494..cb4273f2 100644 --- a/man/get_pit_histogram.Rd +++ b/man/get_pit_histogram.Rd @@ -26,7 +26,7 @@ get_pit_histogram(forecast, num_bins, breaks, by, ...) } \arguments{ \item{forecast}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values).} \item{num_bins}{The number of bins in the PIT histogram. For sample-based forecasts, the default is 10 bins. For quantile-based forecasts, the diff --git a/man/get_protected_columns.Rd b/man/get_protected_columns.Rd index 2abf59ee..6117496f 100644 --- a/man/get_protected_columns.Rd +++ b/man/get_protected_columns.Rd @@ -8,8 +8,8 @@ get_protected_columns(data = NULL) } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of the relevant \verb{as_forecast_<type>()} function for additional information +on the required input format.} } \value{ A character vector with the names of protected columns in the data. 
diff --git a/man/illustration-input-metric-nominal.Rd b/man/illustration-input-metric-nominal.Rd new file mode 100644 index 00000000..bce6cc36 --- /dev/null +++ b/man/illustration-input-metric-nominal.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/documentation-templates.R +\name{illustration-input-metric-nominal} +\alias{illustration-input-metric-nominal} +\title{Illustration of required inputs for nominal forecasts} +\description{ +Illustration of required inputs for nominal forecasts +} +\section{Input format}{ +\if{html}{ + \out{<div style="text-align: left">} + \figure{metrics-nominal.png}{options: style="width:750px;max-width:100\%;"} + \out{</div>} +} +\if{latex}{ + \figure{metrics-nominal.png} +} +} + +\keyword{internal} diff --git a/man/is_forecast.Rd b/man/is_forecast.Rd index a52d396b..01e77271 100644 --- a/man/is_forecast.Rd +++ b/man/is_forecast.Rd @@ -34,11 +34,10 @@ is_forecast(x) to class \code{forecast}, \code{FALSE} otherwise. } \description{ -Test whether an object is a forecast object (see \code{\link[=as_forecast]{as_forecast()}} for more -information). +Test whether an object is a forecast object. -You can test for a specific \verb{forecast_*} class using the appropriate -\verb{is_forecast_*} function. +You can test for a specific \verb{forecast_<type>} class using the appropriate +\verb{is_forecast_<type>} function. } \examples{ forecast_binary <- as_forecast_binary(example_binary) diff --git a/man/new_forecast.Rd b/man/new_forecast.Rd index b07c5f15..b9286856 100644 --- a/man/new_forecast.Rd +++ b/man/new_forecast.Rd @@ -8,8 +8,8 @@ new_forecast(data, classname) } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of the relevant \verb{as_forecast_<type>()} function for additional information +on the required input format.} \item{classname}{name of the class to be created} } diff --git a/man/print.forecast.Rd b/man/print.forecast.Rd index f5c2c8f7..477866b5 100644 --- a/man/print.forecast.Rd +++ b/man/print.forecast.Rd @@ -7,8 +7,7 @@ \method{print}{forecast}(x, ...) } \arguments{ -\item{x}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +\item{x}{A forecast object} \item{...}{Additional arguments for \code{\link[=print]{print()}}.} } diff --git a/man/quantile_to_interval.Rd b/man/quantile_to_interval.Rd index 168352df..d38a535b 100644 --- a/man/quantile_to_interval.Rd +++ b/man/quantile_to_interval.Rd @@ -21,7 +21,7 @@ quantile_to_interval_numeric(observed, predicted, quantile_level, ...) 
\item{...}{Arguments} \item{forecast}{A data.table with forecasts in a quantile-based format (see -\code{\link[=as_forecast]{as_forecast()}}).} +\code{\link[=as_forecast_quantile]{as_forecast_quantile()}}).} \item{format}{The format of the output. Either "long" or "wide". If "long" (the default), there will be a column \code{boundary} (with values either diff --git a/man/sample_to_interval_long.Rd b/man/sample_to_interval_long.Rd index 58f903d9..012e1eae 100644 --- a/man/sample_to_interval_long.Rd +++ b/man/sample_to_interval_long.Rd @@ -13,8 +13,8 @@ sample_to_interval_long( } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of the relevant \verb{as_forecast_<type>()} function for additional information +on the required input format.} \item{type}{Type argument passed down to the quantile function. For more information, see \code{\link[=quantile]{quantile()}}.} diff --git a/man/score.Rd b/man/score.Rd index a3d1b5af..97ea5e3f 100644 --- a/man/score.Rd +++ b/man/score.Rd @@ -25,7 +25,7 @@ score(forecast, metrics, ...) } \arguments{ \item{forecast}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values).} \item{metrics}{A named list of scoring functions. Names will be used as column names in the output. See \code{\link[=get_metrics]{get_metrics()}} for more information on the @@ -46,12 +46,16 @@ scores. } \description{ \code{score()} applies a selection of scoring metrics to a forecast -object (a data.table with forecasts and observations) (see \code{\link[=as_forecast]{as_forecast()}}). +object. \code{score()} is a generic that dispatches to different methods depending on the class of the input data. -See the \emph{Forecast types and input formats} section for more information on -forecast types and input formats. 
+See \code{\link[=as_forecast_binary]{as_forecast_binary()}}, \code{\link[=as_forecast_quantile]{as_forecast_quantile()}} etc. for information on +how to create a forecast object. + +See \code{\link[=get_forecast_unit]{get_forecast_unit()}} for more information on the concept of a forecast +unit. + For additional help and examples, check out the \href{https://epiforecasts.io/scoringutils/articles/scoringutils.html}{Getting Started Vignette} as well as the paper \href{https://arxiv.org/abs/2205.07090}{Evaluating Forecasts with scoringutils in R}. } @@ -79,87 +83,6 @@ print1() # prints 'bar' print2() # prints 'foo' }\if{html}{\out{</div>}} } -\section{Forecast types and input formats}{ -Various different forecast types / forecast formats are supported. At the -moment, those are: -\itemize{ -\item point forecasts -\item binary forecasts ("soft binary classification") -\item nominal forecasts ("soft classification with multiple unordered classes") -\item Probabilistic forecasts in a quantile-based format (a forecast is -represented as a set of predictive quantiles) -\item Probabilistic forecasts in a sample-based format (a forecast is represented -as a set of predictive samples) -} - -Forecast types are determined based on the columns present in the input data. -Here is an overview of the required format for each forecast type: -\if{html}{ - \out{<div style="text-align: left">} - \figure{required-inputs.png}{options: style="width:750px;max-width:100\%;"} - \out{</div>} -} -\if{latex}{ - \figure{required-inputs.png} -} - -\emph{All forecast types} require a data.frame or similar with columns \code{observed} -\code{predicted}, and \code{model}. - -\emph{Point forecasts} require a column \code{observed} of type numeric and a column -\code{predicted} of type numeric. - -\emph{Binary forecasts} require a column \code{observed} of type factor with exactly -two levels and a column \code{predicted} of type numeric with probabilities, -corresponding to the probability that \code{observed} is equal to the second -factor level. See details \link[=brier_score]{here} for more information. - -\emph{Nominal forecasts} require a column \code{observed} of type factor with N levels, -(where N is the number of possible outcomes), a column \code{predicted} of type -numeric with probabilities (which sum to one across all possible outcomes), -and a column \code{predicted_label} of type factor with N levels, denoting the -outcome for which a probability is given. Forecasts must be complete, i.e. -there must be a probability assigned to every possible outcome. - -\emph{Quantile-based forecasts} require a column \code{observed} of type numeric, -a column \code{predicted} of type numeric, and a column \code{quantile_level} of type -numeric with quantile-levels (between 0 and 1). - -\emph{Sample-based forecasts} require a column \code{observed} of type numeric, -a column \code{predicted} of type numeric, and a column \code{sample_id} of type -numeric with sample indices. - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_sample_continuous}, \link{example_sample_discrete}, -\code{\link[=example_point]{example_point()}}, \link{example_binary}, and \link{example_nominal}). -} - -\section{Forecast unit}{ -In order to score forecasts, \code{scoringutils} needs to know which of the rows -of the data belong together and jointly form a single forecasts. This is -easy e.g. 
for point forecast, where there is one row per forecast. For -quantile or sample-based forecasts, however, there are multiple rows that -belong to a single forecast. - -The \emph{forecast unit} or \emph{unit of a single forecast} is then described by the -combination of columns that uniquely identify a single forecast. -For example, we could have forecasts made by different models in various -locations at different time points, each for several weeks into the future. -The forecast unit could then be described as -\code{forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")}. -\code{scoringutils} automatically tries to determine the unit of a single -forecast. It uses all existing columns for this, which means that no columns -must be present that are unrelated to the forecast unit. As a very simplistic -example, if you had an additional row, "even", that is one if the row number -is even and zero otherwise, then this would mess up scoring as \code{scoringutils} -then thinks that this column was relevant in defining the forecast unit. - -In order to avoid issues, we recommend setting the forecast unit explicitly, -usually through the \code{forecast_unit} argument in the \code{\link[=as_forecast]{as_forecast()}} -functions. This will drop unneeded columns, while making sure that all -necessary, 'protected columns' like "predicted" or "observed" are retained. -} - \examples{ library(magrittr) # pipe operator \dontshow{ diff --git a/man/scoring-functions-nominal.Rd b/man/scoring-functions-nominal.Rd index a7b6d81e..377e156e 100644 --- a/man/scoring-functions-nominal.Rd +++ b/man/scoring-functions-nominal.Rd @@ -27,6 +27,17 @@ The Log Score is the negative logarithm of the probability assigned to the observed value. It is a proper scoring rule. Small values are better (best is zero, worst is infinity). } +\section{Input format}{ +\if{html}{ + \out{<div style="text-align: left">} + \figure{metrics-nominal.png}{options: style="width:750px;max-width:100\%;"} + \out{</div>} +} +\if{latex}{ + \figure{metrics-nominal.png} +} +} + \examples{ factor_levels <- c("one", "two", "three") predicted_label <- factor(c("one", "two", "three"), levels = factor_levels) diff --git a/man/set_forecast_unit.Rd b/man/set_forecast_unit.Rd index 7bc42e0c..513ad8ac 100644 --- a/man/set_forecast_unit.Rd +++ b/man/set_forecast_unit.Rd @@ -8,8 +8,8 @@ set_forecast_unit(data, forecast_unit) } \arguments{ \item{data}{A data.frame (or similar) with predicted and observed values. -See the details section of \code{\link[=as_forecast]{as_forecast()}} for additional information -on required input formats.} +See the details section of the relevant \verb{as_forecast_<type>()} function for additional information +on the required input format.} \item{forecast_unit}{Character vector with the names of the columns that uniquely identify a single forecast.} @@ -24,8 +24,8 @@ combination of columns that uniquely define a single forecast) manually. This simple function keeps the columns specified in \code{forecast_unit} (plus additional protected columns, e.g. for observed values, predictions or quantile levels) and removes duplicate rows. \code{set_forecast_unit()} will -mainly be called when constructing a \code{forecast} object (see \code{\link[=as_forecast]{as_forecast()}}) -via the \code{forecast_unit} argument there. +mainly be called when constructing a \code{forecast} object +via the \code{forecast_unit} argument in \verb{as_forecast_<type>}. 
If not done explicitly, \code{scoringutils} attempts to determine the unit of a single forecast automatically by simply assuming that all column names diff --git a/man/transform_forecasts.Rd b/man/transform_forecasts.Rd index fddd040f..a4c5110e 100644 --- a/man/transform_forecasts.Rd +++ b/man/transform_forecasts.Rd @@ -14,7 +14,7 @@ transform_forecasts( } \arguments{ \item{forecast}{A forecast object (a validated data.table with predicted and -observed values, see \code{\link[=as_forecast]{as_forecast()}}).} +observed values).} \item{fun}{A function used to transform both observed values and predictions. The default function is \code{\link[=log_shift]{log_shift()}}, a custom function that is