From 8bef7fa2a7d28727e24ed51c8a34b73acb3c2ef6 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Sun, 25 Feb 2024 10:52:55 -0600 Subject: [PATCH 01/10] Update snapshots after testthat update --- .../plot-available-forecasts.svg | 22 +- .../plot_correlation/plot-correlation.svg | 14 +- .../_snaps/plot_heatmap/plot-heatmap.svg | 18 +- .../plot_predictions/many-quantiles.svg | 628 +++++++++--------- .../plot_ranges/plot-ranges-dispersion.svg | 18 +- .../plot_ranges/plot-ranges-interval.svg | 18 +- 6 files changed, 359 insertions(+), 359 deletions(-) diff --git a/tests/testthat/_snaps/plot_avail_forecasts/plot-available-forecasts.svg b/tests/testthat/_snaps/plot_avail_forecasts/plot-available-forecasts.svg index 6909ba780..e03cc1946 100644 --- a/tests/testthat/_snaps/plot_avail_forecasts/plot-available-forecasts.svg +++ b/tests/testthat/_snaps/plot_avail_forecasts/plot-available-forecasts.svg @@ -213,23 +213,23 @@ target_end_date model +Count + + + + + + + + + + 0 3 6 9 12 -Count - - - - - - - - - - plot_available_forecasts diff --git a/tests/testthat/_snaps/plot_correlation/plot-correlation.svg b/tests/testthat/_snaps/plot_correlation/plot-correlation.svg index d51d9d2b0..fbe1da1fb 100644 --- a/tests/testthat/_snaps/plot_correlation/plot-correlation.svg +++ b/tests/testthat/_snaps/plot_correlation/plot-correlation.svg @@ -155,17 +155,17 @@ underprediction overprediction wis +Correlation + + + + + + 0.0 0.5 1.0 -Correlation - - - - - - plot__correlation diff --git a/tests/testthat/_snaps/plot_heatmap/plot-heatmap.svg b/tests/testthat/_snaps/plot_heatmap/plot-heatmap.svg index 8c4222d9a..90228158b 100644 --- a/tests/testthat/_snaps/plot_heatmap/plot-heatmap.svg +++ b/tests/testthat/_snaps/plot_heatmap/plot-heatmap.svg @@ -57,20 +57,20 @@ Deaths target_type model +bias + + + + + + + + 0.0 0.1 0.2 0.3 -bias - - - - - - - - plot_heatmap diff --git a/tests/testthat/_snaps/plot_predictions/many-quantiles.svg b/tests/testthat/_snaps/plot_predictions/many-quantiles.svg index 
63f5bdcc3..88cbc8657 100644 --- a/tests/testthat/_snaps/plot_predictions/many-quantiles.svg +++ b/tests/testthat/_snaps/plot_predictions/many-quantiles.svg @@ -20,299 +20,299 @@ - - + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - + - - + + - -IT -Cases + +IT +Cases - - + + - -IT -Deaths + +IT +Deaths - - + + - + - - + + - -FR -Deaths + +FR +Deaths - - + + - -GB -Cases + +GB +Cases - - + + - -GB -Deaths + +GB +Deaths @@ -348,121 +348,121 @@ Cases - - - - - - -May 15 -Jun 01 -Jun 15 -Jul 01 -Jul 15 - - - - - - -May 15 -Jun 01 -Jun 15 -Jul 01 -Jul 15 - - - - - - -May 15 -Jun 01 -Jun 15 -Jul 01 -Jul 15 - --2e+05 --1e+05 -0e+00 -1e+05 - - - - - -100 -200 - - - -400 -800 -1200 -1600 - - - - - -0e+00 -1e+05 -2e+05 -3e+05 - - - - - -500 -1000 -1500 - - - - -0 -30000 -60000 -90000 - - - - - -400 -800 -1200 - - - - -20000 -40000 -60000 - - - -target_end_date -True and predicted values -interval_range - - + + + + + + +May 15 +Jun 01 +Jun 15 +Jul 01 +Jul 15 + + + + + + +May 15 +Jun 01 +Jun 15 +Jul 01 +Jul 15 + + + + + + +May 15 +Jun 01 +Jun 15 +Jul 01 +Jul 15 + +-2e+05 +-1e+05 +0e+00 +1e+05 + + + + + +100 +200 + + + +400 +800 +1200 +1600 + + + + + +0e+00 +1e+05 +2e+05 +3e+05 + + + + + +500 +1000 +1500 + + + + +0 +30000 +60000 +90000 + + + + + +400 +800 +1200 + + + + +20000 +40000 +60000 + + + +target_end_date +True and 
predicted values +interval_range + + - - + + - - + + -60 +60 50 -40 +40 30 -20 +20 10 many_quantiles diff --git a/tests/testthat/_snaps/plot_ranges/plot-ranges-dispersion.svg b/tests/testthat/_snaps/plot_ranges/plot-ranges-dispersion.svg index cab361e8e..788779b49 100644 --- a/tests/testthat/_snaps/plot_ranges/plot-ranges-dispersion.svg +++ b/tests/testthat/_snaps/plot_ranges/plot-ranges-dispersion.svg @@ -193,20 +193,20 @@ model dispersion +interval_range + + + + + + + + 0 25 50 75 -interval_range - - - - - - - - plot_ranges_dispersion diff --git a/tests/testthat/_snaps/plot_ranges/plot-ranges-interval.svg b/tests/testthat/_snaps/plot_ranges/plot-ranges-interval.svg index a1fb5f277..0448f3b00 100644 --- a/tests/testthat/_snaps/plot_ranges/plot-ranges-interval.svg +++ b/tests/testthat/_snaps/plot_ranges/plot-ranges-interval.svg @@ -204,20 +204,20 @@ model wis +interval_range + + + + + + + + 0 25 50 75 -interval_range - - - - - - - - plot_ranges_interval From b69552964e95fe1f93b139668acc81e8ec4d46a2 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Tue, 20 Feb 2024 18:25:12 -0600 Subject: [PATCH 02/10] Update functionality of as_forecast() --- R/validate.R | 74 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/R/validate.R b/R/validate.R index 13f84af74..dde7a5bbf 100644 --- a/R/validate.R +++ b/R/validate.R @@ -18,16 +18,84 @@ #' @keywords check-forecasts #' @examples #' as_forecast(example_binary) -#' as_forecast(example_quantile) -as_forecast <- function(data, ...) { +#' as_forecast( +#' example_quantile, +#' forecast_unit = c("model", "target_type", "target_end_date", +#' "horizon", "location") +#' ) +as_forecast <- function(data, + ...) { UseMethod("as_forecast") } #' @rdname as_forecast +#' @param observed (optional) Name of the column in `data` that contains the +#' observed values. This column will be renamed to "observed".
+#' @param predicted (optional) Name of the column in `data` that contains the +#' predicted values. This column will be renamed to "predicted". +#' @param model (optional) Name of the column in `data` that contains the names +#' of the models/forecasters that generated the predicted values. +#' This column will be renamed to "model". +#' @param forecast_unit (optional) Name of the columns in `data` (after +#' renaming) that denote the unit of a single forecast. +#' See [get_forecast_unit()] for details. +#' @param quantile_level (optional) Name of the column in `data` that contains +#' the quantile level of the predicted values. This column will be renamed to +#' "quantile_level". Only applicable to quantile-based forecasts. +#' @param sample_id (optional) Name of the column in `data` that contains the +#' sample id. This column will be renamed to "sample_id". Only applicable to +#' sample-based forecasts. #' @export -as_forecast.default <- function(data, ...) { +as_forecast.default <- function(data, + observed = NULL, + predicted = NULL, + model = NULL, + forecast_unit = NULL, + quantile_level = NULL, + sample_id = NULL, + ...) 
{ + # check inputs + data <- ensure_data.table(data) + assert_character(observed, len = 1, null.ok = TRUE) + assert_subset(observed, names(data), empty.ok = TRUE) + + assert_character(predicted, len = 1, null.ok = TRUE) + assert_subset(predicted, names(data), empty.ok = TRUE) + + assert_character(model, len = 1, null.ok = TRUE) + assert_subset(model, names(data), empty.ok = TRUE) + + assert_character(quantile_level, len = 1, null.ok = TRUE) + assert_subset(quantile_level, names(data), empty.ok = TRUE) + + assert_character(sample_id, len = 1, null.ok = TRUE) + assert_subset(sample_id, names(data), empty.ok = TRUE) + + # rename columns + if (!is.null(observed)) { + setnames(data, old = observed, new = "observed") + } + if (!is.null(predicted)) { + setnames(data, old = predicted, new = "predicted") + } + if (!is.null(model)) { + setnames(data, old = model, new = "model") + } + if (!is.null(quantile_level)) { + setnames(data, old = quantile_level, new = "quantile_level") + } + if (!is.null(sample_id)) { + setnames(data, old = sample_id, new = "sample_id") + } + + # assert that everything worked out assert(check_data_columns(data)) + # set forecast unit (error handling is done in `set_forecast_unit()`) + if (!is.null(forecast_unit)) { + data <- set_forecast_unit(data, forecast_unit) + } + # find forecast type forecast_type <- get_forecast_type(data) From af488597219159c53150d5a8a0619ced9c499b2a Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Tue, 20 Feb 2024 18:25:20 -0600 Subject: [PATCH 03/10] Update tests --- man/as_forecast.Rd | 39 +++++++++++++++++++++++++++++-- tests/testthat/test-as_forecast.R | 31 ++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/man/as_forecast.Rd b/man/as_forecast.Rd index 8f8b23939..dbcbf6e74 100644 --- a/man/as_forecast.Rd +++ b/man/as_forecast.Rd @@ -7,12 +7,43 @@ \usage{ as_forecast(data, ...) -\method{as_forecast}{default}(data, ...) 
+\method{as_forecast}{default}( + data, + observed = NULL, + predicted = NULL, + model = NULL, + forecast_unit = NULL, + quantile_level = NULL, + sample_id = NULL, + ... +) } \arguments{ \item{data}{A data.frame or data.table with predicted and observed values.} \item{...}{additional arguments} + +\item{observed}{(optional) Name of the column in \code{data} that contains the +observed values. This column will be renamed to "observed".} + +\item{predicted}{(optional) Name of the column in \code{data} that contains the +predicted values. This column will be renamed to "predicted".} + +\item{model}{(optional) Name of the column in \code{data} that contains the names +of the models/forecasters that generated the predicted values. +This column will be renamed to "model".} + +\item{forecast_unit}{(optional) Name of the columns in \code{data} (after +renaming) that denote the unit of a single forecast. +See \code{\link[=get_forecast_unit]{get_forecast_unit()}} for details.} + +\item{quantile_level}{(optional) Name of the column in \code{data} that contains +the quantile level of the predicted values. This column will be renamed to +"quantile_level". Only applicable to quantile-based forecasts.} + +\item{sample_id}{(optional) Name of the column in \code{data} that contains the +sample id. This column will be renamed to "sample_id". 
Only applicable to +sample-based forecasts.} } \value{ Depending on the forecast type, an object of class @@ -68,6 +99,10 @@ For more information see the vignettes and the example data \examples{ as_forecast(example_binary) -as_forecast(example_quantile) +as_forecast( + example_quantile, + forecast_unit = c("model", "target_type", "target_end_date", + "horizon", "location") +) } \keyword{check-forecasts} diff --git a/tests/testthat/test-as_forecast.R b/tests/testthat/test-as_forecast.R index df9ca9a95..158d26d86 100644 --- a/tests/testthat/test-as_forecast.R +++ b/tests/testthat/test-as_forecast.R @@ -7,6 +7,37 @@ test_that("Running `as_forecast()` twice returns the same object", { ) }) +test_that("as_forecast() works as expected", { + test <- na.omit(data.table::copy(example_quantile)) + expect_s3_class(as_forecast(test), "forecast_quantile") + + # expect error when arguments are not correct + expect_error(as_forecast(test, observed = 3), "Must be of type 'character'") + expect_error(as_forecast(test, quantile_level = c("1", "2")), "Must have length 1") + expect_error(as_forecast(test, observed = "missing"), "Must be a subset of") + + # expect no condition with columns already present + expect_no_condition( + as_forecast(test, observed = "observed", predicted = "predicted", + forecast_unit = c("location", "model", "target_type", + "target_end_date", "horizon"), + quantile_level = "quantile_level") + ) + + # additional test with renaming the model column + test <- na.omit(data.table::copy(example_continuous)) + setnames(test, old = c("observed", "predicted", "sample_id", "model"), + new = c("obs", "pred", "sample", "mod")) + expect_no_condition( + as_forecast(test, + observed = "obs", predicted = "pred", model = "mod", + forecast_unit = c("location", "model", "target_type", + "target_end_date", "horizon"), + sample_id = "sample") + ) +}) + + test_that("is_forecast() works as expected", { ex_binary <- suppressMessages(as_forecast(example_binary)) ex_point <- 
suppressMessages(as_forecast(example_point)) From 64a3e8916dc04431fae9e486ad2a5013211a426a Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Tue, 20 Feb 2024 18:25:33 -0600 Subject: [PATCH 04/10] Update readme --- README.Rmd | 8 +++++--- README.md | 22 +++++++++++----------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/README.Rmd b/README.Rmd index fb4094fe6..d22550647 100644 --- a/README.Rmd +++ b/README.Rmd @@ -120,12 +120,14 @@ example_quantile %>% ### Scoring forecasts -Forecasts can be easily and quickly scored using the `score()` function. `score()` automatically tries to determine the `forecast_unit`, i.e. the set of columns that uniquely defines a single forecast, by taking all column names of the data into account. However, it is recommended to set the forecast unit manually using `set_forecast_unit()` as this may help to avoid errors, especially when scoringutils is used in automated pipelines. The function `set_forecast_unit()` will simply drop unneeded columns. To verify everything is in order, the function `validate_forecast()` should be used. The result of that check can then passed directly into `score()`. `score()` returns unsummarised scores, which in most cases is not what the user wants. Here we make use of additional functions from `scoringutils` to add empirical coverage-levels (`add_coverage()`), and scores relative to a baseline model (here chosen to be the EuroCOVIDhub-ensemble model). See the getting started vignette for more details. Finally we summarise these scores by model and target type. +Forecasts can be easily and quickly scored using the `score()` function. `score()` automatically tries to determine the `forecast_unit`, i.e. the set of columns that uniquely defines a single forecast, by taking all column names of the data into account. However, it is recommended to set the forecast unit manually by specifying the "forecast_unit" argument in `as_forecast()` as this may help to avoid errors. 
This will drop all columns that are neither part of the forecast unit nor part of the columns internally used by `scoringutils`. The function `as_forecast()` processes and validates the inputs. +`score()` returns unsummarised scores, which in most cases is not what the user wants. Here we make use of additional functions from `scoringutils` to add empirical coverage-levels (`add_coverage()`), and scores relative to a baseline model (here chosen to be the EuroCOVIDhub-ensemble model). See the getting started vignette for more details. Finally we summarise these scores by model and target type. ```{r score-example} example_quantile %>% - set_forecast_unit(c("location", "target_end_date", "target_type", "horizon", "model")) %>% - as_forecast() %>% + as_forecast(forecast_unit = c( + "location", "target_end_date", "target_type", "horizon", "model" + )) %>% add_coverage() %>% score() %>% add_pairwise_comparison( diff --git a/README.md b/README.md index 8d5584895..b28ad64e4 100644 --- a/README.md +++ b/README.md @@ -134,14 +134,13 @@ Forecasts can be easily and quickly scored using the `score()` function. `score()` automatically tries to determine the `forecast_unit`, i.e. the set of columns that uniquely defines a single forecast, by taking all column names of the data into account. However, it is recommended to set -the forecast unit manually using `set_forecast_unit()` as this may help -to avoid errors, especially when scoringutils is used in automated -pipelines. The function `set_forecast_unit()` will simply drop unneeded -columns. To verify everything is in order, the function -`validate_forecast()` should be used. The result of that check can then -passed directly into `score()`. `score()` returns unsummarised scores, -which in most cases is not what the user wants. 
Here we make use of -additional functions from `scoringutils` to add empirical +the forecast unit manually by specifying the “forecast_unit” argument in +`as_forecast()` as this may help to avoid errors. This will drop all +columns that are neither part of the forecast unit nor part of the +columns internally used by `scoringutils`. The function `as_forecast()` +processes and validates the inputs. `score()` returns unsummarised +scores, which in most cases is not what the user wants. Here we make use +of additional functions from `scoringutils` to add empirical coverage-levels (`add_coverage()`), and scores relative to a baseline model (here chosen to be the EuroCOVIDhub-ensemble model). See the getting started vignette for more details. Finally we summarise these @@ -149,8 +148,9 @@ scores by model and target type. ``` r example_quantile %>% - set_forecast_unit(c("location", "target_end_date", "target_type", "horizon", "model")) %>% - as_forecast() %>% + as_forecast(forecast_unit = c( + "location", "target_end_date", "target_type", "horizon", "model" + )) %>% add_coverage() %>% score() %>% add_pairwise_comparison( @@ -226,7 +226,7 @@ example_quantile %>% #> underprediction dispersion bias interval_coverage_50 #> #> 1: 4237.177310 3663.52458 -0.05640625 0.3906250 -#> 2: 10284.972826 4102.50094 0.09726562 0.3281250 +#> 2: 10284.972826 4102.50094 0.09726563 0.3281250 #> 3: 3260.355639 5664.37795 -0.07890625 0.4687500 #> 4: 4.103261 30.18099 0.07265625 0.8750000 #> 5: 2.098505 91.40625 0.33906250 0.6640625 From e0c4eb463bf9c2fc33fc18ab0f4c46124cfaea7e Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Tue, 20 Feb 2024 18:26:15 -0600 Subject: [PATCH 05/10] Update News --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 5654520fb..fca7a5c27 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,7 +6,7 @@ The update introduces breaking changes. 
If you want to keep using the older vers ## Package updates - In `score()`, required columns "true_value" and "prediction" were renamed and replaced by required columns "observed" and "predicted". Scoring functions now also use the function arguments "observed" and "predicted" everywhere consistently. -- The overall scoring workflow was updated. `score()` is now a generic function that dispatches the correct method based on the forecast type. forecast types currently supported are "binary", "point", "sample" and "quantile" with corresponding classes "forecast_binary", "forecast_point", "forecast_sample" and "forecast_quantile". An object of class `forecast_*` can be created using the function `as_forecast()`, which also replaces the previous function `check_forecasts()` (see more information below). +- The overall scoring workflow was updated. `score()` is now a generic function that dispatches the correct method based on the forecast type. forecast types currently supported are "binary", "point", "sample" and "quantile" with corresponding classes "forecast_binary", "forecast_point", "forecast_sample" and "forecast_quantile". An object of class `forecast_*` can be created using the function `as_forecast()`, which also replaces the previous function `check_forecasts()` (see more information below). The function also allows users to rename required columns and specify the forecast unit in a single step, taking over the functionality of `set_forecast_unit()` in most cases. 
- Scoring rules (functions used for scoring) received a consistent interface and input checks: - Scoring rules for binary forecasts: - `observed`: factor with exactly 2 levels From 3c95baafa443f818fc1ed76272729ecefb9e3ac5 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 21 Feb 2024 00:29:15 +0000 Subject: [PATCH 06/10] Automatic readme update [ci skip] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b28ad64e4..66bf5c99c 100644 --- a/README.md +++ b/README.md @@ -226,7 +226,7 @@ example_quantile %>% #> underprediction dispersion bias interval_coverage_50 #> #> 1: 4237.177310 3663.52458 -0.05640625 0.3906250 -#> 2: 10284.972826 4102.50094 0.09726563 0.3281250 +#> 2: 10284.972826 4102.50094 0.09726562 0.3281250 #> 3: 3260.355639 5664.37795 -0.07890625 0.4687500 #> 4: 4.103261 30.18099 0.07265625 0.8750000 #> 5: 2.098505 91.40625 0.33906250 0.6640625 From ecad6f2e4fd4e1867b2deb273de641f5119cc830 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Tue, 20 Feb 2024 18:45:20 -0600 Subject: [PATCH 07/10] Nonsensical commit to trigger CI changes --- R/validate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/validate.R b/R/validate.R index dde7a5bbf..7580cd87e 100644 --- a/R/validate.R +++ b/R/validate.R @@ -88,7 +88,7 @@ as_forecast.default <- function(data, setnames(data, old = sample_id, new = "sample_id") } - # assert that everything worked out + # assert that the correct column names are present after renaming assert(check_data_columns(data)) # set forecast unit (error handling is done in `set_forecast_unit()`) From 00952b532902f68017557f5a84d5779b57ed044f Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 23 Feb 2024 08:01:00 -0600 Subject: [PATCH 08/10] Improve documentation --- R/validate.R | 31 ++++++++++++++++++++++--------- man/as_forecast.Rd | 35 +++++++++++++++++++++++++---------- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/R/validate.R b/R/validate.R index 
7580cd87e..3464937d4 100644 --- a/R/validate.R +++ b/R/validate.R @@ -2,13 +2,23 @@ #' @description Convert a data.frame or similar of forecasts into an object of #' class `forecast_*` and validate it. #' -#' `as_forecast()` determines the forecast type (binary, point, sample-based or +#' `as_forecast()` +#' - allows users to specify the current names of the columns that correspond +#' to the columns required by `scoringutils` (`observed`, `predicted`, +#' `model`, as well as `quantile_level` for quantile-based forecasts and +#' `sample_id` for sample-based forecasts). `as_forecast()` renames the +#' existing columns. +#' - allows users to specify the unit of a single forecast. It removes all +#' columns that are neither part of the forecast unit nor a required column +#' (see [set_forecast_unit()] for details) +#' - Determines the forecast type (binary, point, sample-based or #' quantile-based) from the input data (using the function -#' [get_forecast_type()]. It then constructs an object of the -#' appropriate class (`forecast_binary`, `forecast_point`, `forecast_sample`, or +#' [get_forecast_type()]). +#' - Constructs a forecast object of the appropriate class +#' (`forecast_binary`, `forecast_point`, `forecast_sample`, or #' `forecast_quantile`, using the function [new_forecast()]). -#' Lastly, it calls [as_forecast()] on the object to make sure it conforms with -#' the required input formats. +#' - Calls [validate_forecast()] on the newly created forecast object to +#' validate it #' @inheritParams score #' @inheritSection forecast_types Forecast types and input format #' @return Depending on the forecast type, an object of class @@ -29,6 +39,12 @@ as_forecast <- function(data, } #' @rdname as_forecast +#' @param forecast_unit (optional) Name of the columns in `data` (after +#' any renaming of columns done by `as_forecast()`) that denote the unit of a +#' single forecast. See [get_forecast_unit()] for details.
+#' If `NULL` (the default), all columns that are not required columns are +#' assumed to form the unit of a single forecast. If specified, all columns +#' that are not part of the forecast unit (or required columns) will be removed. #' @param observed (optional) Name of the column in `data` that contains the #' observed values. This column will be renamed to "observed". #' @param predicted (optional) Name of the column in `data` that contains the @@ -36,9 +52,6 @@ as_forecast <- function(data, #' @param model (optional) Name of the column in `data` that contains the names #' of the models/forecasters that generated the predicted values. #' This column will be renamed to "model". -#' @param forecast_unit (optional) Name of the columns in `data` (after -#' renaming) that denote the unit of a single forecast. -#' See [get_forecast_unit()] for details. #' @param quantile_level (optional) Name of the column in `data` that contains #' the quantile level of the predicted values. This column will be renamed to #' "quantile_level". Only applicable to quantile-based forecasts. @@ -47,10 +60,10 @@ as_forecast <- function(data, #' sample-based forecasts. #' @export as_forecast.default <- function(data, + forecast_unit = NULL, observed = NULL, predicted = NULL, model = NULL, - forecast_unit = NULL, quantile_level = NULL, sample_id = NULL, ...) { diff --git a/man/as_forecast.Rd b/man/as_forecast.Rd index dbcbf6e74..fde4b04bd 100644 --- a/man/as_forecast.Rd +++ b/man/as_forecast.Rd @@ -9,10 +9,10 @@ as_forecast(data, ...) \method{as_forecast}{default}( data, + forecast_unit = NULL, observed = NULL, predicted = NULL, model = NULL, - forecast_unit = NULL, quantile_level = NULL, sample_id = NULL, ... @@ -23,6 +23,13 @@ as_forecast(data, ...) \item{...}{additional arguments} +\item{forecast_unit}{(optional) Name of the columns in \code{data} (after +any renaming of columns done by \code{as_forecast()}) that denote the unit of a +single forecast. 
See \code{\link[=get_forecast_unit]{get_forecast_unit()}} for details. +If \code{NULL} (the default), all columns that are not required columns are +assumed to form the unit of a single forecast. If specified, all columns +that are not part of the forecast unit (or required columns) will be removed.} + \item{observed}{(optional) Name of the column in \code{data} that contains the observed values. This column will be renamed to "observed".} @@ -33,10 +40,6 @@ predicted values. This column will be renamed to "predicted".} of the models/forecasters that generated the predicted values. This column will be renamed to "model".} -\item{forecast_unit}{(optional) Name of the columns in \code{data} (after -renaming) that denote the unit of a single forecast. -See \code{\link[=get_forecast_unit]{get_forecast_unit()}} for details.} - \item{quantile_level}{(optional) Name of the column in \code{data} that contains the quantile level of the predicted values. This column will be renamed to "quantile_level". Only applicable to quantile-based forecasts.} @@ -54,13 +57,25 @@ Depending on the forecast type, an object of class Convert a data.frame or similar of forecasts into an object of class \verb{forecast_*} and validate it. -\code{as_forecast()} determines the forecast type (binary, point, sample-based or +\code{as_forecast()} +\itemize{ +\item allows users to specify the current names of the columns that correspond +to the columns required by \code{scoringutils} (\code{observed}, \code{predicted}, +\code{model}, as well as \code{quantile_level} for quantile-based forecasts and +\code{sample_id} for sample-based forecasts). \code{as_forecast()} renames the +existing columns. +\item allows users to specify the unit of a single forecast.
It removes all +columns that are neither part of the forecast unit nor a required column +(see \code{\link[=set_forecast_unit]{set_forecast_unit()}} for details) +\item Determines the forecast type (binary, point, sample-based or quantile-based) from the input data (using the function -\code{\link[=get_forecast_type]{get_forecast_type()}}. It then constructs an object of the -appropriate class (\code{forecast_binary}, \code{forecast_point}, \code{forecast_sample}, or +\code{\link[=get_forecast_type]{get_forecast_type()}}). +\item Constructs a forecast object of the appropriate class +(\code{forecast_binary}, \code{forecast_point}, \code{forecast_sample}, or \code{forecast_quantile}, using the function \code{\link[=new_forecast]{new_forecast()}}). -Lastly, it calls \code{\link[=as_forecast]{as_forecast()}} on the object to make sure it conforms with -the required input formats. +\item Calls \code{\link[=validate_forecast]{validate_forecast()}} on the newly created forecast object to +validate it +} } \section{Forecast types and input format}{ Various different forecast types / forecast formats are supported. At the From 842feacaf1f4530d4f1d553ca4c664e6980e9fa8 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 23 Feb 2024 12:23:43 -0600 Subject: [PATCH 09/10] Add an argument forecast_type to `as_forecast()` --- R/validate.R | 16 ++++++++++++++++ tests/testthat/test-as_forecast.R | 7 +++++++ 2 files changed, 23 insertions(+) diff --git a/R/validate.R b/R/validate.R index 3464937d4..307a58249 100644 --- a/R/validate.R +++ b/R/validate.R @@ -45,6 +45,10 @@ as_forecast <- function(data, #' If `NULL` (the default), all columns that are not required columns are #' assumed to form the unit of a single forecast. If specified, all columns #' that are not part of the forecast unit (or required columns) will be removed. +#' @param forecast_type (optional) The forecast type you expect the forecasts +#' to have.
If the forecast type as determined by `scoringutils` based on the +#' input does not match this, an error will be thrown. If `NULL` (the default), +#' the forecast type will be inferred from the data. #' @param observed (optional) Name of the column in `data` that contains the #' observed values. This column will be renamed to "observed". #' @param predicted (optional) Name of the column in `data` that contains the @@ -61,6 +65,7 @@ as_forecast <- function(data, #' @export as_forecast.default <- function(data, forecast_unit = NULL, + forecast_type = NULL, observed = NULL, predicted = NULL, model = NULL, @@ -110,8 +115,19 @@ as_forecast.default <- function(data, } # find forecast type + desired_forecast_type <- forecast_type forecast_type <- get_forecast_type(data) + if (!is.null(desired_forecast_type)) { + if (forecast_type != desired_forecast_type) { + stop( + "Forecast type determined by scoringutils based on input: `", + forecast_type, + "`. Desired forecast type: `", desired_forecast_type, "`." 
+ ) + } + } + # construct class data <- new_forecast(data, paste0("forecast_", forecast_type)) diff --git a/tests/testthat/test-as_forecast.R b/tests/testthat/test-as_forecast.R index 158d26d86..b2377772d 100644 --- a/tests/testthat/test-as_forecast.R +++ b/tests/testthat/test-as_forecast.R @@ -35,6 +35,13 @@ test_that("as_forecast() works as expected", { "target_end_date", "horizon"), sample_id = "sample") ) + + # test if desired forecast type does not correspond to inferred one + test <- na.omit(data.table::copy(example_continuous)) + expect_error( + as_forecast(test, forecast_type = "quantile"), + "Forecast type determined by scoringutils based on input" + ) }) From 23b708eb92831f490a36cffa54f6c0f5e709e81d Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 23 Feb 2024 12:32:13 -0600 Subject: [PATCH 10/10] Fix linting issue, update docs --- R/validate.R | 16 +++++++--------- man/as_forecast.Rd | 6 ++++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/R/validate.R b/R/validate.R index 307a58249..cc5b02c39 100644 --- a/R/validate.R +++ b/R/validate.R @@ -115,17 +115,15 @@ as_forecast.default <- function(data, } # find forecast type - desired_forecast_type <- forecast_type + desired <- forecast_type forecast_type <- get_forecast_type(data) - if (!is.null(desired_forecast_type)) { - if (forecast_type != desired_forecast_type) { - stop( - "Forecast type determined by scoringutils based on input: `", - forecast_type, - "`. Desired forecast type: `", desired_forecast_type, "`." - ) - } + if (!is.null(desired) && desired != forecast_type) { + stop( + "Forecast type determined by scoringutils based on input: `", + forecast_type, + "`. Desired forecast type: `", desired, "`." + ) } # construct class diff --git a/man/as_forecast.Rd b/man/as_forecast.Rd index fde4b04bd..9e3ed1132 100644 --- a/man/as_forecast.Rd +++ b/man/as_forecast.Rd @@ -10,6 +10,7 @@ as_forecast(data, ...) 
\method{as_forecast}{default}( data, forecast_unit = NULL, + forecast_type = NULL, observed = NULL, predicted = NULL, model = NULL, @@ -30,6 +31,11 @@ If \code{NULL} (the default), all columns that are not required columns are assumed to form the unit of a single forecast. If specified, all columns that are not part of the forecast unit (or required columns) will be removed.} +\item{forecast_type}{(optional) The forecast type you expect the forecasts +to have. If the forecast type as determined by \code{scoringutils} based on the +input does not match this, an error will be thrown. If \code{NULL} (the default), +the forecast type will be inferred from the data.} + \item{observed}{(optional) Name of the column in \code{data} that contains the observed values. This column will be renamed to "observed".}