From 6c23144bb86157b7c5846bb0c42a6cb4f0a0dce0 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Thu, 2 Nov 2023 10:05:00 +0100 Subject: [PATCH 1/7] Create a new function to convert from a quantile to an interval format --- NAMESPACE | 2 + R/utils_data_handling.R | 113 ++++++++++++++++++ man/quantile_to_interval.Rd | 67 +++++++++++ tests/testthat/_snaps/utils_data_handling.md | 70 +++++++++++ tests/testthat/test-utils_data_handling.R | 117 +++++++++++++++++++ 5 files changed, 369 insertions(+) create mode 100644 man/quantile_to_interval.Rd create mode 100644 tests/testthat/_snaps/utils_data_handling.md diff --git a/NAMESPACE b/NAMESPACE index b5a893ac4..67415dbe9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,8 @@ S3method(plot,scoringutils_available_forecasts) S3method(print,scoringutils_check) +S3method(quantile_to_interval,data.frame) +S3method(quantile_to_interval,numeric) S3method(score,default) S3method(score,scoringutils_binary) S3method(score,scoringutils_point) diff --git a/R/utils_data_handling.R b/R/utils_data_handling.R index a53d9c048..4e8ad71a0 100644 --- a/R/utils_data_handling.R +++ b/R/utils_data_handling.R @@ -158,6 +158,119 @@ range_long_to_quantile <- function(data, } +#' Transform From a Quantile Format to an Interval Format +#' @description +#' **Quantile format** +#' In a quantile format, a prediction is characterised by one or multiple +#' predicted values and the corresponding quantile levels. For example, a +#' prediction in a quantile format could be represented by the 0.05, 0.25, 0.5, +#' 0.75 and 0.95 quantiles of the predictive distribution. +#' +#' **Interval format** +#' In the interval format, two quantiles are assumed to form a prediction +#' interval. Prediction intervals need to be symmetric around the median and +#' are characterised by a lower and an upper bound. The lower bound is defined +#' by the lower quantile and the upper bound is defined by the upper quantile. +#' A 90% prediction interval, for example, covers 90% of the probability mass +#' and is defined by the 5% and 95% quantiles. A forecast could therefore +#' be characterised by one or multiple prediction intervals, e.g. the lower +#' and upper bounds of the 50% and 90% prediction intervals (corresponding to +#' the 0.25 and 0.75 as well as the 0.05 and 0.095 quantiles). +#' @param ... Arguments passed down to the method +quantile_to_interval <- function(...) { + UseMethod("quantile_to_interval") +} + + +#' @param dt a data.table with columns `quantile` and `predicted` +#' @param format the format of the output. Either "long" or "wide". If "long" +#' (the default), there will be a column `boundary` (with values either "upper" +#' or "lower" and a column `range` that contains the range of the interval. +#' If "wide", there will be a column `range` and two columns +#' `lower` and `upper` that contain the lower and upper bounds of the +#' prediction interval, respectively. +#' @param keep_quantile_col keep the quantile column in the final +#' output after transformation (default is FALSE). This only works if +#' `format = "long"`. If `format = "wide"`, the quantile column will always be +#' dropped. +#' @return +#' *quantile_to_interval.data.frame*: +#' a data.frame in an interval format (either "long" or "wide"), with or +#' without a quantile column. Rows will not be reordered. +#' @export +#' @rdname quantile_to_interval +quantile_to_interval.data.frame <- function( + dt, + format = "long", + keep_quantile_col = FALSE +) { + # make sure dt is a data.table + assert_data_frame(dt) + if (!is.data.table(dt)) { + dt <- data.table::as.data.table(dt) + } else { + # use copy to avoid + dt <- copy(dt) + } + + dt[, boundary := ifelse(quantile <= 0.5, "lower", "upper")] + dt[, range := ifelse( + boundary == "lower", + round((1 - 2 * quantile) * 100, 10), + round((2 * quantile - 1) * 100, 10) + )] + + # add median quantile + median <- dt[quantile == 0.5, ] + median[, boundary := "upper"] + + dt <- data.table::rbindlist(list(dt, median)) + if (!keep_quantile_col) { + dt[, quantile := NULL] + } + + if (format == "wide") { + delete_columns(dt, "quantile") + dt <- dcast(dt, ... ~ boundary, value.var = c("predicted")) + } + return(dt[]) +} + + +#' @param observed a numeric vector of observed values of size n +#' @param predicted a numeric vector of predicted values of size n x N. If +#' `observed` is a single number, then `predicted` can be a vector of length N +#' @param quantile a numeric vector of quantile levels of size N +#' @return +#' *quantile_to_interval.numeric*: +#' a data.frame in a wide interval format with columns `forecast_id`, +#' `observed`, `lower`, `upper`, and `range`. The `forecast_id` column is a +#' unique identifier for each forecast. Rows will be reordered according to +#' `forecast_id` and `range`. +#' @export +#' @rdname quantile_to_interval +quantile_to_interval.numeric <- function( + observed, + predicted, + quantile +) { + scoringutils:::assert_input_quantile(observed, predicted, quantile) + + n <- length(observed) + N <- length(quantile) + + dt <- data.table( + forecast_id = rep(1:n, each = N), + observed = rep(observed, each = N), + predicted = as.vector(t(predicted)), + quantile = quantile + ) + out <- quantile_to_interval(dt, format = "wide") + out <- out[order(forecast_id, range)] + return(out) +} + + #' @title Change Data from a Plain Quantile Format to a Long Range Format #' #' @description diff --git a/man/quantile_to_interval.Rd b/man/quantile_to_interval.Rd new file mode 100644 index 000000000..787bc5cca --- /dev/null +++ b/man/quantile_to_interval.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_data_handling.R +\name{quantile_to_interval} +\alias{quantile_to_interval} +\alias{quantile_to_interval.data.frame} +\alias{quantile_to_interval.numeric} +\title{Transform From a Quantile Format to an Interval Format} +\usage{ +quantile_to_interval(...) + +\method{quantile_to_interval}{data.frame}(dt, format = "long", keep_quantile_col = FALSE) + +\method{quantile_to_interval}{numeric}(observed, predicted, quantile) +} +\arguments{ +\item{...}{Arguments passed down to the method} + +\item{dt}{a data.table with columns \code{quantile} and \code{predicted}} + +\item{format}{the format of the output. Either "long" or "wide". If "long" +(the default), there will be a column \code{boundary} (with values either "upper" +or "lower" and a column \code{range} that contains the range of the interval. +If "wide", there will be a column \code{range} and two columns +\code{lower} and \code{upper} that contain the lower and upper bounds of the +prediction interval, respectively.} + +\item{keep_quantile_col}{keep the quantile column in the final +output after transformation (default is FALSE). This only works if +\code{format = "long"}. If \code{format = "wide"}, the quantile column will always be +dropped.} + +\item{observed}{a numeric vector of observed values of size n} + +\item{predicted}{a numeric vector of predicted values of size n x N. If +\code{observed} is a single number, then \code{predicted} can be a vector of length N} + +\item{quantile}{a numeric vector of quantile levels of size N} +} +\value{ +\emph{quantile_to_interval.data.frame}: +a data.frame in an interval format (either "long" or "wide"), with or +without a quantile column. Rows will not be reordered. + +\emph{quantile_to_interval.numeric}: +a data.frame in a wide interval format with columns \code{forecast_id}, +\code{observed}, \code{lower}, \code{upper}, and \code{range}. The \code{forecast_id} column is a +unique identifier for each forecast. Rows will be reordered according to +\code{forecast_id} and \code{range}. +} +\description{ +\strong{Quantile format} +In a quantile format, a prediction is characterised by one or multiple +predicted values and the corresponding quantile levels. For example, a +prediction in a quantile format could be represented by the 0.05, 0.25, 0.5, +0.75 and 0.95 quantiles of the predictive distribution. + +\strong{Interval format} +In the interval format, two quantiles are assumed to form a prediction +interval. Prediction intervals need to be symmetric around the median and +are characterised by a lower and an upper bound. The lower bound is defined +by the lower quantile and the upper bound is defined by the upper quantile. +A 90\% prediction interval, for example, covers 90\% of the probability mass +and is defined by the 5\% and 95\% quantiles. A forecast could therefore +be characterised by one or multiple prediction intervals, e.g. the lower +and upper bounds of the 50\% and 90\% prediction intervals (corresponding to +the 0.25 and 0.75 as well as the 0.05 and 0.095 quantiles). +} diff --git a/tests/testthat/_snaps/utils_data_handling.md b/tests/testthat/_snaps/utils_data_handling.md new file mode 100644 index 000000000..285514031 --- /dev/null +++ b/tests/testthat/_snaps/utils_data_handling.md @@ -0,0 +1,70 @@ +# quantile_to_range works - scalar and vector case + + Code + out1 + Output + forecast_id observed range lower upper + 1: 1 5 0 5 5 + 2: 1 5 20 4 6 + 3: 1 5 40 3 7 + 4: 1 5 60 2 8 + 5: 1 5 80 1 9 + +--- + + Code + out3 + Output + forecast_id observed range lower upper + 1: 1 5 0 5 5 + 2: 1 5 20 4 6 + 3: 1 5 40 3 7 + 4: 1 5 60 2 8 + 5: 1 5 80 1 NA + +--- + + Code + out4 + Output + forecast_id observed range lower upper + 1: 1 5 0 5 5 + 2: 1 5 20 4 6 + 3: 1 5 40 3 7 + 4: 1 5 60 2 8 + 5: 1 5 80 1 NA + 6: 1 5 90 NA 9 + +# quantile_to_range works - matrix case + + Code + out1 + Output + forecast_id observed range lower upper + 1: 1 21 0 21 21 + 2: 1 21 20 16 26 + 3: 1 21 40 11 31 + 4: 1 21 60 6 36 + 5: 1 21 80 1 41 + 6: 2 22 0 22 22 + 7: 2 22 20 17 27 + 8: 2 22 40 12 32 + 9: 2 22 60 7 37 + 10: 2 22 80 2 42 + 11: 3 23 0 23 23 + 12: 3 23 20 18 28 + 13: 3 23 40 13 33 + 14: 3 23 60 8 38 + 15: 3 23 80 3 43 + 16: 4 24 0 24 24 + 17: 4 24 20 19 29 + 18: 4 24 40 14 34 + 19: 4 24 60 9 39 + 20: 4 24 80 4 44 + 21: 5 25 0 25 25 + 22: 5 25 20 20 30 + 23: 5 25 40 15 35 + 24: 5 25 60 10 40 + 25: 5 25 80 5 45 + forecast_id observed range lower upper + diff --git a/tests/testthat/test-utils_data_handling.R b/tests/testthat/test-utils_data_handling.R index 609c49fe2..2e0257d72 100644 --- a/tests/testthat/test-utils_data_handling.R +++ b/tests/testthat/test-utils_data_handling.R @@ -123,3 +123,120 @@ test_that("sample_to_range_long works", { expect_equal(long, as.data.frame(long2)) }) + +test_that("quantile_to_range works - scalar and vector case", { + predicted <- 9:1 + quantile <- rev(seq(0.1, 0.9, 0.1)) + observed <- 5 + + # check output is produced + out1 <- quantile_to_interval(observed, predicted, quantile) + expect_snapshot(out1) + + # check order of predictions doesn't matter + predicted <- 1:9 + quantile <- seq(0.1, 0.9, 0.1) + out2 <- quantile_to_interval(observed, predicted, quantile) + expect_equal(out1, out2) + + # check error if observed is a vector and predicted is a vector as well + expect_error(quantile_to_interval( + observed = c(1, 2), predicted = c(1, 2), quantile = c(0.1, 0.9)), + "Assertion on 'predicted' failed: Must be of type 'matrix', not 'double'." + ) + + # check NA values are handled gracefully - there should just be NA in the output + predicted <- c(1:8, NA) + quantile <- seq(0.1, 0.9, 0.1) + observed <- 5 + out3 <- quantile_to_interval(observed, predicted, quantile) + expect_snapshot(out3) + + # check non-symmetrical intervals are handled gracefully + # result should be newly introduced ranges where one value is NA + predicted <- c(1:9) + quantile <- c(seq(0.1, 0.8, 0.1), 0.95) + observed <- 5 + out4 <- quantile_to_interval(observed, predicted, quantile) + expect_snapshot(out4) + + # check function works without a median + predicted <- c(1:8) + quantile <- c(seq(0.1, 0.4, 0.1), seq(0.6, 0.9, 0.1)) + observed <- 5 + expect_no_condition( + quantile_to_interval(observed, predicted, quantile) + ) + + # check a one-dimensional matrix works fine + predicted <- matrix(1:9, nrow = 1) + quantile <- seq(0.1, 0.9, 0.1) + expect_no_condition( + quantile_to_interval(observed, predicted, quantile) + ) +}) + +test_that("quantile_to_range works - matrix case", { + n <- 5 + N <- 9 + predicted <- matrix(1:45, nrow = n, ncol = N) + quantile <- seq(0.1, 0.9, 0.1) + observed <- seq(21, 25, 1) + + # check output is produced + out1 <- quantile_to_interval(observed, predicted, quantile) + expect_snapshot(out1) + + # check order of predictions doesn't matter + predicted <- matrix( + c(41:45, 36:40, 31:35, 26:30, 21:25, 16:20, 11:15, 6:10, 1:5), + nrow = n, + ncol = N + ) + quantile <- rev(seq(0.1, 0.9, 0.1)) + out2 <- quantile_to_interval(observed, predicted, quantile) + expect_equal(out1, out2) + + # check NA values are fine + predicted[1, 1] <- NA + expect_no_condition( + quantile_to_interval(observed, predicted, quantile) + ) +}) + + +test_that("quantile_to_interval works - data.frame case", { + dt <- data.table( + observed = 5, + predicted = 1:9, + quantile = seq(0.1, 0.9, 0.1) + ) + + expect_no_condition( + quantile_to_interval(dt) + ) + + expect_no_condition( + quantile_to_interval(dt, format = "wide") + ) + + # check that the number of rows after transformation is equal to the number + # of rows plus the number of medians added (as upper boundary of a 0% + # prediction interval) + ex <- example_quantile[!is.na(predicted)] + n_preds <- nrow(ex) + n_medians <- nrow(ex[quantile == 0.5]) + ex_interval <- quantile_to_interval(ex, keep_quantile_col = TRUE) + expect_equal( + nrow(ex_interval), + n_preds + n_medians + ) + + expect_equal( + colnames(ex_interval), + c(colnames(ex), "boundary", "range") + ) +}) + + + From 475bdb07ec2f3c0d62293758fefdf9403c25a083 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Thu, 2 Nov 2023 10:17:49 +0100 Subject: [PATCH 2/7] remove test that doesn't make any sense since we explicitly created scoring for point forecasts --- tests/testthat/test-summarise_scores.R | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tests/testthat/test-summarise_scores.R b/tests/testthat/test-summarise_scores.R index d3fd830ce..dc3de70e3 100644 --- a/tests/testthat/test-summarise_scores.R +++ b/tests/testthat/test-summarise_scores.R @@ -30,19 +30,7 @@ test_that("summarise_scores() handles wrong by argument well", { ) }) -test_that("summarise_scores() works with point forecasts in a quantile format", { - ex <- data.table::copy(example_quantile) - ex <- ex[quantile == 0.5][, quantile := NA_real_] - - scores <- suppressMessages(score(ex)) - - scores_summarised <-summarise_scores(scores, by = "model", na.rm = TRUE) - expect_warning( - expect_warning( - add_pairwise_comparison(scores_summarised) - ) - ) - +test_that("summarise_scores() works with point forecasts", { summarised_scores <- summarise_scores(scores_point, by = "model") expect_no_condition( From 1972bb7abd1704dfb8fa563097f04646029788ea Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Thu, 2 Nov 2023 10:21:26 +0100 Subject: [PATCH 3/7] Adapt tests to test `quantile_to_interval` instead of `quantile_to_range_long` --- tests/testthat/test-utils_data_handling.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-utils_data_handling.R b/tests/testthat/test-utils_data_handling.R index 2e0257d72..6e53cdbaa 100644 --- a/tests/testthat/test-utils_data_handling.R +++ b/tests/testthat/test-utils_data_handling.R @@ -22,7 +22,7 @@ test_that("range_long_to_quantile works", { -test_that("quantile_to_range_long works", { +test_that("quantile_to_interval works", { quantile <- data.frame( date = as.Date("2020-01-01") + 1:10, model = "model1", @@ -40,7 +40,8 @@ test_that("quantile_to_range_long works", { boundary = rep(c("lower", "upper"), each = 10) ) - long2 <- as.data.frame(scoringutils:::quantile_to_range_long(quantile, + long2 <- as.data.frame(quantile_to_interval( + quantile, keep_quantile_col = FALSE )) From f60cc6dfa1a8cf702bdab86cc11b97ca6fc40e38 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Thu, 2 Nov 2023 10:50:42 +0100 Subject: [PATCH 4/7] Remove function `quantile_to_range_long()` everywhere and replace it by `quantile_to_interval()` --- R/plot.R | 2 +- R/score_quantile.R | 2 +- R/utils_data_handling.R | 50 ++--------------------------------- R/z_globalVariables.R | 2 ++ man/quantile_to_range_long.Rd | 22 --------------- 5 files changed, 6 insertions(+), 72 deletions(-) delete mode 100644 man/quantile_to_range_long.Rd diff --git a/R/plot.R b/R/plot.R index 73cef10cf..f90be0599 100644 --- a/R/plot.R +++ b/R/plot.R @@ -413,7 +413,7 @@ plot_predictions <- function(data, # range data if (test_forecast_type_is_quantile(data)) { - forecasts <- quantile_to_range_long( + forecasts <- quantile_to_interval( forecasts, keep_quantile_col = FALSE ) diff --git a/R/score_quantile.R b/R/score_quantile.R index 89e3246c5..c7b6d893d 100644 --- a/R/score_quantile.R +++ b/R/score_quantile.R @@ -32,7 +32,7 @@ score_quantile <- function(data, data <- remove_na_observed_predicted(data) # make sure to have both quantile as well as range format -------------------- - range_data <- quantile_to_range_long(data, + range_data <- quantile_to_interval( data, keep_quantile_col = FALSE ) # adds the range column to the quantile data set diff --git a/R/utils_data_handling.R b/R/utils_data_handling.R index 4e8ad71a0..d27102ba6 100644 --- a/R/utils_data_handling.R +++ b/R/utils_data_handling.R @@ -197,6 +197,7 @@ quantile_to_interval <- function(...) { #' *quantile_to_interval.data.frame*: #' a data.frame in an interval format (either "long" or "wide"), with or #' without a quantile column. Rows will not be reordered. +#' @importFrom data.table copy #' @export #' @rdname quantile_to_interval quantile_to_interval.data.frame <- function( @@ -271,53 +272,6 @@ quantile_to_interval.numeric <- function( } -#' @title Change Data from a Plain Quantile Format to a Long Range Format -#' -#' @description -#' -#' Transform data from a format that uses quantiles only to one that uses -#' interval ranges to denote quantiles. -#' -#' @param data a data.frame in quantile format -#' @param keep_quantile_col keep the quantile column in the final -#' output after transformation (default is FALSE) -#' @return a data.frame in a long interval range format -#' @importFrom data.table copy -#' @keywords internal - -quantile_to_range_long <- function(data, - keep_quantile_col = TRUE) { - data <- data.table::as.data.table(data) - - data[, boundary := ifelse(quantile <= 0.5, "lower", "upper")] - data[, range := ifelse( - boundary == "lower", - round((1 - 2 * quantile) * 100, 10), - round((2 * quantile - 1) * 100, 10) - )] - - # add median quantile - median <- data[quantile == 0.5, ] - median[, boundary := "upper"] - - data <- data.table::rbindlist(list(data, median)) - - if (!keep_quantile_col) { - data[, "quantile" := NULL] - } - - # if only point forecasts are scored, we only have NA values for range and - # boundary. In that instance we need to set the type of the columns - # explicitly to avoid future collisions. - data[, `:=`( - boundary = as.character(boundary), - range = as.numeric(range) - )] - - return(data[]) -} - - #' @title Change Data from a Sample Based Format to a Long Interval Range Format #' #' @description @@ -353,7 +307,7 @@ sample_to_range_long <- function(data, type = type ) - data <- quantile_to_range_long(data, keep_quantile_col = keep_quantile_col) + data <- quantile_to_interval(data, keep_quantile_col = keep_quantile_col) return(data[]) } diff --git a/R/z_globalVariables.R b/R/z_globalVariables.R index 6a0ee2ff5..3f5591611 100644 --- a/R/z_globalVariables.R +++ b/R/z_globalVariables.R @@ -25,9 +25,11 @@ globalVariables(c( "dss", "existing", "fill_col", + "forecast_id", "hist", "identifCol", "Interval_Score", + "interval_range", "overprediction", "underprediction", "quantile_coverage", diff --git a/man/quantile_to_range_long.Rd b/man/quantile_to_range_long.Rd deleted file mode 100644 index ad214f3e5..000000000 --- a/man/quantile_to_range_long.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_data_handling.R -\name{quantile_to_range_long} -\alias{quantile_to_range_long} -\title{Change Data from a Plain Quantile Format to a Long Range Format} -\usage{ -quantile_to_range_long(data, keep_quantile_col = TRUE) -} -\arguments{ -\item{data}{a data.frame in quantile format} - -\item{keep_quantile_col}{keep the quantile column in the final -output after transformation (default is FALSE)} -} -\value{ -a data.frame in a long interval range format -} -\description{ -Transform data from a format that uses quantiles only to one that uses -interval ranges to denote quantiles. -} -\keyword{internal} From a56b58713eccca7ec87b1953385a9a55ecbac8b4 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Thu, 2 Nov 2023 11:28:36 +0100 Subject: [PATCH 5/7] Fix warning about S3 generic / method consistency by adding `...` to methods --- R/utils_data_handling.R | 10 ++++++---- man/quantile_to_interval.Rd | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/R/utils_data_handling.R b/R/utils_data_handling.R index d27102ba6..420e13f2f 100644 --- a/R/utils_data_handling.R +++ b/R/utils_data_handling.R @@ -176,7 +176,7 @@ range_long_to_quantile <- function(data, #' be characterised by one or multiple prediction intervals, e.g. the lower #' and upper bounds of the 50% and 90% prediction intervals (corresponding to #' the 0.25 and 0.75 as well as the 0.05 and 0.095 quantiles). -#' @param ... Arguments passed down to the method +#' @param ... method arguments quantile_to_interval <- function(...) { UseMethod("quantile_to_interval") } @@ -203,7 +203,8 @@ quantile_to_interval <- function(...) { quantile_to_interval.data.frame <- function( dt, format = "long", - keep_quantile_col = FALSE + keep_quantile_col = FALSE, + ... ) { # make sure dt is a data.table assert_data_frame(dt) @@ -253,9 +254,10 @@ quantile_to_interval.data.frame <- function( quantile_to_interval.numeric <- function( observed, predicted, - quantile + quantile, + ... ) { - scoringutils:::assert_input_quantile(observed, predicted, quantile) + assert_input_quantile(observed, predicted, quantile) n <- length(observed) N <- length(quantile) diff --git a/man/quantile_to_interval.Rd b/man/quantile_to_interval.Rd index 787bc5cca..e99cbadad 100644 --- a/man/quantile_to_interval.Rd +++ b/man/quantile_to_interval.Rd @@ -8,12 +8,12 @@ \usage{ quantile_to_interval(...) -\method{quantile_to_interval}{data.frame}(dt, format = "long", keep_quantile_col = FALSE) +\method{quantile_to_interval}{data.frame}(dt, format = "long", keep_quantile_col = FALSE, ...) -\method{quantile_to_interval}{numeric}(observed, predicted, quantile) +\method{quantile_to_interval}{numeric}(observed, predicted, quantile, ...) } \arguments{ -\item{...}{Arguments passed down to the method} +\item{...}{method arguments} \item{dt}{a data.table with columns \code{quantile} and \code{predicted}} From b05db0da0c763af78fd9238296302cd92b4fbf7f Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Thu, 2 Nov 2023 11:40:43 +0100 Subject: [PATCH 6/7] remove unnecessary assert statement --- R/utils_data_handling.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/utils_data_handling.R b/R/utils_data_handling.R index 420e13f2f..bd9152324 100644 --- a/R/utils_data_handling.R +++ b/R/utils_data_handling.R @@ -206,8 +206,6 @@ quantile_to_interval.data.frame <- function( keep_quantile_col = FALSE, ... ) { - # make sure dt is a data.table - assert_data_frame(dt) if (!is.data.table(dt)) { dt <- data.table::as.data.table(dt) } else { From 308eae4b75fcfc3bbdc8bded46019d4bccfe8e2f Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Thu, 2 Nov 2023 13:38:12 +0100 Subject: [PATCH 7/7] Lint files --- R/check-input-helpers.R | 23 ++--------------------- R/get_-functions.R | 8 +++++--- R/pairwise-comparisons.R | 3 --- R/score_quantile.R | 27 ++++++++++++++++----------- R/utils.R | 7 +++++-- R/utils_data_handling.R | 22 +++++++++------------- 6 files changed, 37 insertions(+), 53 deletions(-) diff --git a/R/check-input-helpers.R b/R/check-input-helpers.R index 37c8c8a22..cfaa24b2c 100644 --- a/R/check-input-helpers.R +++ b/R/check-input-helpers.R @@ -114,8 +114,8 @@ assert_not_null <- function(...) { #' #' @keywords internal assert_equal_length <- function(..., - one_allowed = TRUE, - call_levels_up = 2) { + one_allowed = TRUE, + call_levels_up = 2) { vars <- list(...) lengths <- lengths(vars) @@ -283,25 +283,6 @@ check_duplicates <- function(data, forecast_unit = NULL) { } -# Function to check input for methods -# there should not be a name clash between a metric and a column name -# --> maybe this should be checked by the actual method that computes scores -# check whether any column name is a scoringutils metric -# clashing_colnames <- intersect(colnames(data), available_metrics()) -# if (length(clashing_colnames) > 0) { -# clashing_colnames <- paste0('"', clashing_colnames, '"') -# warnings <- c( -# warnings, -# paste0( -# "At least one column in the data ", -# "(", toString(clashing_colnames), ") ", -# "corresponds to the name of a metric that will be computed by ", -# "scoringutils. Please check `available_metrics()`" -# ) -# ) -# } - - #' Check column names are present in a data.frame #' @description #' The functions loops over the column names and checks whether they are diff --git a/R/get_-functions.R b/R/get_-functions.R index f9b88a971..22aaa47a9 100644 --- a/R/get_-functions.R +++ b/R/get_-functions.R @@ -23,9 +23,11 @@ get_forecast_type <- function(data) { } else if (test_forecast_type_is_point(data)) { forecast_type <- "point" } else { - stop("Checking `data`: input doesn't satisfy the criteria for any forecast type.", - "Are you missing a column `quantile` or `sample_id`?", - "Please check the vignette for additional info.") + stop( + "Checking `data`: input doesn't satisfy criteria for any forecast type.", + "Are you missing a column `quantile` or `sample_id`?", + "Please check the vignette for additional info." + ) } conflict <- check_attribute_conflict(data, "forecast_type", forecast_type) if (!is.logical(conflict)) { diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R index d842ede72..f23316254 100644 --- a/R/pairwise-comparisons.R +++ b/R/pairwise-comparisons.R @@ -65,10 +65,7 @@ pairwise_comparison <- function(scores, metric = "auto", baseline = NULL, ...) { - - # metric_names <- get_metrics(scores) metric <- match.arg(metric, c("auto", available_metrics())) - if (!is.data.table(scores)) { scores <- as.data.table(scores) } else { diff --git a/R/score_quantile.R b/R/score_quantile.R index c7b6d893d..f2da97f04 100644 --- a/R/score_quantile.R +++ b/R/score_quantile.R @@ -32,11 +32,13 @@ score_quantile <- function(data, data <- remove_na_observed_predicted(data) # make sure to have both quantile as well as range format -------------------- - range_data <- quantile_to_interval( data, + range_data <- quantile_to_interval( + data, keep_quantile_col = FALSE ) # adds the range column to the quantile data set - quantile_data <- range_long_to_quantile(range_data, + quantile_data <- range_long_to_quantile( + range_data, keep_range_col = TRUE ) @@ -96,14 +98,15 @@ score_quantile <- function(data, # compute absolute and squared error for point forecasts # these are marked by an NA in range, and a numeric value for point - if (any(c("se_point, se_mean, ae_point", "ae_median", "absolute_error") %in% metrics)) { - if ("point" %in% colnames(res)) { - res[ - is.na(range) & is.numeric(point), - `:=`(ae_point = abs_error(predicted = point, observed), - se_point = squared_error(predicted = point, observed)) - ] - } + compute_point <- any( + c("se_point, se_mean, ae_point", "ae_median", "absolute_error") %in% metrics + ) + if (compute_point && "point" %in% colnames(res)) { + res[ + is.na(range) & is.numeric(point), + `:=`(ae_point = abs_error(predicted = point, observed), + se_point = squared_error(predicted = point, observed)) + ] } # calculate scores on quantile format ---------------------------------------- @@ -156,7 +159,9 @@ score_quantile <- function(data, } # delete internal columns before returning result - res <- delete_columns(res, c("upper", "lower", "boundary", "point", "observed")) + res <- delete_columns( + res, c("upper", "lower", "boundary", "point", "observed") + ) return(res[]) } diff --git a/R/utils.R b/R/utils.R index cdb882093..c0c0a34e1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -184,11 +184,14 @@ remove_scoringutils_class <- function(object) { return(object) } # check if "scoringutils_" is in name of any class - if (any(grepl("scoringutils_", class(object)))) { + if (any(grepl("scoringutils_", class(object), fixed = TRUE))) { stored_attributes <- get_scoringutils_attributes(object) # remove all classes that contain "scoringutils_" - class(object) <- class(object)[!grepl("scoringutils_", class(object))] + class(object) <- class(object)[!grepl( + "scoringutils_", class(object), + fixed = TRUE + )] # remove all scoringutils attributes object <- strip_attributes(object, names(stored_attributes)) diff --git a/R/utils_data_handling.R b/R/utils_data_handling.R index bd9152324..d0677ca34 100644 --- a/R/utils_data_handling.R +++ b/R/utils_data_handling.R @@ -200,12 +200,10 @@ quantile_to_interval <- function(...) { #' @importFrom data.table copy #' @export #' @rdname quantile_to_interval -quantile_to_interval.data.frame <- function( - dt, - format = "long", - keep_quantile_col = FALSE, - ... -) { +quantile_to_interval.data.frame <- function(dt, + format = "long", + keep_quantile_col = FALSE, + ...) { if (!is.data.table(dt)) { dt <- data.table::as.data.table(dt) } else { @@ -231,7 +229,7 @@ quantile_to_interval.data.frame <- function( if (format == "wide") { delete_columns(dt, "quantile") - dt <- dcast(dt, ... ~ boundary, value.var = c("predicted")) + dt <- dcast(dt, ... ~ boundary, value.var = "predicted") } return(dt[]) } @@ -249,12 +247,10 @@ quantile_to_interval.data.frame <- function( #' `forecast_id` and `range`. #' @export #' @rdname quantile_to_interval -quantile_to_interval.numeric <- function( - observed, - predicted, - quantile, - ... -) { +quantile_to_interval.numeric <- function(observed, + predicted, + quantile, + ...) { assert_input_quantile(observed, predicted, quantile) n <- length(observed)