diff --git a/R/available_forecasts.R b/R/available_forecasts.R index b8c15c216..17418b0a6 100644 --- a/R/available_forecasts.R +++ b/R/available_forecasts.R @@ -13,11 +13,13 @@ #' all available columns (apart from a few "protected" columns such as #' 'predicted' and 'observed') plus "quantile" or "sample_id" where present). #' -#' @param collapse character vector (default is `c("quantile", "sample"`) with -#' names of categories for which the number of rows should be collapsed to one -#' when counting. For example, a single forecast is usually represented by a +#' @param collapse character vector (default is `c("quantile", "sample_id"`) +#' with names of categories for which the number of rows should be collapsed to +#' one when counting. For example, a single forecast is usually represented by a #' set of several quantiles or samples and collapsing these to one makes sure -#' that a single forecast only gets counted once. +#' that a single forecast only gets counted once. Setting `collapse = c()` +#' would mean that all quantiles / samples would be counted as individual +#' forecasts. #' #' @return A data.table with columns as specified in `by` and an additional #' column "count" with the number of forecasts. @@ -30,12 +32,11 @@ #' data.table::setDTthreads(1) # only needed to avoid issues on CRAN #' #' available_forecasts(example_quantile, -#' collapse = c("quantile"), #' by = c("model", "target_type") #' ) available_forecasts <- function(data, by = NULL, - collapse = c("quantile", "sample")) { + collapse = c("quantile", "sample_id")) { data <- validate(data) forecast_unit <- attr(data, "forecast_unit") @@ -48,7 +49,7 @@ available_forecasts <- function(data, # collapse several rows to 1, e.g. treat a set of 10 quantiles as one, # because they all belong to one single forecast that should be counted once collapse_by <- setdiff( - c(forecast_unit, "quantile", "sample"), + c(forecast_unit, "quantile", "sample_id"), collapse ) # filter out "quantile" or "sample" if present in collapse_by, but not data diff --git a/R/check-input-helpers.R b/R/check-input-helpers.R index d1fa62edc..37c8c8a22 100644 --- a/R/check-input-helpers.R +++ b/R/check-input-helpers.R @@ -2,10 +2,9 @@ #' #' @description Helper function #' @param x input to check -#' @param x additional arguments to pass to `check_numeric()` +#' @inheritDotParams checkmate::check_numeric #' @importFrom checkmate check_atomic_vector check_numeric -#' @return Either TRUE if the test is successful or a string with an error -#' message +#' @inherit document_check_functions return #' @keywords internal check_numeric_vector <- function(x, ...) { # check functions must return TRUE on success @@ -21,36 +20,6 @@ check_numeric_vector <- function(x, ...) { } -#' @title Check whether the desired metrics are available in scoringutils -#' -#' @description Helper function to check whether desired metrics are -#' available. If the input is `NULL`, all metrics will be returned. -#' -#' @param metrics character vector with desired metrics -#' -#' @return A character vector with metrics that can be used for downstream -#' computation -#' -#' @keywords internal - -check_metrics <- function(metrics) { - # use all available metrics if none are given - if (is.null(metrics)) { - metrics <- available_metrics() - } - - # check desired metrics are actually available in scoringutils - available_metrics <- available_metrics() - if (!all(metrics %in% available_metrics)) { - msg <- paste( - "The following metrics are not available:", - toString(setdiff(metrics, available_metrics)) - ) - warning(msg) - } - return(metrics) -} - #' Check that quantiles are valid #' #' @description @@ -68,7 +37,6 @@ check_metrics <- function(metrics) { #' @return None. Function errors if quantiles are invalid. #' #' @keywords internal - check_quantiles <- function(quantiles, name = "quantiles", range = c(0, 1)) { if (any(quantiles < range[1]) || any(quantiles > range[2])) { stop(name, " must be between ", range[1], " and ", range[2]) @@ -83,13 +51,13 @@ check_quantiles <- function(quantiles, name = "quantiles", range = c(0, 1)) { #' @title Helper function to convert assert statements into checks #' #' @description Tries to execute an expression. Internally, this is used to -#' see whether assertions fail when checking inputs +#' see whether assertions fail when checking inputs (i.e. to convert an +#' `assert_*()` statement into a check). If the expression fails, the error +#' message is returned. If the expression succeeds, `TRUE` is returned. #' @param expr an expression to be evaluated #' @importFrom checkmate assert assert_numeric check_matrix -#' @return Returns TRUE if expression was executed successfully, otherwise -#' returns a string with the resulting error message +#' @inherit document_check_functions return #' @keywords internal - check_try <- function(expr) { result <- try(expr, silent = TRUE) if (is.null(result)) { @@ -100,9 +68,6 @@ check_try <- function(expr) { } - - - #' @title Check Variable is not NULL #' #' @description @@ -110,11 +75,12 @@ check_try <- function(expr) { #' variable and the function call where the variable is missing. This function #' is a helper function that should only be called within other functions #' @param ... The variables to check +#' @inherit document_assert_functions return #' @return The function returns `NULL`, but throws an error if the variable is #' missing. #' #' @keywords internal -check_not_null <- function(...) { +assert_not_null <- function(...) { vars <- list(...) varnames <- names(vars) @@ -134,7 +100,7 @@ check_not_null <- function(...) { } -#' @title Check Length +#' @title Check Length of Two Vectors is Equal #' #' @description #' Check whether variables all have the same length @@ -142,14 +108,12 @@ check_not_null <- function(...) { #' @param one_allowed logical, allow arguments of length one that can be #' recycled #' @param call_levels_up How many levels to go up when including the function -#' call in the error message. This is useful when calling `check_equal_length()` +#' call in the error message. This is useful when calling `assert_equal_length()` #' within another checking function. -#' -#' @return The function returns `NULL`, but throws an error if variable lengths -#' differ +#' @inherit document_assert_functions return #' #' @keywords internal -check_equal_length <- function(..., +assert_equal_length <- function(..., one_allowed = TRUE, call_levels_up = 2) { vars <- list(...) @@ -173,7 +137,7 @@ check_equal_length <- function(..., one_allowed, "' should have the same length (or length one). Actual lengths: ", "' should have the same length. Actual lengths: " - ) + ) stop( "Arguments to the following function call: '", @@ -186,14 +150,31 @@ check_equal_length <- function(..., } +#' @title Check Whether There Is a Conflict Between Data and Attributes +#' @description +#' Check whether there is a conflict between a stored attribute and the +#' same value as inferred from the data. For example, this could be if +#' an attribute `forecast_unit` is stored, but is different from the +#' `forecast_unit` inferred from the data. The check is successful if +#' the stored and the inferred value are the same. +#' @param object The object to check +#' @param attribute The name of the attribute to check +#' @param expected The expected value of the attribute +#' @inherit document_check_functions return +#' @keywords internal check_attribute_conflict <- function(object, attribute, expected) { existing <- attr(object, attribute) + if (is.vector(existing) && is.vector(expected)) { + existing <- sort(existing) + expected <- sort(expected) + } + if (!is.null(existing) && !identical(existing, expected)) { msg <- paste0( "Object has an attribute `", attribute, "`, but it looks different ", - "from what's expected.\n", - "Existing: ", paste(existing, collapse = ", "), "\n", - "Expected: ", paste(expected, collapse = ", "), "\n", + "from what's expected based on the data.\n", + "Existing: ", toString(existing), "\n", + "Expected: ", toString(expected), "\n", "Running `validate()` again might solve the problem" ) return(msg) @@ -202,7 +183,13 @@ check_attribute_conflict <- function(object, attribute, expected) { } - +#' @title Assure that Data Has a `model` Column +#' +#' @description +#' Check whether the data.table has a column called `model`. +#' If not, a column called `model` is added with the value `Unspecified model`. +#' @return The data.table with a column called `model` +#' @keywords internal assure_model_column <- function(data) { if (!("model" %in% colnames(data))) { message( @@ -216,11 +203,11 @@ assure_model_column <- function(data) { #' Check that all forecasts have the same number of quantiles or samples -#' @param data data.frame to check +#' @description Function checks the number of quantiles or samples per forecast. +#' If the number of quantiles or samples is the same for all forecasts, it +#' returns TRUE and a string with an error message otherwise. #' @param forecast_unit Character vector denoting the unit of a single forecast. -#' @return Returns an string with a message if any forecasts have differing -#' numbers of samples or quantiles, otherwise returns TRUE -#' +#' @inherit document_check_functions params return #' @keywords internal check_number_per_forecast <- function(data, forecast_unit) { # check whether there are the same number of quantiles, samples -------------- @@ -242,13 +229,11 @@ check_number_per_forecast <- function(data, forecast_unit) { } - - - #' Check columns in data.frame don't have NA values -#' @inheritParams check_columns_present -#' @return Returns an string with a message if any of the column names -#' have NA values, otherwise returns TRUE +#' @description Function checks whether any of the columns in a data.frame, +#' as specified in `columns`, have NA values. If so, it returns a string with +#' an error message, otherwise it returns TRUE. +#' @inherit document_check_functions params return #' #' @keywords internal check_no_NA_present <- function(data, columns) { @@ -280,9 +265,7 @@ diagnose <- function(data) { #' @description #' Runs [get_duplicate_forecasts()] and returns a message if an issue is encountered #' @inheritParams get_duplicate_forecasts -#' @return Returns an string with an error message if an issue is found, -#' otherwise returns TRUE -#' +#' @inherit document_check_functions return #' @keywords internal check_duplicates <- function(data, forecast_unit = NULL) { check_duplicates <- get_duplicate_forecasts(data, forecast_unit = forecast_unit) @@ -320,10 +303,11 @@ check_duplicates <- function(data, forecast_unit = NULL) { #' Check column names are present in a data.frame -#' @param data A data.frame or similar to be checked -#' @param columns names of columns to be checked -#' @return Returns string with a message with the first issue encountered if -#' any of the column names are not in data, otherwise returns TRUE +#' @description +#' The functions loops over the column names and checks whether they are +#' present. If an issue is encountered, the function immediately stops +#' and returns a message with the first issue encountered. +#' @inherit document_check_functions params return #' @importFrom checkmate assert_character #' @keywords check-inputs check_columns_present <- function(data, columns) { @@ -342,8 +326,10 @@ check_columns_present <- function(data, columns) { } #' Test whether all column names are present in a data.frame -#' @param data A data.frame or similar to be checked -#' @param columns names of columns to be checked +#' @description The function checks whether all column names are present. If +#' one or more columns are missing, the function returns FALSE. If all columns +#' are present, the function returns TRUE. +#' @inheritParams document_check_functions #' @return Returns TRUE if all columns are present and FALSE otherwise #' @keywords internal test_columns_present <- function(data, columns) { @@ -352,8 +338,10 @@ test_columns_present <- function(data, columns) { } #' Test whether column names are NOT present in a data.frame -#' @param data A data.frame or similar to be checked -#' @param columns names of columns to be checked +#' @description The function checks whether all column names are NOT present. +#' If none of the columns are present, the function returns TRUE. If one or +#' more columns are present, the function returns FALSE. +#' @inheritParams document_check_functions #' @return Returns TRUE if none of the columns are present and FALSE otherwise #' @keywords internal test_columns_not_present <- function(data, columns) { @@ -366,12 +354,10 @@ test_columns_not_present <- function(data, columns) { #' Check whether data is data.frame with correct columns #' @description Checks whether data is a data.frame, whether columns -#' "observed" and "predicted" are presents -#' and checks that only one of "quantile" and "sample_id" is present. -#' @param data A data.frame or similar to be checked +#' "observed" and "predicted" are present, and checks that only one of +#' "quantile" and "sample_id" is present. +#' @inherit document_check_functions params return #' @importFrom checkmate check_data_frame -#' @return Returns TRUE if basic requirements are satisfied and a string with -#' an error message otherwise #' @keywords check-inputs check_data_columns <- function(data) { is_data <- check_data_frame(data, min.rows = 1) @@ -396,8 +382,7 @@ check_data_columns <- function(data) { #' @description Checks whether an object has an attribute #' @param object An object to be checked #' @param attribute name of an attribute to be checked -#' @return Returns TRUE if attribute is there and an error message as -#' a string otherwise +#' @inherit document_check_functions return #' @keywords check-inputs check_has_attribute <- function(object, attribute) { if (is.null(attr(object, attribute))) { @@ -408,5 +393,3 @@ check_has_attribute <- function(object, attribute) { return(TRUE) } } - - diff --git a/R/check-inputs-scoring-functions.R b/R/check-inputs-scoring-functions.R index 780086818..350a3af4f 100644 --- a/R/check-inputs-scoring-functions.R +++ b/R/check-inputs-scoring-functions.R @@ -1,7 +1,6 @@ #' @title Assert that inputs are correct for sample-based forecast -#' -#' @description Helper function to assert whether the input is suitable for -#' scoring. +#' @description Function assesses whether the inputs correspond to the +#' requirements for scoring sample-based forecasts. #' @param observed Input to be checked. Should be a numeric vector with the #' observed values of size n #' @param predicted Input to be checked. Should be a numeric nxN matrix of @@ -10,8 +9,7 @@ #' If `observed` is just a single number, then predicted values can just be a #' vector of size N. #' @importFrom checkmate assert assert_numeric check_matrix -#' @return Returns NULL invisibly if the check was successful and throws an -#' error otherwise. +#' @inherit document_assert_functions return #' @keywords check-inputs assert_input_sample <- function(observed, predicted) { assert_numeric(observed, min.len = 1) @@ -30,12 +28,8 @@ assert_input_sample <- function(observed, predicted) { } #' @title Check that inputs are correct for sample-based forecast -#' -#' @description Helper function to check whether the input is suitable for -#' scoring. -#' @inheritParams assert_input_sample -#' @return Returns TRUE if the check was successful and a string with the -#' error message otherwise +#' @inherit assert_input_sample params description +#' @inherit document_check_functions return #' @keywords check-inputs check_input_sample <- function(observed, predicted) { result <- check_try(assert_input_sample(observed, predicted)) @@ -43,11 +37,10 @@ check_input_sample <- function(observed, predicted) { } -#' @title Assert that inputs are correct for sample-based forecast -#' -#' @description Helper function to assert whether the input is suitable for -#' scoring. -#' @param observed Input to be checked. Should be a vector with the +#' @title Assert that inputs are correct for quantile-based forecast +#' @description Function assesses whether the inputs correspond to the +#' requirements for scoring quantile-based forecasts. +#' @param observed Input to be checked. Should be a numeric vector with the #' observed values of size n #' @param predicted Input to be checked. Should be nxN matrix of predictive #' quantiles, n (number of rows) being the number of data points and N @@ -58,8 +51,7 @@ check_input_sample <- function(observed, predicted) { #' denotes the quantile levels corresponding to the columns of the prediction #' matrix. #' @importFrom checkmate assert assert_numeric check_matrix -#' @return Returns NULL invisibly if the check was successful and throws an -#' error otherwise. +#' @inherit document_assert_functions return #' @keywords internal assert_input_quantile <- function(observed, predicted, quantile) { assert_numeric(observed, min.len = 1) @@ -84,12 +76,8 @@ assert_input_quantile <- function(observed, predicted, quantile) { } #' @title Check that inputs are correct for quantile-based forecast -#' -#' @description Helper function to check whether the input is suitable for -#' scoring. -#' @inheritParams assert_input_quantile -#' @return Returns TRUE if the check was successful and a string with the -#' error message otherwise +#' @inherit assert_input_quantile params description +#' @inherit check_input_sample return description #' @keywords check-inputs check_input_quantile <- function(observed, predicted, quantile) { result <- check_try(assert_input_quantile(observed, predicted, quantile)) @@ -98,9 +86,8 @@ check_input_quantile <- function(observed, predicted, quantile) { #' @title Assert that inputs are correct for binary forecast -#' -#' @description Helper function to assert whether the input is suitable for -#' scoring. +#' @description Function assesses whether the inputs correspond to the +#' requirements for scoring binary forecasts. #' @param observed Input to be checked. Should be a factor of length n with #' exactly two levels, holding the observed values. #' The highest factor level is assumed to be the reference level. This means @@ -110,12 +97,10 @@ check_input_quantile <- function(observed, predicted, quantile) { #' length n, holding probabilities. Values represent the probability that #' the corresponding value in `observed` will be equal to the highest #' available factor level. -#' @param ... additional arguments passed to other functions #' @importFrom checkmate assert assert_factor -#' @return Returns NULL invisibly if the check was successful and throws an -#' error otherwise. +#' @inherit document_assert_functions return #' @keywords check-inputs -assert_input_binary <- function(observed, predicted, ...) { +assert_input_binary <- function(observed, predicted) { if (length(observed) != length(predicted)) { stop("`observed` and `predicted` need to be ", "of same length when scoring binary forecasts") @@ -129,29 +114,23 @@ assert_input_binary <- function(observed, predicted, ...) { } #' @title Check that inputs are correct for binary forecast -#' -#' @description Helper function to check whether the input is suitable for -#' scoring. -#' @inheritParams assert_input_binary -#' @return Returns TRUE if the check was successful and a string with the -#' error message otherwise +#' @inherit assert_input_binary params description +#' @inherit document_check_functions return #' @keywords check-inputs check_input_binary <- function(observed, predicted) { - result <- check_try(assert_input_binary(observed, predicted, call_levels_up = 8)) + result <- check_try(assert_input_binary(observed, predicted)) return(result) } #' @title Assert that inputs are correct for point forecast -#' -#' @description Helper function to assert whether the input is suitable for -#' scoring. +#' @description Function assesses whether the inputs correspond to the +#' requirements for scoring point forecasts. #' @param observed Input to be checked. Should be a numeric vector with the #' observed values of size n #' @param predicted Input to be checked. Should be a numeric vector with the #' predicted values of size n -#' @return Returns NULL invisibly if the check was successful and throws an -#' error otherwise. +#' @inherit document_assert_functions return #' @keywords check-inputs assert_input_point <- function(observed, predicted) { assert(check_numeric_vector(observed, min.len = 1)) @@ -164,12 +143,8 @@ assert_input_point <- function(observed, predicted) { } #' @title Check that inputs are correct for point forecast -#' -#' @description Helper function to check whether the input is suitable for -#' scoring. -#' @inheritParams assert_input_point -#' @return Returns TRUE if the check was successful and a string with the -#' error message otherwise +#' @inherit assert_input_point params description +#' @inherit document_check_functions return #' @keywords check-inputs check_input_point <- function(observed, predicted) { result <- check_try(assert_input_point(observed, predicted)) diff --git a/R/convenience-functions.R b/R/convenience-functions.R index 3b1bd719e..870e4ac3d 100644 --- a/R/convenience-functions.R +++ b/R/convenience-functions.R @@ -1,6 +1,7 @@ #' @title Transform forecasts and observed values #' -#' @description Function to transform forecasts and observed values before scoring. +#' @description Function to transform forecasts and observed values before +#' scoring. #' #' @details There are a few reasons, depending on the circumstances, for #' why this might be desirable (check out the linked reference for more info). @@ -114,7 +115,8 @@ transform_forecasts <- function(data, if (scale_col_present) { if (!("natural" %in% original_data$scale)) { stop( - "If a column 'scale' is present, entries with scale =='natural' are required for the transformation" + "If a column 'scale' is present, entries with scale =='natural' ", + "are required for the transformation" ) } if (append && (label %in% original_data$scale)) { diff --git a/R/correlations.R b/R/correlations.R index 2033a8a00..75eda7583 100644 --- a/R/correlations.R +++ b/R/correlations.R @@ -23,8 +23,7 @@ correlation <- function(scores, digits = NULL) { metrics <- check_metrics(metrics) - # check metrics are present - metrics <- names(scores)[names(scores) %in% metrics] + metrics <- get_metrics(scores) # if quantile column is present, throw a warning if ("quantile" %in% names(scores)) { diff --git a/R/documentation-templates.R b/R/documentation-templates.R new file mode 100644 index 000000000..087c77d4b --- /dev/null +++ b/R/documentation-templates.R @@ -0,0 +1,25 @@ +#' Documentation template for check functions +#' @param data A data.frame or similar to be checked +#' @param columns A character vector of column names to check +#' @return Returns TRUE if the check was successful and a string with an +#' error message otherwise +#' @name document_check_functions +NULL + +#' Documentation template for check functions +#' @returns Returns NULL invisibly if the assertion was successful and throws an +#' error otherwise. +#' @name document_assert_functions +NULL + +#' Documentation template for test functions +#' @returns Returns TRUE if the check was successful and FALSE otherwise +#' @name document_test_functions +NULL + +#' Documentation template for scoring input data +#' @param data A data frame (or similar) of forecasts following the +#' specifications detailed in [score()]. +#' @param scores A data.table of scores as produced by [score()]. +#' @name document_score_data +NULL diff --git a/R/get_-functions.R b/R/get_-functions.R index 095a77e48..f9b88a971 100644 --- a/R/get_-functions.R +++ b/R/get_-functions.R @@ -13,29 +13,31 @@ #' "sample" or "point". #' #' @keywords internal - get_forecast_type <- function(data) { if (test_forecast_type_is_binary(data)) { - return("binary") - } - if (test_forecast_type_is_quantile(data)) { - return("quantile") - } - if (test_forecast_type_is_sample(data)) { - return("sample") - } - if (test_forecast_type_is_point(data)) { - return("point") - } - stop("Checking `data`: input doesn't satisfy the criteria for any forecast type.", + forecast_type <- "binary" + } else if (test_forecast_type_is_quantile(data)) { + forecast_type <- "quantile" + } else if (test_forecast_type_is_sample(data)) { + forecast_type <- "sample" + } else if (test_forecast_type_is_point(data)) { + forecast_type <- "point" + } else { + stop("Checking `data`: input doesn't satisfy the criteria for any forecast type.", "Are you missing a column `quantile` or `sample_id`?", "Please check the vignette for additional info.") + } + conflict <- check_attribute_conflict(data, "forecast_type", forecast_type) + if (!is.logical(conflict)) { + warning(conflict) + } + return(forecast_type) } #' Test whether data could be a binary forecast. #' @description Checks type of the necessary columns. -#' @param data A data.frame or similar to be checked +#' @inheritParams document_check_functions #' @importFrom checkmate test_factor test_numeric #' @return Returns TRUE if basic requirements are satisfied and FALSE otherwise #' @keywords internal @@ -47,7 +49,7 @@ test_forecast_type_is_binary <- function(data) { #' Test whether data could be a sample-based forecast. #' @description Checks type of the necessary columns. -#' @param data A data.frame or similar to be checked +#' @inheritParams document_check_functions #' @return Returns TRUE if basic requirements are satisfied and FALSE otherwise #' @keywords internal test_forecast_type_is_sample <- function(data) { @@ -59,7 +61,7 @@ test_forecast_type_is_sample <- function(data) { #' Test whether data could be a point forecast. #' @description Checks type of the necessary columns. -#' @param data A data.frame or similar to be checked +#' @inheritParams document_check_functions #' @return Returns TRUE if basic requirements are satisfied and FALSE otherwise #' @keywords internal test_forecast_type_is_point <- function(data) { @@ -71,7 +73,7 @@ test_forecast_type_is_point <- function(data) { #' Test whether data could be a quantile forecast. #' @description Checks type of the necessary columns. -#' @param data A data.frame or similar to be checked +#' @inheritParams document_check_functions #' @return Returns TRUE if basic requirements are satisfied and FALSE otherwise #' @keywords internal test_forecast_type_is_quantile <- function(data) { @@ -82,43 +84,33 @@ test_forecast_type_is_quantile <- function(data) { } - - -# need to think about whether we want or keep this function -get_prediction_type <- function(data) { - if (is.data.frame(data)) { - data <- data$predicted - } - if ( - isTRUE(all.equal(as.vector(data), as.integer(data))) && - !all(is.na(as.integer(data))) - ) { - return("discrete") - } else if (suppressWarnings(!all(is.na(as.numeric(data))))) { - return("continuous") - } else { - stop("Input is not numeric and cannot be coerced to numeric") - } -} - -#' @title Get type of the target true values of a forecast +#' @title Get type of a vector or matrix of observed values or predictions #' -#' @description Internal helper function to get the type of the target -#' true values of a forecast. That is inferred based on the type and the -#' content of the `observed` column. +#' @description Internal helper function to get the type of a vector (usually +#' of observed or predicted values). The function checks whether the input is +#' a factor, or else whether it is integer (or can be coerced to integer) or +#' whether it's continuous. #' -#' @inheritParams validate +#' @param x Input used to get the type. #' -#' @return Character vector of length one with either "binary", "integer", or -#' "continuous" +#' @return Character vector of length one with either "classification", +#' "integer", or "continuous" #' #' @keywords internal - -get_target_type <- function(data) { - if (is.factor(data$observed)) { +get_type <- function(x) { + if (is.factor(x)) { return("classification") } - if (isTRUE(all.equal(data$observed, as.integer(data$observed)))) { + assert_numeric(as.vector(x)) + if (all(is.na(as.vector(x)))) { + stop("Can't get type: all values of are NA") + } + if (is.integer(x)) { + return("integer") + } + if ( + isTRUE(all.equal(as.vector(x), as.integer(x))) && !all(is.na(as.integer(x))) + ) { return("integer") } else { return("continuous") @@ -149,9 +141,6 @@ get_metrics <- function(scores) { } - - - #' @title Get unit of a single forecast #' #' @description Helper function to get the unit of a single forecast, i.e. @@ -162,17 +151,25 @@ get_metrics <- function(scores) { #' specified during scoring, if any. #' #' @inheritParams validate +#' @param check_conflict Whether or not to check whether there is a conflict +#' between a stored attribute and the inferred forecast unit. Defaults to FALSE. #' #' @return A character vector with the column names that define the unit of #' a single forecast #' #' @keywords internal - -get_forecast_unit <- function(data) { +get_forecast_unit <- function(data, check_conflict = FALSE) { + # check whether there is a conflict in the forecast_unit and if so warn protected_columns <- get_protected_columns(data) protected_columns <- c(protected_columns, attr(data, "metric_names")) forecast_unit <- setdiff(colnames(data), unique(protected_columns)) + + conflict <- check_attribute_conflict(data, "forecast_unit", forecast_unit) + if (check_conflict && !is.logical(conflict)) { + warning(conflict) + } + return(forecast_unit) } @@ -189,7 +186,6 @@ get_forecast_unit <- function(data) { #' protected in scoringutils. #' #' @keywords internal - get_protected_columns <- function(data = NULL) { protected_columns <- c( diff --git a/R/metrics-binary.R b/R/metrics-binary.R index 5c8182224..4a0abed49 100644 --- a/R/metrics-binary.R +++ b/R/metrics-binary.R @@ -1,24 +1,7 @@ -#' Brier Score -#' -#' @description -#' Computes the Brier Score for probabilistic forecasts of binary outcomes. +#' Metrics for Binary Outcomes #' #' @details -#' The Brier score is a proper score rule that assesses the accuracy of -#' probabilistic binary predictions. The outcomes can be either 0 or 1, -#' the predictions must be a probability that the observed outcome will be 1. -#' -#' The Brier Score is then computed as the mean squared error between the -#' probabilistic prediction and the observed outcome. -#' -#' \deqn{ -#' \textrm{Brier\_Score} = \frac{1}{N} \sum_{t = 1}^{n} (\textrm{prediction}_t - -#' \textrm{outcome}_t)^2 -#' }{ -#' Brier_Score = 1/N sum_{t = 1}^{n} (prediction_t - outcome_t)² -#' } -#' -#' The function requires users to provide observed values as a factor in order +#' The functions require users to provide observed values as a factor in order #' to distinguish its input from the input format required for scoring point #' forecasts. Internally, however, factors will be converted to numeric values. #' A factor `observed = factor(c(0, 1, 1, 0, 1)` with two levels (`0` and `1`) @@ -45,17 +28,35 @@ #' @param predicted A numeric vector of length n, holding probabilities. #' Values represent the probability that the corresponding outcome is equal to #' the highest level of the factor `observed`. -#' @return A numeric value with the Brier Score, i.e. the mean squared -#' error of the given probability forecasts -#' @export -#' #' @examples #' observed <- factor(sample(c(0, 1), size = 30, replace = TRUE)) #' predicted <- runif(n = 30, min = 0, max = 1) #' #' brier_score(observed, predicted) -#' @keywords metric +#' logs_binary(observed, predicted) +#' @name scoring-functions-binary +NULL + +#' @description +#' **Brier score** +#' +#' The Brier Score is the mean squared error between the probabilistic +#' prediction and the observed outcome. The Brier score is a proper scoring +#' rule. Small values are better (best is 0, the worst is 1). +#' +#' \deqn{ +#' \textrm{Brier\_Score} = \frac{1}{N} \sum_{t = 1}^{n} (\textrm{prediction}_t - +#' \textrm{outcome}_t)^2, +#' }{ +#' Brier_Score = 1/N sum_{t = 1}^{n} (prediction_t - outcome_t)², +#' } where \eqn{\textrm{outcome}_t \in \{0, 1\}}{outcome_t in {0, 1}}, and +#' \eqn{\textrm{prediction}_t \in [0, 1]}{prediction_t in [0, 1]} represents +#' the probability that the outcome is equal to 1. +#' @return A numeric vector of size n with the Brier scores +#' @keywords metric +#' @export +#' @rdname scoring-functions-binary brier_score <- function(observed, predicted) { assert_input_binary(observed, predicted) @@ -68,46 +69,17 @@ brier_score <- function(observed, predicted) { #' Log Score for Binary outcomes #' #' @description -#' Computes the Log Score for probabilistic forecasts of binary outcomes. -#' -#' @details -#' The Log Score is a proper score rule suited to assessing the accuracy of -#' probabilistic binary predictions. The outcomes can be either 0 or 1, -#' the predictions must be a probability that the true outcome will be 1. +#' **Log score for binary outcomes** #' -#' The Log Score is then computed as the negative logarithm of the probability -#' assigned to the true outcome. Reporting the negative logarithm means that -#' smaller values are better. +#' The Log Score is the negative logarithm of the probability +#' assigned to the observed value. It is a proper scoring rule. Small values +#' are better (best is zero, worst is infinity). #' -#' The function requires users to provide observed values as a factor in order -#' to distinguish its input from the input format required for scoring point -#' forecasts. Internally, however, factors will be converted to numeric values. -#' A factor `observed = factor(c(0, 1, 1, 0, 1)` with two levels (`0` and `1`) -#' would internally be coerced to a numeric vector (in this case this would -#' result in the numeric vector c(1, 2, 2, 1, 1)). After subtracting 1, the -#' resulting vector (`c(0, 1, 1, 0)` in this case) is used for internal -#' calculations. All predictions are assumed represent the probability that the -#' outcome is equal of the highest factor level (in this case that the -#' outcome is equal to 1). -#' You could alternatively also provide a vector like -#' `observed = factor(c("a", "b", "b", "a"))` (with two levels, `a` and `b`), -#' which would result in exactly the same internal representation. Probabilities -#' then represent the probability that the outcome is equal to "b". -#' If you want your predictions to be probabilities that the outcome is "a", -#' then you could of course make `observed` a factor with levels swapped, i.e. -#' `observed = factor(c("a", "b", "b", "a"), levels = c("b", "a"))` -#' -#' @inheritParams brier_score -#' @return A numeric vector with log scores +#' @return A numeric vector of size n with log scores #' @importFrom methods hasArg #' @export #' @keywords metric -#' -#' @examples -#' observed <- factor(sample(c(0, 1), size = 30, replace = TRUE)) -#' predicted <- runif(n = 30, min = 0, max = 1) - -#' logs_binary(observed, predicted) +#' @rdname scoring-functions-binary logs_binary <- function(observed, predicted) { assert_input_binary(observed, predicted) observed <- as.numeric(observed) - 1 diff --git a/R/metrics-range.R b/R/metrics-range.R index 95e4a9103..fe8f54cab 100644 --- a/R/metrics-range.R +++ b/R/metrics-range.R @@ -107,11 +107,11 @@ interval_score <- function(observed, "need all arguments 'observed', 'lower', 'upper' and 'interval_range' in function 'interval_score()'" # nolint ) } - check_not_null( + assert_not_null( observed = observed, lower = lower, upper = upper, interval_range = interval_range ) - check_equal_length(observed, lower, interval_range, upper) + assert_equal_length(observed, lower, interval_range, upper) if (any(interval_range < 0, na.rm = TRUE)) { stop("interval ranges must be positive") @@ -272,5 +272,3 @@ bias_range <- function(lower, upper, range, observed) { return(bias) } - - diff --git a/R/metrics-sample.R b/R/metrics-sample.R index 47ba90313..67291bffa 100644 --- a/R/metrics-sample.R +++ b/R/metrics-sample.R @@ -57,7 +57,7 @@ bias_sample <- function(observed, predicted) { assert_input_sample(observed, predicted) - prediction_type <- get_prediction_type(predicted) + prediction_type <- get_type(predicted) # empirical cdf n_pred <- ncol(predicted) @@ -283,4 +283,3 @@ mad_sample <- function(observed = NULL, predicted, ...) { sharpness <- apply(predicted, MARGIN = 1, mad, ...) return(sharpness) } - diff --git a/R/pit.R b/R/pit.R index 9366c9eb6..2e00e4b90 100644 --- a/R/pit.R +++ b/R/pit.R @@ -89,11 +89,11 @@ pit_sample <- function(observed, # error handling-------------------------------------------------------------- # check al arguments are provided - # this could be integrated into check_not_null + # this could be integrated into assert_not_null if (missing("observed") || missing("predicted")) { stop("`observed` or `predicted` missing in function 'pit_sample()'") } - check_not_null(observed = observed, predicted = predicted) + assert_not_null(observed = observed, predicted = predicted) # check if there is more than one observation n <- length(observed) @@ -198,6 +198,8 @@ pit <- function(data, coverage <- summarise_scores(coverage, by = unique(c(by, "quantile")) ) + # remove all existing attributes and class + coverage <- remove_scoringutils_class(coverage) coverage <- coverage[order(quantile), .( diff --git a/R/plot.R b/R/plot.R index 65c730b41..972bf0c49 100644 --- a/R/plot.R +++ b/R/plot.R @@ -411,14 +411,16 @@ plot_predictions <- function(data, # find out what type of predictions we have. convert sample based to # range data - prediction_type <- get_prediction_type(data) - if (prediction_type %in% c("integer", "continuous")) { - forecasts <- sample_to_range_long(forecasts, - range = range, + + if (test_forecast_type_is_quantile(data)) { + forecasts <- quantile_to_range_long( + forecasts, keep_quantile_col = FALSE ) - } else if (prediction_type == "quantile") { - forecasts <- quantile_to_range_long(forecasts, + } else if (test_forecast_type_is_sample(data)) { + forecasts <- sample_to_range_long( + forecasts, + range = range, keep_quantile_col = FALSE ) } @@ -538,7 +540,7 @@ make_NA <- function(data = NULL, what = c("truth", "forecast", "both"), ...) { - check_not_null(data = data) + assert_not_null(data = data) data <- data.table::copy(data) what <- match.arg(what) diff --git a/R/score.R b/R/score.R index 3e631de1c..4e235ade3 100644 --- a/R/score.R +++ b/R/score.R @@ -1,60 +1,102 @@ -#' @title Evaluate forecasts -#' -#' @description This function allows automatic scoring of forecasts using a -#' range of metrics. For most users it will be the workhorse for -#' scoring forecasts as it wraps the lower level functions package functions. -#' However, these functions are also available if you wish to make use of them -#' independently. -#' -#' A range of forecasts formats are supported, including quantile-based, -#' sample-based, binary forecasts. Prior to scoring, users may wish to make use -#' of [validate()] to ensure that the input data is in a supported -#' format though this will also be run internally by [score()]. Examples for -#' each format are also provided (see the documentation for `data` below or in -#' [validate()]). -#' -#' Each format has a set of required columns (see below). Additional columns may -#' be present to indicate a grouping of forecasts. For example, we could have -#' forecasts made by different models in various locations at different time -#' points, each for several weeks into the future. It is important, that there -#' are only columns present which are relevant in order to group forecasts. -#' A combination of different columns should uniquely define the -#' *unit of a single forecast*, meaning that a single forecast is defined by the -#' values in the other columns. Adding additional unrelated columns may alter -#' results. -#' -#' To obtain a quick overview of the currently supported evaluation metrics, -#' have a look at the [metrics] data included in the package. The column -#' `metrics$Name` gives an overview of all available metric names that can be -#' computed. If interested in an unsupported metric please open a [feature -#' request](https://github.com/epiforecasts/scoringutils/issues) or consider -#' contributing a pull request. +#' @title Evaluate forecasts in a data.frame format #' -#' For additional help and examples, check out the [Getting Started -#' Vignette](https://epiforecasts.io/scoringutils/articles/scoringutils.html) -#' as well as the paper [Evaluating Forecasts with scoringutils in -#' R](https://arxiv.org/abs/2205.07090). +#' @description `score()` applies a selection of scoring metrics to a data.frame +#' of forecasts. It is the workhorse of the `scoringutils` package. +#' `score()` is a generic that dispatches to different methods depending on the +#' class of the input data. The default method is `score.default()`, which +#' validates the input, assigns as class based on the forecast type, and then +#' calls `score()` again to dispatch to the appropriate method. See below for +#' more information on how forecast types are determined. +#' +#' @details +#' **Forecast types and input format** +#' +#' Various different forecast types / forecast formats are supported. At the +#' moment, those are +#' - point forecasts +#' - binary forecasts ("soft binary classification") +#' - Probabilistic forecasts in a quantile-based format (a forecast is +#' represented as a set of predictive quantiles) +#' - Probabilistic forecasts in a sample-based format (a forecast is represented +#' as a set of predictive samples) +#' +#' Forecast types are determined based on the columns present in the input data. +#' +#' *Point forecasts* require a column `observed` of type numeric and a column +#' `predicted` of type numeric. #' -#' @param data A data.frame or data.table with the following columns: -#' - `observed` - observed values -#' - `predicted` - predictions, predictive samples or predictive quantiles -#' - `model` - name of the model or forecaster who made a prediction +#' *Binary forecasts* require a column `observed` of type factor with exactly +#' two levels and a column `predicted` of type numeric with probabilities, +#' corresponding to the probability that `observed` is equal to the second +#' factor level. See details [here][brier_score()] for more information. #' -#' Depending on the forecast type, one of the following columns may be required: -#' - `sample_id` - index for the predictive samples in the 'predicted' column -#' - `quantile`: quantile-level of the corresponding value in `predicted` +#' *Quantile-based forecasts* require a column `observed` of type numeric, +#' a column `predicted` of type numeric, and a column `quantile` of type numeric +#' with quantile-levels (between 0 and 1). +#' +#' *Sample-based forecasts* require a column `observed` of type numeric, +#' a column `predicted` of type numeric, and a column `sample_id` of type +#' numeric with sample indices. #' #' For more information see the vignettes and the example data -#' ([example_quantile], [example_continuous], -#' [example_integer], [example_point()], and [example_binary]). +#' ([example_quantile], [example_continuous], [example_integer], +#' [example_point()], and [example_binary]). +#' +#' **Forecast unit** +#' +#' In order to score forecasts, `scoringutils` needs to know which of the rows +#' of the data belong together and jointly form a single forecasts. This is +#' easy e.g. for point forecast, where there is one row per forecast. For +#' quantile or sample-based forecasts, however, there are multiple rows that +#' belong to single forecast. +#' +#' The *forecast unit* or *unit of a single forecast* is then described by the +#' combination of columns that uniquely identify a single forecast. +#' For example, we could have forecasts made by different models in various +#' locations at different time points, each for several weeks into the future. +#' The forecast unit could then be described as +#' `forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")`. +#' `scoringutils` automatically tries to determine the unit of a single +#' forecast. It uses all existing columns for this, which means that no columns +#' must be present that are unrelated to the forecast unit. As a very simplistic +#' example, if you had an additional row, "even", that is one if the row number +#' is even and zero otherwise, then this would mess up scoring as `scoringutils` +#' then thinks that this column was relevant in defining the forecast unit. +#' +#' In order to avoid issues, we recommend using the function +#' [set_forecast_unit()] to determine the forecast unit manually. +#' The function simply drops unneeded columns, while making sure that all +#' necessary, 'protected columns' like "predicted" or "observed" are retained. +#' +#' **Validating inputs** #' -#' @param metrics the metrics you want to have in the output. If `NULL` (the -#' default), all available metrics will be computed. -#' @param ... additional parameters passed down to other functions. +#' We recommend that users validate their input prior to scoring using the +#' function [validate()] (though this will also be run internally by [score()]). +#' The function checks the input data and provides helpful information. #' -#' @return A data.table with unsummarised scores. There will be one score per -#' quantile or sample_id, which is usually not desired, so you should almost -#' always run [summarise_scores()] on the unsummarised scores. +#' +#' **Further help** +#' +#' For additional help and examples, check out the [Getting Started +#' Vignette](https://epiforecasts.io/scoringutils/articles/scoringutils.html) as +#' well as the paper [Evaluating Forecasts with scoringutils in +#' R](https://arxiv.org/abs/2205.07090). +#' +#' @param data A data.frame or data.table with predicted and observed values. +#' @param metrics A named list of scoring functions. Names will be used as +#' column names in the output. See [metrics_point()], [metrics_binary()], +#' `metrics_quantile()`, and [metrics_sample()] for more information on the +#' default metrics used. +#' @param ... additional arguments +#' +#' @return A data.table with unsummarised scores. This will generally be +#' one score per forecast (as defined by the unit of a single forecast). +#' +#' For quantile-based forecasts, one score per quantile will be returned +#' instead. This is done as scores can be computed and may be of interest +#' for individual quantiles. You can call [summarise_scores()]) on the +#' unsummarised scores to obtain one score per forecast unit for quantile-based +#' forecasts. #' #' @importFrom data.table ':=' as.data.table #' @@ -62,9 +104,8 @@ #' library(magrittr) # pipe operator #' data.table::setDTthreads(1) # only needed to avoid issues on CRAN #' -#' validate(example_quantile) -#' score(example_quantile) %>% -#' add_coverage(by = c("model", "target_type")) %>% +#' validated <- validate(example_quantile) +#' score(validated) %>% #' summarise_scores(by = c("model", "target_type")) #' #' # set forecast unit manually (to avoid issues with scoringutils trying to @@ -80,19 +121,17 @@ #' \dontrun{ #' score(example_binary) #' score(example_quantile) +#' score(example_point) #' score(example_integer) #' score(example_continuous) #' } #' -#' # score point forecasts (marked by 'NA' in the quantile column) -#' score(example_point) %>% -#' summarise_scores(by = "model", na.rm = TRUE) -#' #' @author Nikos Bosse \email{nikosbosse@@gmail.com} -#' @references Funk S, Camacho A, Kucharski AJ, Lowe R, Eggo RM, Edmunds WJ -#' (2019) Assessing the performance of real-time epidemic forecasts: A -#' case study of Ebola in the Western Area region of Sierra Leone, 2014-15. -#' PLoS Comput Biol 15(2): e1006785. \doi{10.1371/journal.pcbi.1006785} +#' @references +#' Bosse NI, Gruson H, Cori A, van Leeuwen E, Funk S, Abbott S +#' (2022) Evaluating Forecasts with scoringutils in R. +#' \doi{10.48550/arXiv.2205.07090} +#' #' @export score <- function(data, ...) { @@ -111,7 +150,6 @@ score.default <- function(data, ...) { score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) { data <- validate(data) data <- remove_na_observed_predicted(data) - forecast_unit <- attr(data, "forecast_unit") metrics <- validate_metrics(metrics) # Extract the arguments passed in ... @@ -122,8 +160,8 @@ score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) { matching_args <- filter_function_args(fun, args) data[, (metric_name) := do.call( - fun, c(list(observed, predicted), matching_args)) - ] + fun, c(list(observed, predicted), matching_args) + )] return() }, ...) @@ -140,7 +178,6 @@ score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) { score.scoringutils_point <- function(data, metrics = metrics_point, ...) { data <- validate(data) data <- remove_na_observed_predicted(data) - forecast_unit <- attr(data, "forecast_unit") metrics <- validate_metrics(metrics) # Extract the arguments passed in ... @@ -151,8 +188,8 @@ score.scoringutils_point <- function(data, metrics = metrics_point, ...) { matching_args <- filter_function_args(fun, args) data[, (metric_name) := do.call( - fun, c(list(observed, predicted), matching_args)) - ] + fun, c(list(observed, predicted), matching_args) + )] return() }, ...) @@ -177,11 +214,11 @@ score.scoringutils_sample <- function(data, metrics = metrics_sample, ...) { matching_args <- filter_function_args(fun, args) data[, (metric_name) := do.call( - fun, c(list(unique(observed), t(predicted)), matching_args)), - by = forecast_unit - ] + fun, c(list(unique(observed), t(predicted)), matching_args) + ), by = forecast_unit] return() - }, ...) + }, + ...) data <- data[ , lapply(.SD, unique), @@ -201,7 +238,10 @@ score.scoringutils_quantile <- function(data, metrics = NULL, ...) { data <- remove_na_observed_predicted(data) forecast_unit <- attr(data, "forecast_unit") - metrics <- check_metrics(metrics) + if (is.null(metrics)) { + metrics <- available_metrics() + } + metrics <- metrics[metrics %in% available_metrics()] scores <- score_quantile( data = data, forecast_unit = forecast_unit, diff --git a/R/summarise_scores.R b/R/summarise_scores.R index 44a0b0c35..03a8883fb 100644 --- a/R/summarise_scores.R +++ b/R/summarise_scores.R @@ -30,9 +30,10 @@ #' @examples #' data.table::setDTthreads(1) # only needed to avoid issues on CRAN #' library(magrittr) # pipe operator -#' +#' \dontrun{ #' scores <- score(example_continuous) #' summarise_scores(scores) +#' } #' #' #' # summarise over samples or quantiles to get one score per forecast @@ -87,7 +88,6 @@ summarise_scores <- function(scores, # preparations --------------------------------------------------------------- # get unit of a single forecast forecast_unit <- get_forecast_unit(scores) - check_attribute_conflict(scores, "forecast_unit", forecast_unit) # if by is not provided, set to the unit of a single forecast if (is.null(by)) { @@ -384,4 +384,3 @@ add_coverage <- function(scores, return(scores_with_coverage[]) } - diff --git a/R/utils.R b/R/utils.R index 1301aa8fe..cdb882093 100644 --- a/R/utils.R +++ b/R/utils.R @@ -133,7 +133,6 @@ filter_function_args <- function(fun, args) { #' @title Assign attributes to an object from a named list -#' #' @description #' Every list item will be made an attribute of the object. #' @param object An object to assign attributes to @@ -152,3 +151,49 @@ assign_attributes <- function(object, attribute_list) { return(object) } +#' Strip attributes from an object +#' @description This function removes all attributes from an object that are +#' specified in the `attributes` argument. +#' @param object An object to remove attributes from +#' @param attributes A character vector of attribute names to remove from the +#' object +#' @return The object with attributes removed +#' @keywords internal +strip_attributes <- function(object, attributes) { + if (is.null(object)) { + return(NULL) + } + for (i in seq_along(attributes)) { + setattr(object, attributes[i], NULL) + } + return(object) +} + +#' Remove scoringutils_ Class and Attributes +#' @description This function removes all classes that start with +#' "scoringutils_" and all attributes associated with scoringutils. +#' +#' @param object An object to remove scoringutils classes and attributes from +#' @return The object with scoringutils classes and attributes removed +#' @keywords internal +remove_scoringutils_class <- function(object) { + if (is.null(object)) { + return(NULL) + } + if (is.null(class(object))) { + return(object) + } + # check if "scoringutils_" is in name of any class + if (any(grepl("scoringutils_", class(object)))) { + stored_attributes <- get_scoringutils_attributes(object) + + # remove all classes that contain "scoringutils_" + class(object) <- class(object)[!grepl("scoringutils_", class(object))] + + # remove all scoringutils attributes + object <- strip_attributes(object, names(stored_attributes)) + + return(object) + } + return(object) +} diff --git a/R/validate.R b/R/validate.R index 08fcfd4a4..d283f7aa8 100644 --- a/R/validate.R +++ b/R/validate.R @@ -122,10 +122,9 @@ validate_general <- function(data) { # assign forecast type and unit as an attribute and make sure there is no clash forecast_type <- get_forecast_type(data) - assert(check_attribute_conflict(data, "forecast_type", forecast_type)) setattr(data, "forecast_type", forecast_type) - forecast_unit <- get_forecast_unit(data) - assert(check_attribute_conflict(data, "forecast_unit", forecast_unit)) + + forecast_unit <- get_forecast_unit(data, check_conflict = TRUE) setattr(data, "forecast_unit", forecast_unit) # check that there aren't any duplicated forecasts diff --git a/data/metrics_binary.rda b/data/metrics_binary.rda index abb9bd8fb..9a37a5dda 100644 Binary files a/data/metrics_binary.rda and b/data/metrics_binary.rda differ diff --git a/data/metrics_sample.rda b/data/metrics_sample.rda index 0040c4485..f14e6f7c1 100644 Binary files a/data/metrics_sample.rda and b/data/metrics_sample.rda differ diff --git a/man/check_equal_length.Rd b/man/assert_equal_length.Rd similarity index 57% rename from man/check_equal_length.Rd rename to man/assert_equal_length.Rd index aebeacabd..8b83b4c85 100644 --- a/man/check_equal_length.Rd +++ b/man/assert_equal_length.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/check-input-helpers.R -\name{check_equal_length} -\alias{check_equal_length} -\title{Check Length} +\name{assert_equal_length} +\alias{assert_equal_length} +\title{Check Length of Two Vectors is Equal} \usage{ -check_equal_length(..., one_allowed = TRUE, call_levels_up = 2) +assert_equal_length(..., one_allowed = TRUE, call_levels_up = 2) } \arguments{ \item{...}{The variables to check} @@ -13,12 +13,12 @@ check_equal_length(..., one_allowed = TRUE, call_levels_up = 2) recycled} \item{call_levels_up}{How many levels to go up when including the function -call in the error message. This is useful when calling \code{check_equal_length()} +call in the error message. This is useful when calling \code{assert_equal_length()} within another checking function.} } \value{ -The function returns \code{NULL}, but throws an error if variable lengths -differ +Returns NULL invisibly if the assertion was successful and throws an +error otherwise. } \description{ Check whether variables all have the same length diff --git a/man/assert_input_binary.Rd b/man/assert_input_binary.Rd index 951a223f6..4ca8f7883 100644 --- a/man/assert_input_binary.Rd +++ b/man/assert_input_binary.Rd @@ -4,7 +4,7 @@ \alias{assert_input_binary} \title{Assert that inputs are correct for binary forecast} \usage{ -assert_input_binary(observed, predicted, ...) +assert_input_binary(observed, predicted) } \arguments{ \item{observed}{Input to be checked. Should be a factor of length n with @@ -17,15 +17,13 @@ to the highest factor level.} length n, holding probabilities. Values represent the probability that the corresponding value in \code{observed} will be equal to the highest available factor level.} - -\item{...}{additional arguments passed to other functions} } \value{ -Returns NULL invisibly if the check was successful and throws an +Returns NULL invisibly if the assertion was successful and throws an error otherwise. } \description{ -Helper function to assert whether the input is suitable for -scoring. +Function assesses whether the inputs correspond to the +requirements for scoring binary forecasts. } \keyword{check-inputs} diff --git a/man/assert_input_point.Rd b/man/assert_input_point.Rd index 397a6ae0f..f2f9434a9 100644 --- a/man/assert_input_point.Rd +++ b/man/assert_input_point.Rd @@ -14,11 +14,11 @@ observed values of size n} predicted values of size n} } \value{ -Returns NULL invisibly if the check was successful and throws an +Returns NULL invisibly if the assertion was successful and throws an error otherwise. } \description{ -Helper function to assert whether the input is suitable for -scoring. +Function assesses whether the inputs correspond to the +requirements for scoring point forecasts. } \keyword{check-inputs} diff --git a/man/assert_input_quantile.Rd b/man/assert_input_quantile.Rd index c2a5e6aa5..87247c0ff 100644 --- a/man/assert_input_quantile.Rd +++ b/man/assert_input_quantile.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/check-inputs-scoring-functions.R \name{assert_input_quantile} \alias{assert_input_quantile} -\title{Assert that inputs are correct for sample-based forecast} +\title{Assert that inputs are correct for quantile-based forecast} \usage{ assert_input_quantile(observed, predicted, quantile) } \arguments{ -\item{observed}{Input to be checked. Should be a vector with the +\item{observed}{Input to be checked. Should be a numeric vector with the observed values of size n} \item{predicted}{Input to be checked. Should be nxN matrix of predictive @@ -21,11 +21,11 @@ denotes the quantile levels corresponding to the columns of the prediction matrix.} } \value{ -Returns NULL invisibly if the check was successful and throws an +Returns NULL invisibly if the assertion was successful and throws an error otherwise. } \description{ -Helper function to assert whether the input is suitable for -scoring. +Function assesses whether the inputs correspond to the +requirements for scoring quantile-based forecasts. } \keyword{internal} diff --git a/man/assert_input_sample.Rd b/man/assert_input_sample.Rd index 556ebaea1..027899adf 100644 --- a/man/assert_input_sample.Rd +++ b/man/assert_input_sample.Rd @@ -17,11 +17,11 @@ If \code{observed} is just a single number, then predicted values can just be a vector of size N.} } \value{ -Returns NULL invisibly if the check was successful and throws an +Returns NULL invisibly if the assertion was successful and throws an error otherwise. } \description{ -Helper function to assert whether the input is suitable for -scoring. +Function assesses whether the inputs correspond to the +requirements for scoring sample-based forecasts. } \keyword{check-inputs} diff --git a/man/check_not_null.Rd b/man/assert_not_null.Rd similarity index 88% rename from man/check_not_null.Rd rename to man/assert_not_null.Rd index 149e90b8a..276615941 100644 --- a/man/check_not_null.Rd +++ b/man/assert_not_null.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/check-input-helpers.R -\name{check_not_null} -\alias{check_not_null} +\name{assert_not_null} +\alias{assert_not_null} \title{Check Variable is not NULL} \usage{ -check_not_null(...) +assert_not_null(...) } \arguments{ \item{...}{The variables to check} diff --git a/man/assure_model_column.Rd b/man/assure_model_column.Rd new file mode 100644 index 000000000..456652df2 --- /dev/null +++ b/man/assure_model_column.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check-input-helpers.R +\name{assure_model_column} +\alias{assure_model_column} +\title{Assure that Data Has a \code{model} Column} +\usage{ +assure_model_column(data) +} +\value{ +The data.table with a column called \code{model} +} +\description{ +Check whether the data.table has a column called \code{model}. +If not, a column called \code{model} is added with the value \verb{Unspecified model}. +} +\keyword{internal} diff --git a/man/avail_forecasts.Rd b/man/avail_forecasts.Rd index a95fb8c57..57d47c5cf 100644 --- a/man/avail_forecasts.Rd +++ b/man/avail_forecasts.Rd @@ -7,22 +7,7 @@ avail_forecasts(data, by = NULL, collapse = c("quantile", "sample")) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} \item{by}{character vector or \code{NULL} (the default) that denotes the categories over which the number of forecasts should be counted. @@ -30,11 +15,13 @@ By default (\code{by = NULL}) this will be the unit of a single forecast (i.e. all available columns (apart from a few "protected" columns such as 'predicted' and 'observed') plus "quantile" or "sample_id" where present).} -\item{collapse}{character vector (default is \verb{c("quantile", "sample"}) with -names of categories for which the number of rows should be collapsed to one -when counting. For example, a single forecast is usually represented by a +\item{collapse}{character vector (default is \verb{c("quantile", "sample_id"}) +with names of categories for which the number of rows should be collapsed to +one when counting. For example, a single forecast is usually represented by a set of several quantiles or samples and collapsing these to one makes sure -that a single forecast only gets counted once.} +that a single forecast only gets counted once. Setting \code{collapse = c()} +would mean that all quantiles / samples would be counted as individual +forecasts.} } \value{ A data.table with columns as specified in \code{by} and an additional @@ -54,7 +41,6 @@ This is useful to determine whether there are any missing forecasts. data.table::setDTthreads(1) # only needed to avoid issues on CRAN available_forecasts(example_quantile, - collapse = c("quantile"), by = c("model", "target_type") ) } diff --git a/man/available_forecasts.Rd b/man/available_forecasts.Rd index 89278f879..3c214a1e3 100644 --- a/man/available_forecasts.Rd +++ b/man/available_forecasts.Rd @@ -4,25 +4,10 @@ \alias{available_forecasts} \title{Count Number of Available Forecasts} \usage{ -available_forecasts(data, by = NULL, collapse = c("quantile", "sample")) +available_forecasts(data, by = NULL, collapse = c("quantile", "sample_id")) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} \item{by}{character vector or \code{NULL} (the default) that denotes the categories over which the number of forecasts should be counted. @@ -30,11 +15,13 @@ By default (\code{by = NULL}) this will be the unit of a single forecast (i.e. all available columns (apart from a few "protected" columns such as 'predicted' and 'observed') plus "quantile" or "sample_id" where present).} -\item{collapse}{character vector (default is \verb{c("quantile", "sample"}) with -names of categories for which the number of rows should be collapsed to one -when counting. For example, a single forecast is usually represented by a +\item{collapse}{character vector (default is \verb{c("quantile", "sample_id"}) +with names of categories for which the number of rows should be collapsed to +one when counting. For example, a single forecast is usually represented by a set of several quantiles or samples and collapsing these to one makes sure -that a single forecast only gets counted once.} +that a single forecast only gets counted once. Setting \code{collapse = c()} +would mean that all quantiles / samples would be counted as individual +forecasts.} } \value{ A data.table with columns as specified in \code{by} and an additional @@ -50,7 +37,6 @@ This is useful to determine whether there are any missing forecasts. data.table::setDTthreads(1) # only needed to avoid issues on CRAN available_forecasts(example_quantile, - collapse = c("quantile"), by = c("model", "target_type") ) } diff --git a/man/check_attribute_conflict.Rd b/man/check_attribute_conflict.Rd new file mode 100644 index 000000000..c01ac264e --- /dev/null +++ b/man/check_attribute_conflict.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check-input-helpers.R +\name{check_attribute_conflict} +\alias{check_attribute_conflict} +\title{Check Whether There Is a Conflict Between Data and Attributes} +\usage{ +check_attribute_conflict(object, attribute, expected) +} +\arguments{ +\item{object}{The object to check} + +\item{attribute}{The name of the attribute to check} + +\item{expected}{The expected value of the attribute} +} +\value{ +Returns TRUE if the check was successful and a string with an +error message otherwise +} +\description{ +Check whether there is a conflict between a stored attribute and the +same value as inferred from the data. For example, this could be if +an attribute \code{forecast_unit} is stored, but is different from the +\code{forecast_unit} inferred from the data. The check is successful if +the stored and the inferred value are the same. +} +\keyword{internal} diff --git a/man/check_columns_present.Rd b/man/check_columns_present.Rd index 16ee334e2..cfe76f064 100644 --- a/man/check_columns_present.Rd +++ b/man/check_columns_present.Rd @@ -9,13 +9,15 @@ check_columns_present(data, columns) \arguments{ \item{data}{A data.frame or similar to be checked} -\item{columns}{names of columns to be checked} +\item{columns}{A character vector of column names to check} } \value{ -Returns string with a message with the first issue encountered if -any of the column names are not in data, otherwise returns TRUE +Returns TRUE if the check was successful and a string with an +error message otherwise } \description{ -Check column names are present in a data.frame +The functions loops over the column names and checks whether they are +present. If an issue is encountered, the function immediately stops +and returns a message with the first issue encountered. } \keyword{check-inputs} diff --git a/man/check_data_columns.Rd b/man/check_data_columns.Rd index 041a51915..04fba0892 100644 --- a/man/check_data_columns.Rd +++ b/man/check_data_columns.Rd @@ -10,12 +10,12 @@ check_data_columns(data) \item{data}{A data.frame or similar to be checked} } \value{ -Returns TRUE if basic requirements are satisfied and a string with -an error message otherwise +Returns TRUE if the check was successful and a string with an +error message otherwise } \description{ Checks whether data is a data.frame, whether columns -"observed" and "predicted" are presents -and checks that only one of "quantile" and "sample_id" is present. +"observed" and "predicted" are present, and checks that only one of +"quantile" and "sample_id" is present. } \keyword{check-inputs} diff --git a/man/check_duplicates.Rd b/man/check_duplicates.Rd index 88bec59b3..e4b0918ee 100644 --- a/man/check_duplicates.Rd +++ b/man/check_duplicates.Rd @@ -14,8 +14,8 @@ the unit of a single forecast. If \code{NULL} (the default) the function tries to infer the unit of a single forecast.} } \value{ -Returns an string with an error message if an issue is found, -otherwise returns TRUE +Returns TRUE if the check was successful and a string with an +error message otherwise } \description{ Runs \code{\link[=get_duplicate_forecasts]{get_duplicate_forecasts()}} and returns a message if an issue is encountered diff --git a/man/check_has_attribute.Rd b/man/check_has_attribute.Rd index 48b49c208..339e0d6d0 100644 --- a/man/check_has_attribute.Rd +++ b/man/check_has_attribute.Rd @@ -12,8 +12,8 @@ check_has_attribute(object, attribute) \item{attribute}{name of an attribute to be checked} } \value{ -Returns TRUE if attribute is there and an error message as -a string otherwise +Returns TRUE if the check was successful and a string with an +error message otherwise } \description{ Checks whether an object has an attribute diff --git a/man/check_input_binary.Rd b/man/check_input_binary.Rd index 2120af24d..5b206f35b 100644 --- a/man/check_input_binary.Rd +++ b/man/check_input_binary.Rd @@ -19,11 +19,11 @@ the corresponding value in \code{observed} will be equal to the highest available factor level.} } \value{ -Returns TRUE if the check was successful and a string with the +Returns TRUE if the check was successful and a string with an error message otherwise } \description{ -Helper function to check whether the input is suitable for -scoring. +Function assesses whether the inputs correspond to the +requirements for scoring binary forecasts. } \keyword{check-inputs} diff --git a/man/check_input_point.Rd b/man/check_input_point.Rd index 5dc0835e0..060b785b6 100644 --- a/man/check_input_point.Rd +++ b/man/check_input_point.Rd @@ -14,11 +14,11 @@ observed values of size n} predicted values of size n} } \value{ -Returns TRUE if the check was successful and a string with the +Returns TRUE if the check was successful and a string with an error message otherwise } \description{ -Helper function to check whether the input is suitable for -scoring. +Function assesses whether the inputs correspond to the +requirements for scoring point forecasts. } \keyword{check-inputs} diff --git a/man/check_input_quantile.Rd b/man/check_input_quantile.Rd index 6a7a575c8..a2315aa2e 100644 --- a/man/check_input_quantile.Rd +++ b/man/check_input_quantile.Rd @@ -7,7 +7,7 @@ check_input_quantile(observed, predicted, quantile) } \arguments{ -\item{observed}{Input to be checked. Should be a vector with the +\item{observed}{Input to be checked. Should be a numeric vector with the observed values of size n} \item{predicted}{Input to be checked. Should be nxN matrix of predictive @@ -21,11 +21,11 @@ denotes the quantile levels corresponding to the columns of the prediction matrix.} } \value{ -Returns TRUE if the check was successful and a string with the +Returns TRUE if the check was successful and a string with an error message otherwise } \description{ -Helper function to check whether the input is suitable for -scoring. +Function assesses whether the inputs correspond to the +requirements for scoring quantile-based forecasts. } \keyword{check-inputs} diff --git a/man/check_input_sample.Rd b/man/check_input_sample.Rd index 92260edc2..607bebb5f 100644 --- a/man/check_input_sample.Rd +++ b/man/check_input_sample.Rd @@ -17,11 +17,11 @@ If \code{observed} is just a single number, then predicted values can just be a vector of size N.} } \value{ -Returns TRUE if the check was successful and a string with the +Returns TRUE if the check was successful and a string with an error message otherwise } \description{ -Helper function to check whether the input is suitable for -scoring. +Function assesses whether the inputs correspond to the +requirements for scoring sample-based forecasts. } \keyword{check-inputs} diff --git a/man/check_metrics.Rd b/man/check_metrics.Rd deleted file mode 100644 index dbfe3d58d..000000000 --- a/man/check_metrics.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/check-input-helpers.R -\name{check_metrics} -\alias{check_metrics} -\title{Check whether the desired metrics are available in scoringutils} -\usage{ -check_metrics(metrics) -} -\arguments{ -\item{metrics}{character vector with desired metrics} -} -\value{ -A character vector with metrics that can be used for downstream -computation -} -\description{ -Helper function to check whether desired metrics are -available. If the input is \code{NULL}, all metrics will be returned. -} -\keyword{internal} diff --git a/man/check_no_NA_present.Rd b/man/check_no_NA_present.Rd index 0d5e0afa1..cf89ce468 100644 --- a/man/check_no_NA_present.Rd +++ b/man/check_no_NA_present.Rd @@ -9,13 +9,15 @@ check_no_NA_present(data, columns) \arguments{ \item{data}{A data.frame or similar to be checked} -\item{columns}{names of columns to be checked} +\item{columns}{A character vector of column names to check} } \value{ -Returns an string with a message if any of the column names -have NA values, otherwise returns TRUE +Returns TRUE if the check was successful and a string with an +error message otherwise } \description{ -Check columns in data.frame don't have NA values +Function checks whether any of the columns in a data.frame, +as specified in \code{columns}, have NA values. If so, it returns a string with +an error message, otherwise it returns TRUE. } \keyword{internal} diff --git a/man/check_number_per_forecast.Rd b/man/check_number_per_forecast.Rd index b17878d3a..4d0a18432 100644 --- a/man/check_number_per_forecast.Rd +++ b/man/check_number_per_forecast.Rd @@ -7,15 +7,17 @@ check_number_per_forecast(data, forecast_unit) } \arguments{ -\item{data}{data.frame to check} +\item{data}{A data.frame or similar to be checked} \item{forecast_unit}{Character vector denoting the unit of a single forecast.} } \value{ -Returns an string with a message if any forecasts have differing -numbers of samples or quantiles, otherwise returns TRUE +Returns TRUE if the check was successful and a string with an +error message otherwise } \description{ -Check that all forecasts have the same number of quantiles or samples +Function checks the number of quantiles or samples per forecast. +If the number of quantiles or samples is the same for all forecasts, it +returns TRUE and a string with an error message otherwise. } \keyword{internal} diff --git a/man/check_numeric_vector.Rd b/man/check_numeric_vector.Rd index 70eafa2c1..847bef940 100644 --- a/man/check_numeric_vector.Rd +++ b/man/check_numeric_vector.Rd @@ -7,11 +7,49 @@ check_numeric_vector(x, ...) } \arguments{ -\item{x}{additional arguments to pass to \code{check_numeric()}} +\item{x}{input to check} + +\item{...}{ + Arguments passed on to \code{\link[checkmate:checkNumeric]{checkmate::check_numeric}} + \describe{ + \item{\code{lower}}{[\code{numeric(1)}]\cr +Lower value all elements of \code{x} must be greater than or equal to.} + \item{\code{upper}}{[\code{numeric(1)}]\cr +Upper value all elements of \code{x} must be lower than or equal to.} + \item{\code{finite}}{[\code{logical(1)}]\cr +Check for only finite values? Default is \code{FALSE}.} + \item{\code{any.missing}}{[\code{logical(1)}]\cr +Are vectors with missing values allowed? Default is \code{TRUE}.} + \item{\code{all.missing}}{[\code{logical(1)}]\cr +Are vectors with no non-missing values allowed? Default is \code{TRUE}. +Note that empty vectors do not have non-missing values.} + \item{\code{len}}{[\code{integer(1)}]\cr +Exact expected length of \code{x}.} + \item{\code{min.len}}{[\code{integer(1)}]\cr +Minimal length of \code{x}.} + \item{\code{max.len}}{[\code{integer(1)}]\cr +Maximal length of \code{x}.} + \item{\code{unique}}{[\code{logical(1)}]\cr +Must all values be unique? Default is \code{FALSE}.} + \item{\code{sorted}}{[\code{logical(1)}]\cr +Elements must be sorted in ascending order. Missing values are ignored.} + \item{\code{names}}{[\code{character(1)}]\cr +Check for names. See \code{\link[checkmate]{checkNamed}} for possible values. +Default is \dQuote{any} which performs no check at all. +Note that you can use \code{\link[checkmate]{checkSubset}} to check for a specific set of names.} + \item{\code{typed.missing}}{[\code{logical(1)}]\cr +If set to \code{FALSE} (default), all types of missing values (\code{NA}, \code{NA_integer_}, +\code{NA_real_}, \code{NA_character_} or \code{NA_character_}) as well as empty vectors are allowed +while type-checking atomic input. +Set to \code{TRUE} to enable strict type checking.} + \item{\code{null.ok}}{[\code{logical(1)}]\cr +If set to \code{TRUE}, \code{x} may also be \code{NULL}. +In this case only a type check of \code{x} is performed, all additional checks are disabled.} + }} } \value{ -Either TRUE if the test is successful or a string with an error -message +Returns TRUE if the check was successful and a string with an +error message otherwise } \description{ Helper function diff --git a/man/check_try.Rd b/man/check_try.Rd index 931c1d4c5..87f479f66 100644 --- a/man/check_try.Rd +++ b/man/check_try.Rd @@ -10,11 +10,13 @@ check_try(expr) \item{expr}{an expression to be evaluated} } \value{ -Returns TRUE if expression was executed successfully, otherwise -returns a string with the resulting error message +Returns TRUE if the check was successful and a string with an +error message otherwise } \description{ Tries to execute an expression. Internally, this is used to -see whether assertions fail when checking inputs +see whether assertions fail when checking inputs (i.e. to convert an +\verb{assert_*()} statement into a check). If the expression fails, the error +message is returned. If the expression succeeds, \code{TRUE} is returned. } \keyword{internal} diff --git a/man/document_assert_functions.Rd b/man/document_assert_functions.Rd new file mode 100644 index 000000000..ee0dbc967 --- /dev/null +++ b/man/document_assert_functions.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/documentation-templates.R +\name{document_assert_functions} +\alias{document_assert_functions} +\title{Documentation template for check functions} +\value{ +Returns NULL invisibly if the assertion was successful and throws an +error otherwise. +} +\description{ +Documentation template for check functions +} diff --git a/man/document_check_functions.Rd b/man/document_check_functions.Rd new file mode 100644 index 000000000..6f7f7f677 --- /dev/null +++ b/man/document_check_functions.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/documentation-templates.R +\name{document_check_functions} +\alias{document_check_functions} +\title{Documentation template for check functions} +\arguments{ +\item{data}{A data.frame or similar to be checked} + +\item{columns}{A character vector of column names to check} +} +\value{ +Returns TRUE if the check was successful and a string with an +error message otherwise +} +\description{ +Documentation template for check functions +} diff --git a/man/document_score_data.Rd b/man/document_score_data.Rd new file mode 100644 index 000000000..9e30190d3 --- /dev/null +++ b/man/document_score_data.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/documentation-templates.R +\name{document_score_data} +\alias{document_score_data} +\title{Documentation template for scoring input data} +\arguments{ +\item{data}{A data frame (or similar) of forecasts following the +specifications detailed in \code{\link[=score]{score()}}.} + +\item{scores}{A data.table of scores as produced by \code{\link[=score]{score()}}.} +} +\description{ +Documentation template for scoring input data +} diff --git a/man/document_test_functions.Rd b/man/document_test_functions.Rd new file mode 100644 index 000000000..620cb1989 --- /dev/null +++ b/man/document_test_functions.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/documentation-templates.R +\name{document_test_functions} +\alias{document_test_functions} +\title{Documentation template for test functions} +\value{ +Returns TRUE if the check was successful and FALSE otherwise +} +\description{ +Documentation template for test functions +} diff --git a/man/get_forecast_type.Rd b/man/get_forecast_type.Rd index 560a24589..a923baa80 100644 --- a/man/get_forecast_type.Rd +++ b/man/get_forecast_type.Rd @@ -7,22 +7,7 @@ get_forecast_type(data) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} } \value{ Character vector of length one with either "binary", "quantile", diff --git a/man/get_forecast_unit.Rd b/man/get_forecast_unit.Rd index f17349853..065fe94d4 100644 --- a/man/get_forecast_unit.Rd +++ b/man/get_forecast_unit.Rd @@ -4,25 +4,13 @@ \alias{get_forecast_unit} \title{Get unit of a single forecast} \usage{ -get_forecast_unit(data) +get_forecast_unit(data, check_conflict = FALSE) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} +\item{data}{A data.frame or data.table with predicted and observed values.} -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{check_conflict}{Whether or not to check whether there is a conflict +between a stored attribute and the inferred forecast unit. Defaults to FALSE.} } \value{ A character vector with the column names that define the unit of diff --git a/man/get_protected_columns.Rd b/man/get_protected_columns.Rd index f171b4c22..4bbf8f6da 100644 --- a/man/get_protected_columns.Rd +++ b/man/get_protected_columns.Rd @@ -7,22 +7,7 @@ get_protected_columns(data = NULL) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} } \value{ A character vector with the names of protected columns in the data. diff --git a/man/get_target_type.Rd b/man/get_target_type.Rd deleted file mode 100644 index 78bf86455..000000000 --- a/man/get_target_type.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_-functions.R -\name{get_target_type} -\alias{get_target_type} -\title{Get type of the target true values of a forecast} -\usage{ -get_target_type(data) -} -\arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} -} -\value{ -Character vector of length one with either "binary", "integer", or -"continuous" -} -\description{ -Internal helper function to get the type of the target -true values of a forecast. That is inferred based on the type and the -content of the \code{observed} column. -} -\keyword{internal} diff --git a/man/get_type.Rd b/man/get_type.Rd new file mode 100644 index 000000000..bcce4b70a --- /dev/null +++ b/man/get_type.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_-functions.R +\name{get_type} +\alias{get_type} +\title{Get type of a vector or matrix of observed values or predictions} +\usage{ +get_type(x) +} +\arguments{ +\item{x}{Input used to get the type.} +} +\value{ +Character vector of length one with either "classification", +"integer", or "continuous" +} +\description{ +Internal helper function to get the type of a vector (usually +of observed or predicted values). The function checks whether the input is +a factor, or else whether it is integer (or can be coerced to integer) or +whether it's continuous. +} +\keyword{internal} diff --git a/man/logs_binary.Rd b/man/logs_binary.Rd deleted file mode 100644 index 5a7d4c144..000000000 --- a/man/logs_binary.Rd +++ /dev/null @@ -1,58 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/metrics-binary.R -\name{logs_binary} -\alias{logs_binary} -\title{Log Score for Binary outcomes} -\usage{ -logs_binary(observed, predicted) -} -\arguments{ -\item{observed}{A factor of length n with exactly two levels, holding -the observed values. -The highest factor level is assumed to be the reference level. This means -that \code{predicted} represents the probability that the observed value is -equal to the highest factor level.} - -\item{predicted}{A numeric vector of length n, holding probabilities. -Values represent the probability that the corresponding outcome is equal to -the highest level of the factor \code{observed}.} -} -\value{ -A numeric vector with log scores -} -\description{ -Computes the Log Score for probabilistic forecasts of binary outcomes. -} -\details{ -The Log Score is a proper score rule suited to assessing the accuracy of -probabilistic binary predictions. The outcomes can be either 0 or 1, -the predictions must be a probability that the true outcome will be 1. - -The Log Score is then computed as the negative logarithm of the probability -assigned to the true outcome. Reporting the negative logarithm means that -smaller values are better. - -The function requires users to provide observed values as a factor in order -to distinguish its input from the input format required for scoring point -forecasts. Internally, however, factors will be converted to numeric values. -A factor \verb{observed = factor(c(0, 1, 1, 0, 1)} with two levels (\code{0} and \code{1}) -would internally be coerced to a numeric vector (in this case this would -result in the numeric vector c(1, 2, 2, 1, 1)). After subtracting 1, the -resulting vector (\code{c(0, 1, 1, 0)} in this case) is used for internal -calculations. All predictions are assumed represent the probability that the -outcome is equal of the highest factor level (in this case that the -outcome is equal to 1). -You could alternatively also provide a vector like -\code{observed = factor(c("a", "b", "b", "a"))} (with two levels, \code{a} and \code{b}), -which would result in exactly the same internal representation. Probabilities -then represent the probability that the outcome is equal to "b". -If you want your predictions to be probabilities that the outcome is "a", -then you could of course make \code{observed} a factor with levels swapped, i.e. -\code{observed = factor(c("a", "b", "b", "a"), levels = c("b", "a"))} -} -\examples{ -observed <- factor(sample(c(0, 1), size = 30, replace = TRUE)) -predicted <- runif(n = 30, min = 0, max = 1) -logs_binary(observed, predicted) -} -\keyword{metric} diff --git a/man/make_NA.Rd b/man/make_NA.Rd index 66be33ce1..1b1814c0e 100644 --- a/man/make_NA.Rd +++ b/man/make_NA.Rd @@ -10,22 +10,7 @@ make_NA(data = NULL, what = c("truth", "forecast", "both"), ...) make_na(data = NULL, what = c("truth", "forecast", "both"), ...) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} \item{what}{character vector that determines which values should be turned into \code{NA}. If \code{what = "truth"}, values in the column 'observed' will be diff --git a/man/new_scoringutils.Rd b/man/new_scoringutils.Rd index 4792675df..b2f83ff6a 100644 --- a/man/new_scoringutils.Rd +++ b/man/new_scoringutils.Rd @@ -7,22 +7,7 @@ new_scoringutils(data, classname) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} \item{classname}{name of the class to be created} } diff --git a/man/remove_scoringutils_class.Rd b/man/remove_scoringutils_class.Rd new file mode 100644 index 000000000..9257d84dd --- /dev/null +++ b/man/remove_scoringutils_class.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{remove_scoringutils_class} +\alias{remove_scoringutils_class} +\title{Remove scoringutils_ Class and Attributes} +\usage{ +remove_scoringutils_class(object) +} +\arguments{ +\item{object}{An object to remove scoringutils classes and attributes from} +} +\value{ +The object with scoringutils classes and attributes removed +} +\description{ +This function removes all classes that start with +"scoringutils_" and all attributes associated with scoringutils. +} +\keyword{internal} diff --git a/man/score.Rd b/man/score.Rd index 52a8c32a9..bf239ba7e 100644 --- a/man/score.Rd +++ b/man/score.Rd @@ -7,7 +7,7 @@ \alias{score.scoringutils_point} \alias{score.scoringutils_sample} \alias{score.scoringutils_quantile} -\title{Evaluate forecasts} +\title{Evaluate forecasts in a data.frame format} \usage{ score(data, ...) @@ -22,73 +22,113 @@ score(data, ...) \method{score}{scoringutils_quantile}(data, metrics = NULL, ...) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction +\item{data}{A data.frame or data.table with predicted and observed values.} + +\item{...}{additional arguments} + +\item{metrics}{A named list of scoring functions. Names will be used as +column names in the output. See \code{\link[=metrics_point]{metrics_point()}}, \code{\link[=metrics_binary]{metrics_binary()}}, +\code{metrics_quantile()}, and \code{\link[=metrics_sample]{metrics_sample()}} for more information on the +default metrics used.} +} +\value{ +A data.table with unsummarised scores. This will generally be +one score per forecast (as defined by the unit of a single forecast). + +For quantile-based forecasts, one score per quantile will be returned +instead. This is done as scores can be computed and may be of interest +for individual quantiles. You can call \code{\link[=summarise_scores]{summarise_scores()}}) on the +unsummarised scores to obtain one score per forecast unit for quantile-based +forecasts. } +\description{ +\code{score()} applies a selection of scoring metrics to a data.frame +of forecasts. It is the workhorse of the \code{scoringutils} package. +\code{score()} is a generic that dispatches to different methods depending on the +class of the input data. The default method is \code{score.default()}, which +validates the input, assigns as class based on the forecast type, and then +calls \code{score()} again to dispatch to the appropriate method. See below for +more information on how forecast types are determined. +} +\details{ +\strong{Forecast types and input format} -Depending on the forecast type, one of the following columns may be required: +Various different forecast types / forecast formats are supported. At the +moment, those are \itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} +\item point forecasts +\item binary forecasts ("soft binary classification") +\item Probabilistic forecasts in a quantile-based format (a forecast is +represented as a set of predictive quantiles) +\item Probabilistic forecasts in a sample-based format (a forecast is represented +as a set of predictive samples) } -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +Forecast types are determined based on the columns present in the input data. -\item{...}{additional parameters passed down to other functions.} +\emph{Point forecasts} require a column \code{observed} of type numeric and a column +\code{predicted} of type numeric. -\item{metrics}{the metrics you want to have in the output. If \code{NULL} (the -default), all available metrics will be computed.} -} -\value{ -A data.table with unsummarised scores. There will be one score per -quantile or sample_id, which is usually not desired, so you should almost -always run \code{\link[=summarise_scores]{summarise_scores()}} on the unsummarised scores. -} -\description{ -This function allows automatic scoring of forecasts using a -range of metrics. For most users it will be the workhorse for -scoring forecasts as it wraps the lower level functions package functions. -However, these functions are also available if you wish to make use of them -independently. - -A range of forecasts formats are supported, including quantile-based, -sample-based, binary forecasts. Prior to scoring, users may wish to make use -of \code{\link[=validate]{validate()}} to ensure that the input data is in a supported -format though this will also be run internally by \code{\link[=score]{score()}}. Examples for -each format are also provided (see the documentation for \code{data} below or in -\code{\link[=validate]{validate()}}). - -Each format has a set of required columns (see below). Additional columns may -be present to indicate a grouping of forecasts. For example, we could have -forecasts made by different models in various locations at different time -points, each for several weeks into the future. It is important, that there -are only columns present which are relevant in order to group forecasts. -A combination of different columns should uniquely define the -\emph{unit of a single forecast}, meaning that a single forecast is defined by the -values in the other columns. Adding additional unrelated columns may alter -results. - -To obtain a quick overview of the currently supported evaluation metrics, -have a look at the \link{metrics} data included in the package. The column -\code{metrics$Name} gives an overview of all available metric names that can be -computed. If interested in an unsupported metric please open a \href{https://github.com/epiforecasts/scoringutils/issues}{feature request} or consider -contributing a pull request. - -For additional help and examples, check out the \href{https://epiforecasts.io/scoringutils/articles/scoringutils.html}{Getting Started Vignette} -as well as the paper \href{https://arxiv.org/abs/2205.07090}{Evaluating Forecasts with scoringutils in R}. +\emph{Binary forecasts} require a column \code{observed} of type factor with exactly +two levels and a column \code{predicted} of type numeric with probabilities, +corresponding to the probability that \code{observed} is equal to the second +factor level. See details \link[=brier_score]{here} for more information. + +\emph{Quantile-based forecasts} require a column \code{observed} of type numeric, +a column \code{predicted} of type numeric, and a column \code{quantile} of type numeric +with quantile-levels (between 0 and 1). + +\emph{Sample-based forecasts} require a column \code{observed} of type numeric, +a column \code{predicted} of type numeric, and a column \code{sample_id} of type +numeric with sample indices. + +For more information see the vignettes and the example data +(\link{example_quantile}, \link{example_continuous}, \link{example_integer}, +\code{\link[=example_point]{example_point()}}, and \link{example_binary}). + +\strong{Forecast unit} + +In order to score forecasts, \code{scoringutils} needs to know which of the rows +of the data belong together and jointly form a single forecasts. This is +easy e.g. for point forecast, where there is one row per forecast. For +quantile or sample-based forecasts, however, there are multiple rows that +belong to single forecast. + +The \emph{forecast unit} or \emph{unit of a single forecast} is then described by the +combination of columns that uniquely identify a single forecast. +For example, we could have forecasts made by different models in various +locations at different time points, each for several weeks into the future. +The forecast unit could then be described as +\code{forecast_unit = c("model", "location", "forecast_date", "forecast_horizon")}. +\code{scoringutils} automatically tries to determine the unit of a single +forecast. It uses all existing columns for this, which means that no columns +must be present that are unrelated to the forecast unit. As a very simplistic +example, if you had an additional row, "even", that is one if the row number +is even and zero otherwise, then this would mess up scoring as \code{scoringutils} +then thinks that this column was relevant in defining the forecast unit. + +In order to avoid issues, we recommend using the function +\code{\link[=set_forecast_unit]{set_forecast_unit()}} to determine the forecast unit manually. +The function simply drops unneeded columns, while making sure that all +necessary, 'protected columns' like "predicted" or "observed" are retained. + +\strong{Validating inputs} + +We recommend that users validate their input prior to scoring using the +function \code{\link[=validate]{validate()}} (though this will also be run internally by \code{\link[=score]{score()}}). +The function checks the input data and provides helpful information. + +\strong{Further help} + +For additional help and examples, check out the \href{https://epiforecasts.io/scoringutils/articles/scoringutils.html}{Getting Started Vignette} as +well as the paper \href{https://arxiv.org/abs/2205.07090}{Evaluating Forecasts with scoringutils in R}. } \examples{ library(magrittr) # pipe operator data.table::setDTthreads(1) # only needed to avoid issues on CRAN -validate(example_quantile) -score(example_quantile) \%>\% - add_coverage(by = c("model", "target_type")) \%>\% +validated <- validate(example_quantile) +score(validated) \%>\% summarise_scores(by = c("model", "target_type")) # set forecast unit manually (to avoid issues with scoringutils trying to @@ -104,20 +144,16 @@ example_quantile \%>\% \dontrun{ score(example_binary) score(example_quantile) +score(example_point) score(example_integer) score(example_continuous) } -# score point forecasts (marked by 'NA' in the quantile column) -score(example_point) \%>\% - summarise_scores(by = "model", na.rm = TRUE) - } \references{ -Funk S, Camacho A, Kucharski AJ, Lowe R, Eggo RM, Edmunds WJ -(2019) Assessing the performance of real-time epidemic forecasts: A -case study of Ebola in the Western Area region of Sierra Leone, 2014-15. -PLoS Comput Biol 15(2): e1006785. \doi{10.1371/journal.pcbi.1006785} +Bosse NI, Gruson H, Cori A, van Leeuwen E, Funk S, Abbott S +(2022) Evaluating Forecasts with scoringutils in R. +\doi{10.48550/arXiv.2205.07090} } \author{ Nikos Bosse \email{nikosbosse@gmail.com} diff --git a/man/score_quantile.Rd b/man/score_quantile.Rd index 002eaa147..5f51f94ec 100644 --- a/man/score_quantile.Rd +++ b/man/score_quantile.Rd @@ -14,29 +14,16 @@ score_quantile( ) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} \item{forecast_unit}{A character vector with the column names that define the unit of a single forecast, i.e. a forecast was made for a combination of the values in \code{forecast_unit}} -\item{metrics}{the metrics you want to have in the output. If \code{NULL} (the -default), all available metrics will be computed.} +\item{metrics}{A named list of scoring functions. Names will be used as +column names in the output. See \code{\link[=metrics_point]{metrics_point()}}, \code{\link[=metrics_binary]{metrics_binary()}}, +\code{metrics_quantile()}, and \code{\link[=metrics_sample]{metrics_sample()}} for more information on the +default metrics used.} \item{weigh}{if TRUE, weigh the score by alpha / 2, so it can be averaged into an interval score that, in the limit, corresponds to CRPS. Alpha is the @@ -65,10 +52,9 @@ A data.table with appropriate scores. For more information see Evaluate forecasts in a Quantile-Based Format } \references{ -Funk S, Camacho A, Kucharski AJ, Lowe R, Eggo RM, Edmunds WJ -(2019) Assessing the performance of real-time epidemic forecasts: A -case study of Ebola in the Western Area region of Sierra Leone, 2014-15. -PLoS Comput Biol 15(2): e1006785. \doi{10.1371/journal.pcbi.1006785} +Bosse NI, Gruson H, Cori A, van Leeuwen E, Funk S, Abbott S +(2022) Evaluating Forecasts with scoringutils in R. +\doi{10.48550/arXiv.2205.07090} } \author{ Nikos Bosse \email{nikosbosse@gmail.com} diff --git a/man/brier_score.Rd b/man/scoring-functions-binary.Rd similarity index 68% rename from man/brier_score.Rd rename to man/scoring-functions-binary.Rd index ab0c4574b..9a06018a1 100644 --- a/man/brier_score.Rd +++ b/man/scoring-functions-binary.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/metrics-binary.R -\name{brier_score} +\name{scoring-functions-binary} +\alias{scoring-functions-binary} \alias{brier_score} -\title{Brier Score} +\alias{logs_binary} +\title{Metrics for Binary Outcomes} \usage{ brier_score(observed, predicted) + +logs_binary(observed, predicted) } \arguments{ \item{observed}{A factor of length n with exactly two levels, holding @@ -18,28 +22,34 @@ Values represent the probability that the corresponding outcome is equal to the highest level of the factor \code{observed}.} } \value{ -A numeric value with the Brier Score, i.e. the mean squared -error of the given probability forecasts +A numeric vector of size n with the Brier scores + +A numeric vector of size n with log scores } \description{ -Computes the Brier Score for probabilistic forecasts of binary outcomes. -} -\details{ -The Brier score is a proper score rule that assesses the accuracy of -probabilistic binary predictions. The outcomes can be either 0 or 1, -the predictions must be a probability that the observed outcome will be 1. +\strong{Brier score} -The Brier Score is then computed as the mean squared error between the -probabilistic prediction and the observed outcome. +The Brier Score is the mean squared error between the probabilistic +prediction and the observed outcome. The Brier score is a proper scoring +rule. Small values are better (best is 0, the worst is 1). \deqn{ \textrm{Brier\_Score} = \frac{1}{N} \sum_{t = 1}^{n} (\textrm{prediction}_t - - \textrm{outcome}_t)^2 + \textrm{outcome}_t)^2, }{ - Brier_Score = 1/N sum_{t = 1}^{n} (prediction_t - outcome_t)² -} + Brier_Score = 1/N sum_{t = 1}^{n} (prediction_t - outcome_t)², +} where \eqn{\textrm{outcome}_t \in \{0, 1\}}{outcome_t in {0, 1}}, and +\eqn{\textrm{prediction}_t \in [0, 1]}{prediction_t in [0, 1]} represents +the probability that the outcome is equal to 1. + +\strong{Log score for binary outcomes} -The function requires users to provide observed values as a factor in order +The Log Score is the negative logarithm of the probability +assigned to the observed value. It is a proper scoring rule. Small values +are better (best is zero, worst is infinity). +} +\details{ +The functions require users to provide observed values as a factor in order to distinguish its input from the input format required for scoring point forecasts. Internally, however, factors will be converted to numeric values. A factor \verb{observed = factor(c(0, 1, 1, 0, 1)} with two levels (\code{0} and \code{1}) @@ -63,5 +73,6 @@ observed <- factor(sample(c(0, 1), size = 30, replace = TRUE)) predicted <- runif(n = 30, min = 0, max = 1) brier_score(observed, predicted) +logs_binary(observed, predicted) } \keyword{metric} diff --git a/man/set_forecast_unit.Rd b/man/set_forecast_unit.Rd index b0ffd8603..0f1dcc7d3 100644 --- a/man/set_forecast_unit.Rd +++ b/man/set_forecast_unit.Rd @@ -7,22 +7,7 @@ set_forecast_unit(data, forecast_unit) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} \item{forecast_unit}{Character vector with the names of the columns that uniquely identify a single forecast.} diff --git a/man/strip_attributes.Rd b/man/strip_attributes.Rd new file mode 100644 index 000000000..5f727f539 --- /dev/null +++ b/man/strip_attributes.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{strip_attributes} +\alias{strip_attributes} +\title{Strip attributes from an object} +\usage{ +strip_attributes(object, attributes) +} +\arguments{ +\item{object}{An object to remove attributes from} + +\item{attributes}{A character vector of attribute names to remove from the +object} +} +\value{ +The object with attributes removed +} +\description{ +This function removes all attributes from an object that are +specified in the \code{attributes} argument. +} +\keyword{internal} diff --git a/man/summarise_scores.Rd b/man/summarise_scores.Rd index b875d3a40..3c461bfd0 100644 --- a/man/summarise_scores.Rd +++ b/man/summarise_scores.Rd @@ -47,9 +47,10 @@ Summarise scores as produced by \code{\link[=score]{score()}} \examples{ data.table::setDTthreads(1) # only needed to avoid issues on CRAN library(magrittr) # pipe operator - +\dontrun{ scores <- score(example_continuous) summarise_scores(scores) +} # summarise over samples or quantiles to get one score per forecast diff --git a/man/test_columns_not_present.Rd b/man/test_columns_not_present.Rd index 3b025f994..f55fe25b1 100644 --- a/man/test_columns_not_present.Rd +++ b/man/test_columns_not_present.Rd @@ -9,12 +9,14 @@ test_columns_not_present(data, columns) \arguments{ \item{data}{A data.frame or similar to be checked} -\item{columns}{names of columns to be checked} +\item{columns}{A character vector of column names to check} } \value{ Returns TRUE if none of the columns are present and FALSE otherwise } \description{ -Test whether column names are NOT present in a data.frame +The function checks whether all column names are NOT present. +If none of the columns are present, the function returns TRUE. If one or +more columns are present, the function returns FALSE. } \keyword{internal} diff --git a/man/test_columns_present.Rd b/man/test_columns_present.Rd index 1e463316f..ed5076417 100644 --- a/man/test_columns_present.Rd +++ b/man/test_columns_present.Rd @@ -9,12 +9,14 @@ test_columns_present(data, columns) \arguments{ \item{data}{A data.frame or similar to be checked} -\item{columns}{names of columns to be checked} +\item{columns}{A character vector of column names to check} } \value{ Returns TRUE if all columns are present and FALSE otherwise } \description{ -Test whether all column names are present in a data.frame +The function checks whether all column names are present. If +one or more columns are missing, the function returns FALSE. If all columns +are present, the function returns TRUE. } \keyword{internal} diff --git a/man/transform_forecasts.Rd b/man/transform_forecasts.Rd index 406791bad..a77043a95 100644 --- a/man/transform_forecasts.Rd +++ b/man/transform_forecasts.Rd @@ -7,22 +7,7 @@ transform_forecasts(data, fun = log_shift, append = TRUE, label = "log", ...) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} \item{fun}{A function used to transform both observed values and predictions. The default function is \code{\link[=log_shift]{log_shift()}}, a custom function that is essentially @@ -55,7 +40,8 @@ additional column, `scale', present which will be set to "natural" for the untransformed forecasts. } \description{ -Function to transform forecasts and observed values before scoring. +Function to transform forecasts and observed values before +scoring. } \details{ There are a few reasons, depending on the circumstances, for diff --git a/man/validate.Rd b/man/validate.Rd index e9d190f58..88badf49a 100644 --- a/man/validate.Rd +++ b/man/validate.Rd @@ -22,24 +22,9 @@ validate(data, ...) \method{validate}{scoringutils_sample}(data, ...) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} -\item{...}{additional parameters passed down to other functions.} +\item{...}{additional arguments} } \value{ Depending on the forecast type, an object of class diff --git a/man/validate_general.Rd b/man/validate_general.Rd index b12aced3d..548fa0322 100644 --- a/man/validate_general.Rd +++ b/man/validate_general.Rd @@ -7,22 +7,7 @@ validate_general(data) } \arguments{ -\item{data}{A data.frame or data.table with the following columns: -\itemize{ -\item \code{observed} - observed values -\item \code{predicted} - predictions, predictive samples or predictive quantiles -\item \code{model} - name of the model or forecaster who made a prediction -} - -Depending on the forecast type, one of the following columns may be required: -\itemize{ -\item \code{sample_id} - index for the predictive samples in the 'predicted' column -\item \code{quantile}: quantile-level of the corresponding value in \code{predicted} -} - -For more information see the vignettes and the example data -(\link{example_quantile}, \link{example_continuous}, -\link{example_integer}, \code{\link[=example_point]{example_point()}}, and \link{example_binary}).} +\item{data}{A data.frame or data.table with predicted and observed values.} } \value{ returns the input, with a few new attributes that hold additional diff --git a/tests/testthat/_snaps/plot_predictions/many-quantiles.svg b/tests/testthat/_snaps/plot_predictions/many-quantiles.svg index c89aa7a4a..a0b0e7f22 100644 --- a/tests/testthat/_snaps/plot_predictions/many-quantiles.svg +++ b/tests/testthat/_snaps/plot_predictions/many-quantiles.svg @@ -20,243 +20,299 @@ - - + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + - + - - + + - -IT -Cases + +IT +Cases - - + + - -IT -Deaths + +IT +Deaths - - + + - + - - + + - -FR -Deaths + +FR +Deaths - - + + - -GB -Cases + +GB +Cases - - + + - -GB -Deaths + +GB +Deaths @@ -292,103 +348,122 @@ Cases - - - - - - -May 15 -Jun 01 -Jun 15 -Jul 01 -Jul 15 - - - - - - -May 15 -Jun 01 -Jun 15 -Jul 01 -Jul 15 - - - - - - -May 15 -Jun 01 -Jun 15 -Jul 01 -Jul 15 - --2e+05 --1e+05 -0e+00 -1e+05 - - - - - -100 -200 - - - -400 -800 -1200 -1600 - - - - - -0e+00 -1e+05 -2e+05 -3e+05 - - - - - -500 -1000 -1500 - - - - -0 -30000 -60000 -90000 - - - - - -400 -800 -1200 - - - - -20000 -40000 -60000 - - - -target_end_date -True and predicted values + + + + + + +May 15 +Jun 01 +Jun 15 +Jul 01 +Jul 15 + + + + + + +May 15 +Jun 01 +Jun 15 +Jul 01 +Jul 15 + + + + + + +May 15 +Jun 01 +Jun 15 +Jul 01 +Jul 15 + +-2e+05 +-1e+05 +0e+00 +1e+05 + + + + + +100 +200 + + + +400 +800 +1200 +1600 + + + + + +0e+00 +1e+05 +2e+05 +3e+05 + + + + + +500 +1000 +1500 + + + + +0 +30000 +60000 +90000 + + + + + +400 +800 +1200 + + + + +20000 +40000 +60000 + + + +target_end_date +True and predicted values +range + + + + + + + + + + + + +60 +50 +40 +30 +20 +10 many_quantiles diff --git a/tests/testthat/_snaps/plot_predictions/no-median.svg b/tests/testthat/_snaps/plot_predictions/no-median.svg index 2d5cb577f..aa25ab251 100644 --- a/tests/testthat/_snaps/plot_predictions/no-median.svg +++ b/tests/testthat/_snaps/plot_predictions/no-median.svg @@ -20,305 +20,328 @@ - - + + - - - - - + + + + + + + - - + + - - - - - + + + + + + + - - + + - - - - - + + + + + + + - - + + - - - - - + + + + + + + - - + + - - - - - + + + + + + + - - + + - - - - - + + + + + + + - - + + - - - - - + + + + + + + - - + + - - - - - + + + + + + + - - + + - + - - + + - -United Kingdom -Cases + +United Kingdom +Cases - - + + - -United Kingdom -Deaths + +United Kingdom +Deaths - - + + - + - - + + - -Germany -Deaths + +Germany +Deaths - - + + - -Italy -Cases + +Italy +Cases - - + + - -Italy -Deaths + +Italy +Deaths - - + + - -France -Cases + +France +Cases - - + + - -France -Deaths + +France +Deaths - - + + - -Germany -Cases + +Germany +Cases - - - -Jun 28 -Jul 05 - - - -Jun 28 -Jul 05 - - - -Jun 28 -Jul 05 - -3900 -4200 -4500 -4800 -5100 - - - - - - -130 -150 -170 -190 - - - - - -160 -180 -200 - - - - -5500 -6000 -6500 -7000 - - - - - -120 -140 -160 -180 - - - - - -12500 -15000 -17500 -20000 -22500 - - - - - - -200 -250 -300 -350 - - - - - -120000 -150000 -180000 -210000 - - - - -target_end_date -True and predicted values -no_median + + + + + +Jun 28 +Jul 05 +Jul 12 +Jul 19 + + + + + +Jun 28 +Jul 05 +Jul 12 +Jul 19 + + + + + +Jun 28 +Jul 05 +Jul 12 +Jul 19 + +5000 +10000 + + + +50 +100 +150 +200 +250 + + + + + + +100 +200 +300 +400 +500 + + + + + + +10000 +20000 +30000 + + + + +200 +400 +600 + + + + +5e+04 +1e+05 + + + +100 +200 +300 + + + + +1e+05 +2e+05 +3e+05 +4e+05 + + + + +target_end_date +True and predicted values +range + + + + +90 +50 +no_median diff --git a/tests/testthat/_snaps/plot_predictions/point-forecasts.svg b/tests/testthat/_snaps/plot_predictions/point-forecasts.svg index ed6b8744e..1d7048d2f 100644 --- a/tests/testthat/_snaps/plot_predictions/point-forecasts.svg +++ b/tests/testthat/_snaps/plot_predictions/point-forecasts.svg @@ -25,6 +25,7 @@ + @@ -46,6 +47,7 @@ + @@ -67,6 +69,7 @@ + @@ -88,6 +91,7 @@ + @@ -109,6 +113,7 @@ + @@ -130,6 +135,7 @@ + @@ -151,6 +157,7 @@ + @@ -172,6 +179,7 @@ + diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index dc3d6b941..c157fb958 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -3,4 +3,7 @@ library(ggplot2, quietly = TRUE) suppressMessages(library(magrittr)) # compute quantile scores -scores <- suppressMessages(score(example_quantile)) +scores_quantile <- suppressMessages(score(example_quantile)) +scores_continuous <- suppressMessages(score(data = example_continuous)) +scores_point <- suppressMessages(score(example_point)) +scores_binary <- suppressMessages(score(example_binary)) diff --git a/tests/testthat/test-absolute_error.R b/tests/testthat/test-absolute_error.R index a8259328e..f61493b25 100644 --- a/tests/testthat/test-absolute_error.R +++ b/tests/testthat/test-absolute_error.R @@ -2,7 +2,7 @@ test_that("absolute error (sample based) works", { observed <- rnorm(30, mean = 1:30) predicted_values <- rnorm(30, mean = 1:30) - scoringutils <- scoringutils::ae_median_sample(observed, predicted_values) + scoringutils <- ae_median_sample(observed, predicted_values) ae <- abs(observed - predicted_values) expect_equal(ae, scoringutils) @@ -68,11 +68,7 @@ test_that("abs error is correct within score, point forecast only", { eval <- scoringutils::score(data_scoringutils) - # actual <- score_forecasts(forecasts = test_forecasts, truth = test_truth) - expected <- abs(y - point_forecast) - - # expect_equal(actual$abs_error, expected) expect_equal(eval$ae, expected) }) diff --git a/tests/testthat/test-add_coverage.R b/tests/testthat/test-add_coverage.R index 43686ca80..fab1e72a1 100644 --- a/tests/testthat/test-add_coverage.R +++ b/tests/testthat/test-add_coverage.R @@ -1,16 +1,18 @@ +ex_coverage <- scores_quantile[model == "EuroCOVIDhub-ensemble"] + test_that("add_coverage() works as expected", { expect_error( - add_coverage(scores, by = c("model", "target_type"), range = c()) + add_coverage(ex_coverage, by = c("model", "target_type"), range = c()) ) expect_error( - add_coverage(scores, by = c("model", "target_type")), NA + add_coverage(ex_coverage, by = c("model", "target_type")), NA ) cov <- add_coverage( - scores, by = c("model", "target_type"), range = c(10, 50, 80) + scores_quantile, by = c("model", "target_type"), range = c(10, 20) ) expect_equal( grep("coverage_", colnames(cov), value = TRUE), - c("coverage_deviation", "coverage_10", "coverage_50", "coverage_80") + c("coverage_deviation", "coverage_10", "coverage_20") ) }) @@ -18,10 +20,10 @@ test_that("add_coverage() works as expected", { test_that("Order of `add_coverage()` and `summarise_scores()` doesn't matter", { # Need to update test. Turns out the order does matter... # see https://github.com/epiforecasts/scoringutils/issues/367 - pw1 <- add_coverage(scores, by = "model") + pw1 <- add_coverage(ex_coverage, by = "model") pw1_sum <- summarise_scores(pw1, by = "model") - pw2 <- summarise_scores(scores, by = "model") + pw2 <- summarise_scores(ex_coverage, by = "model") pw2 <- add_coverage(pw2) # expect_true(all(pw1_sum == pw2, na.rm = TRUE)) diff --git a/tests/testthat/test-available_forecasts.R b/tests/testthat/test-available_forecasts.R index bb7abf4f1..f8e0ad0c7 100644 --- a/tests/testthat/test-available_forecasts.R +++ b/tests/testthat/test-available_forecasts.R @@ -8,22 +8,26 @@ test_that("available_forecasts() works as expected", { expect_type(af$target_type, "character") expect_type(af$`count`, "integer") expect_equal(nrow(af[is.na(`count`)]), 0) - af <- suppressMessages( - available_forecasts(example_quantile, - by = "model" - ) - ) + af <- available_forecasts(example_quantile, by = "model") expect_equal(nrow(af), 4) expect_equal(af$`count`, c(256, 256, 128, 247)) - af <- suppressMessages( - available_forecasts(example_quantile, + + # Setting `collapse = c()` means that all quantiles and samples are counted + af <- available_forecasts( + example_quantile, by = "model", collapse = c() - ) ) expect_equal(nrow(af), 4) expect_equal(af$`count`, c(5888, 5888, 2944, 5681)) - af <- suppressMessages( - available_forecasts(example_quantile) - ) + + # setting by = NULL, the default, results in by equal to forecast unit + af <- available_forecasts(example_quantile) expect_equal(nrow(af), 50688) + + # check whether collapsing also works for model-based forecasts + af <- available_forecasts(example_integer, by = "model") + expect_equal(nrow(af), 4) + + af <- available_forecasts(example_integer, by = "model", collapse = c()) + expect_equal(af$count, c(10240, 10240, 5120, 9880)) }) diff --git a/tests/testthat/test-get_-functions.R b/tests/testthat/test-get_-functions.R new file mode 100644 index 000000000..217e954bd --- /dev/null +++ b/tests/testthat/test-get_-functions.R @@ -0,0 +1,72 @@ +test_that("get_type() works as expected with vectors", { + expect_equal(get_type(1:3), "integer") + expect_equal(get_type(factor(1:2)), "classification") + expect_equal(get_type(c(1.0, 2)), "integer") + expect_equal(get_type(c(1.0, 2.3)), "continuous") + expect_error( + get_type(c("a", "b")), + "Assertion on 'as.vector(x)' failed: Must be of type 'numeric', not 'character'.", + fixed = TRUE + ) +}) + +test_that("get_type() works as expected with matrices", { + expect_equal(get_type(matrix(1:4, nrow = 2)), "integer") + expect_equal(get_type(matrix(c(1.0, 2:4))), "integer") + expect_equal(get_type(matrix(c(1.0, 2.3, 3, 4))), "continuous") + + # matrix of factors doesn't work + expect_error( + get_type(matrix(factor(1:4), nrow = 2)), + "Assertion on 'as.vector(x)' failed: Must be of type 'numeric', not 'character'.", + fixed = TRUE + ) + + expect_error( + get_type(matrix(c("a", "b", "c", "d"))), + "Assertion on 'as.vector(x)' failed: Must be of type 'numeric', not 'character'.", + fixed = TRUE + ) +}) + + +test_that("new `get_type()` is equal to old `prediction_type()", { + get_prediction_type <- function(data) { + if (is.data.frame(data)) { + data <- data$predicted + } + if ( + isTRUE(all.equal(as.vector(data), as.integer(data))) && + !all(is.na(as.integer(data))) + ) { + return("integer") + } else if (suppressWarnings(!all(is.na(as.numeric(data))))) { + return("continuous") + } else { + stop("Input is not numeric and cannot be coerced to numeric") + } + } + + check_data <- list( + 1:2, + # factor(1:2) # old function would classify as "continuous" + c(1.0, 2), + c(1.0, 2.3), + matrix(1:4, nrow = 2), + matrix(c(1.0, 2:4)), + matrix(c(1.0, 2.3, 3, 4)) + ) + + for (i in seq_along(check_data)) { + expect_equal( + get_prediction_type(check_data[[i]]), + get_type(check_data[[i]]) + ) + } +}) + +test_that("get_type() handles `NA` values", { + expect_equal(get_type(c(1, NA, 3)), "integer") + expect_equal(get_type(c(1, NA, 3.2)), "continuous") + expect_error(get_type(NA), "Can't get type: all values of are NA") +}) diff --git a/tests/testthat/test-input-check-helpers.R b/tests/testthat/test-input-check-helpers.R index 8461d0e2c..d3dc8cd2d 100644 --- a/tests/testthat/test-input-check-helpers.R +++ b/tests/testthat/test-input-check-helpers.R @@ -11,9 +11,9 @@ test_that("Check equal length works if all arguments have length 1", { }) -test_that("Check_not_null works", { +test_that("assert_not_null works", { test_function <- function(argument = NULL) { - scoringutils:::check_not_null("argument" = argument) + scoringutils:::assert_not_null("argument" = argument) return(paste("Input:", argument)) } out <- test_function("works") diff --git a/tests/testthat/test-pairwise_comparison.R b/tests/testthat/test-pairwise_comparison.R index 1ff94ad99..3d0120adb 100644 --- a/tests/testthat/test-pairwise_comparison.R +++ b/tests/testthat/test-pairwise_comparison.R @@ -254,15 +254,14 @@ test_that("pairwise_comparison() works", { test_that("pairwise_comparison() works inside and outside of score()", { - eval <- suppressMessages(score(data = example_continuous)) + eval <- scores_continuous pairwise <- suppressMessages(pairwise_comparison(eval, by = "model", metric = "crps" )) - eval2 <- suppressMessages(score(data = example_continuous)) - eval2_summarised <- summarise_scores(eval2, by = "model") + eval2_summarised <- summarise_scores(scores_continuous, by = "model") eval2 <- add_pairwise_comparison(eval2_summarised) expect_equal( @@ -271,19 +270,16 @@ test_that("pairwise_comparison() works inside and outside of score()", { }) test_that("pairwise_comparison() realises when there is no baseline model", { - - scores <- suppressMessages(score(example_quantile)) - expect_error( - pairwise_comparison(scores, baseline = "missing_model"), "missing" + pairwise_comparison(scores_quantile, baseline = "missing_model"), "missing" ) }) test_that("Order of `add_pairwise_comparison()` and `summarise_scores()` doesn't matter", { - pw1 <- suppressMessages(add_pairwise_comparison(scores)) + pw1 <- suppressMessages(add_pairwise_comparison(scores_quantile)) pw1_sum <- summarise_scores(pw1, by = "model") - pw2 <- summarise_scores(scores, by = "model") + pw2 <- summarise_scores(scores_quantile, by = "model") pw2 <- add_pairwise_comparison(pw2) expect_true(all(pw1_sum == pw2, na.rm = TRUE)) diff --git a/tests/testthat/test-plot_heatmap.R b/tests/testthat/test-plot_heatmap.R index 3246fbd64..9118ff21f 100644 --- a/tests/testthat/test-plot_heatmap.R +++ b/tests/testthat/test-plot_heatmap.R @@ -2,10 +2,10 @@ library(ggplot2, quietly = TRUE) test_that("plot_heatmap() works as expected", { scores <- suppressMessages( - summarise_scores(scores, by = c("model", "target_type", "range")) + summarise_scores(scores_quantile, by = c("model", "target_type", "range")) ) p <- plot_heatmap(scores, x = "target_type", metric = "bias") expect_s3_class(p, "ggplot") skip_on_cran() vdiffr::expect_doppelganger("plot_heatmap", p) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-plot_interval_coverage.R b/tests/testthat/test-plot_interval_coverage.R index 6180704a4..04f203b03 100644 --- a/tests/testthat/test-plot_interval_coverage.R +++ b/tests/testthat/test-plot_interval_coverage.R @@ -2,10 +2,10 @@ library(ggplot2, quietly = TRUE) test_that("plot_interval_coverage() works as expected", { scores <- suppressMessages( - summarise_scores(scores, by = c("model", "range")) + summarise_scores(scores_quantile, by = c("model", "range")) ) p <- plot_interval_coverage(scores) expect_s3_class(p, "ggplot") skip_on_cran() vdiffr::expect_doppelganger("plot_interval_coverage", p) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-plot_pairwise_comparison.R b/tests/testthat/test-plot_pairwise_comparison.R index 4cd477e90..ffbf15374 100644 --- a/tests/testthat/test-plot_pairwise_comparison.R +++ b/tests/testthat/test-plot_pairwise_comparison.R @@ -1,5 +1,5 @@ pairwise <- suppressMessages( - pairwise_comparison(scores, by = "target_type") + pairwise_comparison(scores_quantile, by = "target_type") ) test_that("plot_pairwise_comparison() works as expected", { diff --git a/tests/testthat/test-plot_quantile_coverage.R b/tests/testthat/test-plot_quantile_coverage.R index 9b210bfc9..6c3593c04 100644 --- a/tests/testthat/test-plot_quantile_coverage.R +++ b/tests/testthat/test-plot_quantile_coverage.R @@ -1,9 +1,9 @@ test_that("plot_quantile_coverage() works as expected", { scores <- suppressMessages( - summarise_scores(scores, by = c("model", "quantile")) + summarise_scores(scores_quantile, by = c("model", "quantile")) ) p <- plot_quantile_coverage(scores) expect_s3_class(p, "ggplot") skip_on_cran() vdiffr::expect_doppelganger("plot_quantile_coverage", p) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-plot_ranges.R b/tests/testthat/test-plot_ranges.R index d773d4f91..e9ae5575b 100644 --- a/tests/testthat/test-plot_ranges.R +++ b/tests/testthat/test-plot_ranges.R @@ -1,5 +1,5 @@ sum_scores <- suppressMessages( - summarise_scores(scores, by = c("model", "target_type", "range")) + summarise_scores(scores_quantile, by = c("model", "target_type", "range")) ) test_that("plot_ranges() works as expected with interval score", { diff --git a/tests/testthat/test-plot_score_table.R b/tests/testthat/test-plot_score_table.R index 662a1cefc..8336de7a9 100644 --- a/tests/testthat/test-plot_score_table.R +++ b/tests/testthat/test-plot_score_table.R @@ -1,6 +1,6 @@ test_that("plot_score_table() works as expected", { p <- suppressMessages( - scores %>% + scores_quantile %>% add_coverage(by = c("model")) %>% summarise_scores(by = c("model")) %>% summarise_scores(by = c("model"), fun = signif, digits = 1) %>% diff --git a/tests/testthat/test-plot_wis.R b/tests/testthat/test-plot_wis.R index 6e3c92fed..9e3c03409 100644 --- a/tests/testthat/test-plot_wis.R +++ b/tests/testthat/test-plot_wis.R @@ -1,5 +1,5 @@ sum_scores <- suppressMessages( - summarise_scores(scores, by = c("model", "target_type")) + summarise_scores(scores_quantile, by = c("model", "target_type")) ) test_that("plot_wis() works as expected with relative contributions", { @@ -34,4 +34,4 @@ test_that("plot_wis() works as expected when flipped", { expect_s3_class(p, "ggplot") skip_on_cran() vdiffr::expect_doppelganger("plot_wis_flip", p) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-score.R b/tests/testthat/test-score.R index 73c013eb9..9b4040723 100644 --- a/tests/testthat/test-score.R +++ b/tests/testthat/test-score.R @@ -20,9 +20,7 @@ test_that("function throws an error if data is missing", { # test binary case ------------------------------------------------------------- test_that("function produces output for a binary case", { - binary_example <- data.table::setDT(scoringutils::example_binary) - eval <- suppressMessages(score(binary_example[!is.na(predicted)])) - eval <- summarise_scores(eval, by = c("model", "target_type")) + eval <- summarise_scores(scores_binary, by = c("model", "target_type")) expect_equal( nrow(eval) > 1, @@ -36,13 +34,7 @@ test_that("function produces output for a binary case", { "log_score" ) ) -}) - -test_that("function produces score for a binary case", { - binary_example <- data.table::setDT(scoringutils::example_binary) - eval <- suppressMessages(score(binary_example[!is.na(predicted)])) - eval <- summarise_scores(eval, by = c("model", "target_type")) expect_true("brier_score" %in% names(eval)) }) @@ -55,13 +47,11 @@ test_that("score.scoringutils_binary() errors with only NA values", { }) test_that("score() gives same result for binary as regular function", { - binary_example <- data.table::setDT(scoringutils::example_binary[!is.na(predicted)]) - eval <- suppressMessages(score(binary_example)) manual_eval <- brier_score( - factor(binary_example$observed), - binary_example$predicted + factor(example_binary$observed), + example_binary$predicted ) - expect_equal(eval$brier_score, manual_eval) + expect_equal(scores_binary$brier_score, manual_eval[!is.na(manual_eval)]) }) test_that( @@ -130,25 +120,19 @@ test_that( # providing an additional, unrelated function argument works expect_no_error( - score(example_binary, unnecessary_argument = "unnecessary") + score(df, unnecessary_argument = "unnecessary") ) expect_no_error( - score(example_binary, metrics = list("brier_score" = brier_score), + score(df, metrics = list("brier_score" = brier_score), unnecessary_argument = "unnecessary") ) } ) - - - - # test point case -------------------------------------------------------------- test_that("function produces output for a point case", { - point_example <- data.table::setDT(scoringutils::example_point) - eval <- suppressMessages(score(point_example)) - eval <- summarise_scores(eval, by = c("model", "target_type")) + eval <- summarise_scores(scores_point, by = c("model", "target_type")) expect_equal( nrow(eval) > 1, @@ -181,21 +165,12 @@ test_that("score.scoringutils_point() errors with only NA values", { }) # test quantile case ----------------------------------------------------------- -test_that("function produces output for a quantile format case", { - quantile_example <- data.table::setDT(scoringutils::example_quantile) - eval <- suppressMessages(score(quantile_example[!is.na(predicted)])) - - expect_equal( - nrow(eval) > 1, - TRUE - ) -}) - test_that("score_quantile correctly handles separate results = FALSE", { - quantile_example <- data.table::setDT(scoringutils::example_quantile) + df <- example_quantile[model == "EuroCOVIDhub-ensemble" & + target_type == "Cases" & location == "DE"] eval <- suppressMessages( score( - quantile_example[!is.na(predicted)], + df[!is.na(predicted)], separate_results = FALSE ) ) @@ -227,11 +202,7 @@ test_that("score() quantile produces desired metrics", { test_that("calculation of ae_median is correct for a quantile format case", { - eval <- suppressMessages( - score(scoringutils::example_quantile[!is.na(predicted)]) - ) - - eval <- summarise_scores(eval,by = "model") + eval <- summarise_scores(scores_quantile,by = "model") example <- scoringutils::example_quantile ae <- example[quantile == 0.5, ae := abs(observed - predicted)][!is.na(model), .(mean = mean(ae, na.rm = TRUE)), @@ -243,12 +214,11 @@ test_that("calculation of ae_median is correct for a quantile format case", { test_that("all quantile and range formats yield the same result", { - quantile_example1 <- data.table::setDT(scoringutils::example_quantile) + eval1 <- summarise_scores(scores_quantile, by = "model") - eval1 <- suppressMessages(score(quantile_example1[!is.na(predicted)])) - eval1 <- summarise_scores(eval1, by = "model") + df <- data.table::copy(example_quantile) - ae <- quantile_example1[ + ae <- df[ quantile == 0.5, ae := abs(observed - predicted)][ !is.na(model), .(mean = mean(ae, na.rm = TRUE)), by = "model" @@ -273,7 +243,7 @@ test_that("WIS is the same with other metrics omitted or included", { metrics = "interval_score" )) - eval2 <- suppressMessages(score(example_quantile)) + eval2 <- scores_quantile expect_equal( sum(eval$interval_score), @@ -296,15 +266,8 @@ test_that("score.scoringutils_quantile() errors with only NA values", { # test integer and continuous case --------------------------------------------- test_that("function produces output for a continuous format case", { - example <- data.table::setDT(scoringutils::example_continuous) - eval <- suppressMessages(score(example[!is.na(predicted)])) - - eval2 <- suppressMessages(score(example)) - data.table::setcolorder(eval2, colnames(eval)) - eval <- eval[order(model)] - eval2 <- eval2[order(model)] - all(eval == eval2, na.rm = TRUE) + eval <- scores_continuous only_nas <- copy(example_continuous)[, predicted := NA_real_] expect_error( diff --git a/tests/testthat/test-summarise_scores.R b/tests/testthat/test-summarise_scores.R index 3c5ab37fc..d3fd830ce 100644 --- a/tests/testthat/test-summarise_scores.R +++ b/tests/testthat/test-summarise_scores.R @@ -1,10 +1,10 @@ test_that("summarise_scores() works without any arguments", { - expect_true("quantile" %in% names(scores)) + expect_true("quantile" %in% names(scores_quantile)) - scores <- summarise_scores(scores) - expect_false("quantile" %in% names(scores)) + summarised_scores <- summarise_scores(scores_quantile) + expect_false("quantile" %in% names(summarised_scores)) - s2 <- summarise_scores(scores, + s2 <- summarise_scores(scores_quantile, by = c( "location", "target_end_date", "target_type", "location_name", "forecast_date", "model", @@ -12,19 +12,19 @@ test_that("summarise_scores() works without any arguments", { ) ) - expect_equal(nrow(scores), nrow(s2)) + expect_equal(nrow(summarised_scores), nrow(s2)) }) test_that("summarise_scores() handles wrong by argument well", { expect_error( - summarise_scores(scores, by = "not_present"), + summarise_scores(scores_quantile, by = "not_present"), "Column 'not_present' not found in data.", # nolint fixed = TRUE ) expect_error( - summarise_scores(scores, by = "sample_id"), + summarise_scores(scores_quantile, by = "sample_id"), "Column 'sample_id' not found in data.", fixed = TRUE ) @@ -43,7 +43,6 @@ test_that("summarise_scores() works with point forecasts in a quantile format", ) ) - scores_point <- suppressMessages(score(example_point)) summarised_scores <- summarise_scores(scores_point, by = "model") expect_no_condition( @@ -64,11 +63,8 @@ test_that("summarise_scores() works with point forecasts in a quantile format", }) test_that("summarise_scores() can compute relative measures", { - ex <- data.table::copy(example_quantile) - scores <- suppressMessages(score(ex)) - scores_with <- add_pairwise_comparison( - summarise_scores(scores, by = "model") + summarise_scores(scores_quantile, by = "model") ) expect_equal( @@ -77,7 +73,7 @@ test_that("summarise_scores() can compute relative measures", { ) scores_with <- add_pairwise_comparison( - summarise_scores(scores, by = "model"), + summarise_scores(scores_quantile, by = "model"), relative_skill_metric = "ae_median" ) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index def89fbb2..a909c7d46 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -35,77 +35,6 @@ test_that("get_protected_columns() returns the correct result", { expect_equal(sort(manual), sort(auto)) }) -test_that("get_prediction_type() correctly identifies quantile predictions", { - data <- data.frame( - predicted = 1:3, - quantile = c(0.1, 0.5, 0.9) - ) - - expect_equal(get_prediction_type(data), "discrete") -}) - -test_that("get_prediction_type() correctly identifies integer predictions", { - data <- data.frame( - predicted = as.integer(1:5) - ) - - expect_equal(get_prediction_type(data), "discrete") - - data <- matrix(as.integer(1:9), nrow = 3) - expect_equal(get_prediction_type(data), "discrete") -}) - -test_that("get_prediction_type() correctly identifies continuous predictions", { - data <- data.frame( - predicted = rnorm(5) - ) - - expect_equal(get_prediction_type(data), "continuous") -}) - -test_that("works with vector input", { - predictions <- rnorm(5) - - expect_equal(get_prediction_type(predictions), "continuous") -}) - -test_that("get_prediction_type() returns error on invalid input", { - suppressWarnings(expect_error(get_prediction_type("foo"))) -}) - -test_that("get_prediction_type() handles NA values across prediction types", { - # Quantile - # data <- data.frame( - # predicted = c(1, NA, 3), - # quantile = c(0.1, 0.5, 0.9) - # ) - # expect_equal(get_prediction_type(data), "quantile") - - # Integer - data <- data.frame( - predicted = c(1, NA, 3) - ) - expect_equal(get_prediction_type(data), "discrete") - - # Continuous - data <- data.frame( - predicted = c(1.1, NA, 3.2) - ) - expect_equal(get_prediction_type(data), "continuous") - # predictions <- c(1.1, NA, 3.5) - # expect_equal(get_prediction_type(predictions), "continuous") - - # All NA - data <- data.frame(predicted = NA) - expect_error( - get_prediction_type(data), - "Input is not numeric and cannot be coerced to numeric" - ) - expect_error( - get_prediction_type(NA_real_), - "Input is not numeric and cannot be coerced to numeric" - ) -}) # test_that("prediction_is_quantile() correctly identifies quantile predictions", { # data <- data.frame( diff --git a/vignettes/metric-details.Rmd b/vignettes/metric-details.Rmd index 1a5176c1f..300a501fc 100644 --- a/vignettes/metric-details.Rmd +++ b/vignettes/metric-details.Rmd @@ -26,8 +26,8 @@ library(data.table) This table gives an overview for when which metric can be applied and gives a very brief description. Note that this table on shows the metrics as implemented in `scoringutils`. For example, only scoring of sample-based discrete and continuous distributions is implemented in `scoringutils`, but closed-form solutions often exist (e.g. in the `scoringRules` package). ```{r, echo = FALSE, results = "asis"} -data <- copy(metrics) -setnames(data, old = c("Discrete", "Continuous", "Binary", "Quantile"), +data <- copy(metrics) +setnames(data, old = c("Discrete", "Continuous", "Binary", "Quantile"), new = c("D", "C", "B", "Q")) data[, c("Name", "Functions") := NULL] @@ -45,8 +45,8 @@ data$Q <- replace(data$Q) data[, 1:6] %>% kbl(format = "html", escape = FALSE, - align = c("lccccl"), - linesep = c('\\addlinespace')) %>% + align = "lccccl", + linesep = "\\addlinespace") %>% column_spec(1, width = "3.2cm") %>% column_spec(2, width = "1.5cm") %>% column_spec(3, width = "1.5cm") %>% @@ -62,9 +62,11 @@ data[, 1:6] %>% ## Detailed explanation of the metrics implemented in `scoringutils` ```{r, echo = FALSE, results = "asis"} - data <- readRDS( - system.file("metrics-overview/metrics-detailed.rds", package = "scoringutils") + system.file( + "metrics-overview", "metrics-detailed.rds", + package = "scoringutils" + ) ) data[, 1:2] %>% diff --git a/vignettes/scoring-forecasts-directly.Rmd b/vignettes/scoring-forecasts-directly.Rmd index e58001d9e..d7a68736f 100644 --- a/vignettes/scoring-forecasts-directly.Rmd +++ b/vignettes/scoring-forecasts-directly.Rmd @@ -216,4 +216,3 @@ interval_score( interval_range = interval_range ) ``` -