From 0eeaec23c13669bc8bac0dc4125eddf33b45dd69 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Wed, 20 Mar 2024 13:24:58 +0100 Subject: [PATCH 1/6] Rename `add_pairwise_comparison()` to `add_relative_skill()` --- NAMESPACE | 2 +- NEWS.md | 2 +- R/pairwise-comparisons.R | 2 +- README.Rmd | 2 +- README.md | 2 +- man/add_pairwise_comparison.Rd | 6 +-- tests/testthat/test-pairwise_comparison.R | 52 +++++++++++------------ vignettes/scoringutils.Rmd | 4 +- 8 files changed, 36 insertions(+), 36 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index cf19c6db3..76d87eabb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,7 +23,7 @@ S3method(validate_forecast,forecast_binary) S3method(validate_forecast,forecast_point) S3method(validate_forecast,forecast_quantile) S3method(validate_forecast,forecast_sample) -export(add_pairwise_comparison) +export(add_relative_skill) export(ae_median_quantile) export(ae_median_sample) export(as_forecast) diff --git a/NEWS.md b/NEWS.md index 7a388ff66..f033206e5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -26,7 +26,7 @@ The update introduces breaking changes. If you want to keep using the older vers - `score()` now returns objects of class `scores` with a stored attribute `metrics` that holds the names of the scoring rules that were used. Users can call `get_metrics()` to access the names of those scoring rules. - `check_forecasts()` was replaced by a different workflow. There now is a function, `as_forecast()`, that determines forecast type of the data, constructs a forecasting object and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`. - Users can test whether an object is of class `forecast_*()` using the function `is_forecast()`. Users can also test for a specific `forecast_*` class using the appropriate `is_forecast.forecast_*` method. For example, to check whether an object is of class `forecast_quantile`, you would use you would use `scoringutils:::is_forecast.forecast_quantile()`. -- The functionality for computing pairwise comparisons was now split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_pairwise_comparison()`, was introduced that takes summarised scores as an input and adds columns with relative skil scores and scaled relative skill scores. +- The functionality for computing pairwise comparisons was now split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_relative_skill()`, was introduced that takes summarised scores as an input and adds columns with relative skill scores and scaled relative skill scores. - `add_coverage()` was replaced by a new function, `get_coverage()`. This function comes with an updated workflow where coverage values are computed directly based on the original data and can then be visualised using `plot_interval_coverage()` or `plot_quantile_coverage()`. An example worfklow would be `example_quantile |> as_forecast() |> get_coverage(by = "model") |> plot_interval_coverage()`. 
- Support for the interval format was mostly dropped (see PR #525 by @nikosbosse and reviewed by @seabbs) - The function `bias_range()` was removed (users should now use `bias_quantile()` instead) diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R index 381345c03..b21d2065f 100644 --- a/R/pairwise-comparisons.R +++ b/R/pairwise-comparisons.R @@ -515,7 +515,7 @@ permutation_test <- function(scores1, #' @inheritParams pairwise_comparison #' @export #' @keywords keyword scoring -add_pairwise_comparison <- function( +add_relative_skill <- function( scores, by = "model", metric = intersect(c("wis", "crps", "brier_score"), names(scores)), diff --git a/README.Rmd b/README.Rmd index 8f7b944bf..911a71a3d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -108,7 +108,7 @@ example_quantile %>% "location", "target_end_date", "target_type", "horizon", "model" )) %>% score() %>% - add_pairwise_comparison( + add_relative_skill( by = c("model", "target_type"), baseline = "EuroCOVIDhub-ensemble" ) %>% diff --git a/README.md b/README.md index ef8f97081..24abcf720 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ example_quantile %>% "location", "target_end_date", "target_type", "horizon", "model" )) %>% score() %>% - add_pairwise_comparison( + add_relative_skill( by = c("model", "target_type"), baseline = "EuroCOVIDhub-ensemble" ) %>% diff --git a/man/add_pairwise_comparison.Rd b/man/add_pairwise_comparison.Rd index 57f93133d..d7a06bf40 100644 --- a/man/add_pairwise_comparison.Rd +++ b/man/add_pairwise_comparison.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/pairwise-comparisons.R -\name{add_pairwise_comparison} -\alias{add_pairwise_comparison} +\name{add_relative_skill} +\alias{add_relative_skill} \title{Add pairwise comparisons} \usage{ -add_pairwise_comparison( +add_relative_skill( scores, by = "model", metric = intersect(c("wis", "crps", "brier_score"), names(scores)), diff --git a/tests/testthat/test-pairwise_comparison.R b/tests/testthat/test-pairwise_comparison.R index 5dffe29b2..6a69369bc 100644 --- a/tests/testthat/test-pairwise_comparison.R +++ b/tests/testthat/test-pairwise_comparison.R @@ -65,11 +65,11 @@ test_that("pairwise_comparison() works", { ) ) eval_without_baseline <- suppressMessages( - add_pairwise_comparison(eval_without_rel_skill) + add_relative_skill(eval_without_rel_skill) ) eval_with_baseline <- suppressMessages( - add_pairwise_comparison(eval_without_rel_skill, baseline = "m1") + add_relative_skill(eval_without_rel_skill, baseline = "m1") ) @@ -203,7 +203,7 @@ test_that("pairwise_comparison() works", { eval <- score(data_formatted) eval_summarised <- summarise_scores(eval, by = c("model", "location")) - eval_with_baseline <- add_pairwise_comparison(eval, by = c("model", "location"), baseline = "m1") + eval_with_baseline <- add_relative_skill(eval, by = c("model", "location"), baseline = "m1") eval_with_baseline <- summarise_scores(eval_with_baseline, by = c("model", "location")) relative_skills_with <- eval_with_baseline[ @@ -220,7 +220,7 @@ test_that("pairwise_comparison() works", { test_that("pairwise_comparison() work in score() with integer data", { eval <- suppressMessages(score(data = as_forecast(example_integer))) eval_summarised <- summarise_scores(eval, by = c("model", "target_type")) - eval <- add_pairwise_comparison(eval_summarised) + eval <- add_relative_skill(eval_summarised) expect_true("crps_relative_skill" %in% colnames(eval)) }) @@ -228,7 +228,7 @@ test_that("pairwise_comparison() work in 
score() with integer data", { test_that("pairwise_comparison() work in score() with binary data", { eval <- suppressMessages(score(data = as_forecast(example_binary))) eval_summarised <- summarise_scores(eval, by = c("model", "target_type")) - eval <- add_pairwise_comparison(eval_summarised) + eval <- add_relative_skill(eval_summarised) expect_true("brier_score_relative_skill" %in% colnames(eval)) }) @@ -256,7 +256,7 @@ test_that("pairwise_comparison() works", { }) -test_that("pairwise_comparison() and `add_pairwise_comparison()` give same result", { +test_that("pairwise_comparison() and `add_relative_skill()` give same result", { eval <- scores_continuous pairwise <- pairwise_comparison(eval, @@ -264,7 +264,7 @@ test_that("pairwise_comparison() and `add_pairwise_comparison()` give same resul metric = "crps" ) - eval2 <- add_pairwise_comparison(scores_continuous, by = "model") + eval2 <- add_relative_skill(scores_continuous, by = "model") eval2 <- summarise_scores(eval2, by = "model") expect_equal( @@ -279,12 +279,12 @@ test_that("pairwise_comparison() realises when there is no baseline model", { ) }) -test_that("Basic input checks for `add_pairwise_comparison() work", { +test_that("Basic input checks for `add_relative_skill() work", { eval <- data.table::copy(scores_continuous) # check that model column + columns in 'by' + baseline model are present expect_error( - add_pairwise_comparison( + add_relative_skill( eval, by = c("model", "missing"), metric = "crps" ), "Not all columns specified in `by` are present:" @@ -292,7 +292,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", { # error if baseline is not present expect_error( - add_pairwise_comparison( + add_relative_skill( eval, by = "model", baseline = "missing", metric = "crps" ), "Assertion on 'baseline' failed: Must be a subset of" @@ -301,12 +301,12 @@ test_that("Basic input checks for `add_pairwise_comparison() work", { # error if not enough models are present eval_few <- eval[model %in% c("EuroCOVIDhub-ensemble", "EuroCOVIDhub-baseline")] expect_no_error( - add_pairwise_comparison( + add_relative_skill( eval_few, by = "model", metric = "crps" ) ) expect_error( - add_pairwise_comparison( + add_relative_skill( eval_few, by = "model", baseline = "EuroCOVIDhub-baseline", metric = "crps" ), @@ -315,14 +315,14 @@ test_that("Basic input checks for `add_pairwise_comparison() work", { # error if no relative skill metric is found expect_error( - add_pairwise_comparison( + add_relative_skill( eval, by = "model", metric = "missing" ) ) eval_nometric <- data.table::copy(eval)[, "crps" := NULL] expect_error( - suppressWarnings(add_pairwise_comparison( + suppressWarnings(add_relative_skill( eval_nometric, by = "model" )), "Assertion on 'metric' failed: Must be a subset of " @@ -331,7 +331,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", { # error if no model column is found eval_nomodel <- data.table::copy(eval)[, "model" := NULL] expect_error( - add_pairwise_comparison( + add_relative_skill( eval_nomodel, by = "target_type", metric = "crps" ), "Assertion on 'scores' failed: Column 'model' not found in data." 
@@ -341,7 +341,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", { eval_noattribute <- data.table::copy(eval) attr(eval_noattribute, "metrics") <- NULL expect_error( - add_pairwise_comparison( + add_relative_skill( eval_noattribute, by = "model", metric = "crps" ), "needs an attribute `metrics`" @@ -351,7 +351,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", { eval_nas <- data.table::copy(eval) eval_nas[1:10, "crps" := NA] expect_warning( - add_pairwise_comparison( + add_relative_skill( eval_nas, by = "model", metric = "crps" ), "Some values for the metric `crps` are NA. These have been removed." @@ -360,7 +360,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", { # warning if there are no values left after removing NAs eval_nas[, "crps" := NA] expect_error( - add_pairwise_comparison( + add_relative_skill( eval_nas, by = "model", metric = "crps" ), "After removing \"NA\" values for `crps`, no values were left." @@ -370,7 +370,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", { eval_diffsign <- data.table::copy(eval) eval_diffsign[1:10, "crps" := -eval_diffsign[1:10, "crps"]] expect_error( - add_pairwise_comparison( + add_relative_skill( eval_diffsign, by = "model", metric = "crps" ), "To compute pairwise comparisons, all values of `crps` must have the same sign." @@ -379,7 +379,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", { # message if `by` is equal to the forecast unit fu <- get_forecast_unit(eval) expect_message( - add_pairwise_comparison( + add_relative_skill( eval, by = fu, metric = "crps"), "relative skill can only be computed if `by` is different from the unit of a single forecast." ) @@ -387,7 +387,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", { # warning if by is equal to the forecast unit and also by is "model" eval_summ <- summarise_scores(eval, by = "model") expect_warning( - add_pairwise_comparison( + add_relative_skill( eval_summ, by = "model", metric = "crps" ), "`by` is set to 'model', which is also the unit of a single forecast." 
@@ -448,9 +448,9 @@ test_that("compare_two_models() throws error with wrong inputs", { ) }) -test_that("add_pairwise_comparison() works with point forecasts", { +test_that("add_relative_skill() works with point forecasts", { expect_no_condition( - pw_point <- add_pairwise_comparison( + pw_point <- add_relative_skill( scores_point, metric = "se_point" ) @@ -467,8 +467,8 @@ test_that("add_pairwise_comparison() works with point forecasts", { ) }) -test_that("add_pairwise_comparison() can compute relative measures", { - scores_with <- add_pairwise_comparison( +test_that("add_relative_skill() can compute relative measures", { + scores_with <- add_relative_skill( scores_quantile, ) scores_with <- summarise_scores(scores_with, by = "model") @@ -478,7 +478,7 @@ test_that("add_pairwise_comparison() can compute relative measures", { c(1.6, 0.81, 0.75, 1.03), tolerance = 0.01 ) - scores_with <- add_pairwise_comparison( + scores_with <- add_relative_skill( scores_quantile, by = "model", metric = "ae_median" ) diff --git a/vignettes/scoringutils.Rmd b/vignettes/scoringutils.Rmd index b02c9c3ff..03388c4a4 100644 --- a/vignettes/scoringutils.Rmd +++ b/vignettes/scoringutils.Rmd @@ -181,7 +181,7 @@ In order to better compare models against each other we can use relative scores ```{r} score(as_forecast(example_quantile)) %>% - add_pairwise_comparison( + add_relative_skill( by = c("model", "target_type"), baseline = "EuroCOVIDhub-ensemble" ) %>% @@ -295,7 +295,7 @@ forecast_quantile %>% forecast_quantile %>% score() %>% summarise_scores(by = "model") %>% - add_pairwise_comparison(baseline = "EuroCOVIDhub-baseline") + add_relative_skill(baseline = "EuroCOVIDhub-baseline") ``` If using the `pairwise_comparison()` function, we can also visualise pairwise comparisons by showing the mean score ratios between models. By default, smaller values are better and the model we care about is showing on the y axis on the left, while the model against it is compared is shown on the x-axis on the bottom. In the example above, the EuroCOVIDhub-ensemble performs best (it only has values smaller 1), while the EuroCOVIDhub-baseline performs worst (and only has values larger than 1). For cases, the UMass-MechBayes model is of course excluded as there are no case forecasts available and therefore the set of overlapping forecasts is empty. 
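---

For reference, a minimal sketch of the workflow introduced by this first patch, pieced together from the README and vignette examples changed above (`example_quantile` ships with scoringutils; this is an illustration of the renamed call, not the exact pipeline used in the package docs):

```r
library(scoringutils)

# After this patch, add_relative_skill() (formerly add_pairwise_comparison())
# adds relative skill and scaled relative skill columns to a scores object.
scores <- example_quantile |>
  as_forecast() |>
  score() |>
  add_relative_skill(
    by = c("model", "target_type"),
    baseline = "EuroCOVIDhub-ensemble"
  )

summarise_scores(scores, by = c("model", "target_type"))
```
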
From 7eb4c18d9692a6cba0f3760362652505e8399dcf Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Wed, 20 Mar 2024 13:50:40 +0100 Subject: [PATCH 2/6] rename pairwise_comparison and plot_pairwise_comparison --- NAMESPACE | 4 +- R/pairwise-comparisons.R | 40 ++++++++--------- R/plot.R | 8 ++-- inst/create-metric-tables.R | 6 +-- .../R/00-standalone-Figure-replication.R | 6 +-- man/add_pairwise_comparison.Rd | 45 ------------------- man/add_relative_skill.Rd | 23 ++++++++++ man/compare_two_models.Rd | 9 +--- man/correlation.Rd | 3 -- man/geometric_mean.Rd | 2 +- ...parison.Rd => get_pairwise_comparisons.Rd} | 10 ++--- man/pairwise_comparison_one_group.Rd | 10 ++--- man/permutation_test.Rd | 2 +- ...arison.Rd => plot_pairwise_comparisons.Rd} | 12 ++--- man/plot_score_table.Rd | 3 -- tests/testthat/test-pairwise_comparison.R | 24 +++++----- .../testthat/test-plot_pairwise_comparison.R | 10 ++--- vignettes/scoringutils.Rmd | 10 ++--- 18 files changed, 96 insertions(+), 131 deletions(-) delete mode 100644 man/add_pairwise_comparison.Rd create mode 100644 man/add_relative_skill.Rd rename man/{pairwise_comparison.Rd => get_pairwise_comparisons.Rd} (93%) rename man/{plot_pairwise_comparison.Rd => plot_pairwise_comparisons.Rd} (72%) diff --git a/NAMESPACE b/NAMESPACE index 76d87eabb..a9a2c53f8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(get_forecast_counts) export(get_forecast_type) export(get_forecast_unit) export(get_metrics) +export(get_pairwise_comparisons) export(interval_coverage) export(interval_coverage_deviation) export(is_forecast) @@ -54,14 +55,13 @@ export(metrics_quantile) export(metrics_sample) export(new_forecast) export(overprediction) -export(pairwise_comparison) export(pit) export(pit_sample) export(plot_correlation) export(plot_forecast_counts) export(plot_heatmap) export(plot_interval_coverage) -export(plot_pairwise_comparison) +export(plot_pairwise_comparisons) export(plot_pit) export(plot_quantile_coverage) export(plot_score_table) diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R index b21d2065f..43264f786 100644 --- a/R/pairwise-comparisons.R +++ b/R/pairwise-comparisons.R @@ -60,18 +60,18 @@ #' } #' #' scores <- score(as_forecast(example_quantile)) -#' pairwise <- pairwise_comparison(scores, by = "target_type") +#' pairwise <- get_pairwise_comparisons(scores, by = "target_type") #' #' library(ggplot2) -#' plot_pairwise_comparison(pairwise, type = "mean_scores_ratio") + +#' plot_pairwise_comparisons(pairwise, type = "mean_scores_ratio") + #' facet_wrap(~target_type) -pairwise_comparison <- function( - scores, - by = "model", - metric = intersect(c("wis", "crps", "brier_score"), names(scores)), - baseline = NULL, - ... +get_pairwise_comparisons <- function( + scores, + by = "model", + metric = intersect(c("wis", "crps", "brier_score"), names(scores)), + baseline = NULL, + ... ) { # input checks --------------------------------------------------------------- @@ -204,14 +204,14 @@ pairwise_comparison <- function( #' @description #' #' This function does the pairwise comparison for one set of forecasts, but -#' multiple models involved. It gets called from [pairwise_comparison()]. -#' [pairwise_comparison()] splits the data into arbitrary subgroups specified -#' by the user (e.g. if pairwise comparison should be done separately for -#' different forecast targets) and then the actual pairwise comparison for that -#' subgroup is managed from [pairwise_comparison_one_group()]. In order to +#' multiple models involved. 
It gets called from [get_pairwise_comparisons()]. +#' [get_pairwise_comparisons()] splits the data into arbitrary subgroups +#' specified by the user (e.g. if pairwise comparison should be done separately +#' for different forecast targets) and then the actual pairwise comparison for +#' that subgroup is managed from [pairwise_comparison_one_group()]. In order to #' actually do the comparison between two models over a subset of common #' forecasts it calls [compare_two_models()]. -#' @inherit pairwise_comparison params return +#' @inherit get_pairwise_comparisons params return #' @importFrom cli cli_abort #' @keywords internal @@ -342,7 +342,7 @@ pairwise_comparison_one_group <- function(scores, #' from [pairwise_comparison_one_group()], which handles the #' comparison of multiple models on a single set of forecasts (there are no #' subsets of forecasts to be distinguished). [pairwise_comparison_one_group()] -#' in turn gets called from from [pairwise_comparison()] which can handle +#' in turn gets called from from [get_pairwise_comparisons()] which can handle #' pairwise comparisons for a set of forecasts with multiple subsets, e.g. #' pairwise comparisons for one set of forecasts, but done separately for two #' different forecast targets. @@ -430,7 +430,7 @@ compare_two_models <- function(scores, #' @title Calculate Geometric Mean #' #' @details -#' Used in [pairwise_comparison()]. +#' Used in [get_pairwise_comparisons()]. #' #' @param x numeric vector of values for which to calculate the geometric mean #' @return the geometric mean of the values in `x`. `NA` values are ignored. @@ -452,7 +452,7 @@ geometric_mean <- function(x) { #' the two. This observed difference or ratio is compared against the same #' test statistic based on permutations of the original data. #' -#' Used in [pairwise_comparison()]. +#' Used in [get_pairwise_comparisons()]. #' #' @param scores1 vector of scores to compare against another vector of scores #' @param scores2 A second vector of scores to compare against the first @@ -509,7 +509,7 @@ permutation_test <- function(scores1, #' @description Adds a columns with relative skills computed by running #' pairwise comparisons on the scores. #' For more information on -#' the computation of relative skill, see [pairwise_comparison()]. +#' the computation of relative skill, see [get_pairwise_comparisons()]. #' Relative skill will be calculated for the aggregation level specified in #' `by`. #' @inheritParams pairwise_comparison @@ -522,9 +522,9 @@ add_relative_skill <- function( baseline = NULL ) { - # input checks are done in `pairwise_comparison()` + # input checks are done in `get_pairwise_comparisons()` # do pairwise comparisons ---------------------------------------------------- - pairwise <- pairwise_comparison( + pairwise <- get_pairwise_comparisons( scores = scores, metric = metric, baseline = baseline, diff --git a/R/plot.R b/R/plot.R index 2d951a0c4..29036c48e 100644 --- a/R/plot.R +++ b/R/plot.R @@ -400,7 +400,7 @@ plot_quantile_coverage <- function(coverage, #' between models #' #' @param comparison_result A data.frame as produced by -#' [pairwise_comparison()] +#' [get_pairwise_comparisons()] #' @param type character vector of length one that is either #' "mean_scores_ratio" or "pval". This denotes whether to #' visualise the ratio or the p-value of the pairwise comparison. 
@@ -417,11 +417,11 @@ plot_quantile_coverage <- function(coverage, #' @examples #' library(ggplot2) #' scores <- score(as_forecast(example_quantile)) -#' pairwise <- pairwise_comparison(scores, by = "target_type") -#' plot_pairwise_comparison(pairwise, type = "mean_scores_ratio") + +#' pairwise <- get_pairwise_comparisons(scores, by = "target_type") +#' plot_pairwise_comparisons(pairwise, type = "mean_scores_ratio") + #' facet_wrap(~target_type) -plot_pairwise_comparison <- function(comparison_result, +plot_pairwise_comparisons <- function(comparison_result, type = c("mean_scores_ratio", "pval")) { comparison_result <- data.table::as.data.table(comparison_result) diff --git a/inst/create-metric-tables.R b/inst/create-metric-tables.R index 56accd9d4..8d1e881a0 100644 --- a/inst/create-metric-tables.R +++ b/inst/create-metric-tables.R @@ -189,7 +189,7 @@ pit <- list( mean_score_ratio <- list( `Metric` = "Mean score ratio", `Name` = r"(mean_scores_ratio)", - `Functions` = r"(pairwise_comparison())", + `Functions` = r"(get_pairwise_comparisons())", `D` = r"($\sim$)", `C` = r"($\sim$)", `B` = r"($\sim$)", @@ -201,7 +201,7 @@ mean_score_ratio <- list( relative_skill <- list( `Metric` = "Relative skill", `Name` = list("relative_skill"), - `Functions` = r"(score(), pairwise_comparison())", + `Functions` = r"(score(), get_pairwise_comparisons())", `D` = r"($\sim$)", `C` = r"($\sim$)", `B` = r"($\sim$)", @@ -213,7 +213,7 @@ relative_skill <- list( scaled_relative_skill <- list( `Metric` = "Scaled relative skill", `Name` = "scaled_rel_skill", - `Functions` = r"(score(), pairwise_comparison())", + `Functions` = r"(score(), get_pairwise_comparisons())", `D` = r"($\sim$)", `C` = r"($\sim$)", `B` = r"($\sim$)", diff --git a/inst/manuscript/R/00-standalone-Figure-replication.R b/inst/manuscript/R/00-standalone-Figure-replication.R index c85a54584..9903fe6a7 100644 --- a/inst/manuscript/R/00-standalone-Figure-replication.R +++ b/inst/manuscript/R/00-standalone-Figure-replication.R @@ -575,9 +575,9 @@ score(example_quantile) |> # Figure 9 # =============================================================================# score(example_quantile) |> - pairwise_comparison(by = c("model", "target_type"), - baseline = "EuroCOVIDhub-baseline") |> - plot_pairwise_comparison() + + get_pairwise_comparisons(by = c("model", "target_type"), + baseline = "EuroCOVIDhub-baseline") |> + plot_pairwise_comparisons() + facet_wrap(~ target_type) diff --git a/man/add_pairwise_comparison.Rd b/man/add_pairwise_comparison.Rd deleted file mode 100644 index d7a06bf40..000000000 --- a/man/add_pairwise_comparison.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pairwise-comparisons.R -\name{add_relative_skill} -\alias{add_relative_skill} -\title{Add pairwise comparisons} -\usage{ -add_relative_skill( - scores, - by = "model", - metric = intersect(c("wis", "crps", "brier_score"), names(scores)), - baseline = NULL -) -} -\arguments{ -\item{scores}{An object of class \code{scores} (a data.table with -scores and an additional attribute \code{metrics} as produced by \code{\link[=score]{score()}})} - -\item{by}{character vector with column names that define the grouping level -for the pairwise comparisons. By default (\code{model}), there will be one -relative skill score per model. If, for example, -\code{by = c("model", "location")}. Then you will get a -separate relative skill score for every model in every location. 
Internally, -the data.table with scores will be split according \code{by} (removing "model" -before splitting) and the pairwise comparisons will be computed separately -for the split data.tables.} - -\item{metric}{A string with the name of the metric for which -a relative skill shall be computed. By default this is either "crps", -"wis" or "brier_score" if any of these are available.} - -\item{baseline}{A string with the name of a model. If a baseline is -given, then a scaled relative skill with respect to the baseline will be -returned. By default (\code{NULL}), relative skill will not be scaled with -respect to a baseline model.} -} -\description{ -Adds a columns with relative skills computed by running -pairwise comparisons on the scores. -For more information on -the computation of relative skill, see \code{\link[=pairwise_comparison]{pairwise_comparison()}}. -Relative skill will be calculated for the aggregation level specified in -\code{by}. -} -\keyword{keyword} -\keyword{scoring} diff --git a/man/add_relative_skill.Rd b/man/add_relative_skill.Rd new file mode 100644 index 000000000..83716456b --- /dev/null +++ b/man/add_relative_skill.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pairwise-comparisons.R +\name{add_relative_skill} +\alias{add_relative_skill} +\title{Add pairwise comparisons} +\usage{ +add_relative_skill( + scores, + by = "model", + metric = intersect(c("wis", "crps", "brier_score"), names(scores)), + baseline = NULL +) +} +\description{ +Adds a columns with relative skills computed by running +pairwise comparisons on the scores. +For more information on +the computation of relative skill, see \code{\link[=get_pairwise_comparisons]{get_pairwise_comparisons()}}. +Relative skill will be calculated for the aggregation level specified in +\code{by}. +} +\keyword{keyword} +\keyword{scoring} diff --git a/man/compare_two_models.Rd b/man/compare_two_models.Rd index c8b34e49b..6a595b052 100644 --- a/man/compare_two_models.Rd +++ b/man/compare_two_models.Rd @@ -15,17 +15,10 @@ compare_two_models( ) } \arguments{ -\item{scores}{An object of class \code{scores} (a data.table with -scores and an additional attribute \code{metrics} as produced by \code{\link[=score]{score()}})} - \item{name_model1}{character, name of the first model} \item{name_model2}{character, name of the model to compare against} -\item{metric}{A string with the name of the metric for which -a relative skill shall be computed. By default this is either "crps", -"wis" or "brier_score" if any of these are available.} - \item{one_sided}{Boolean, default is \code{FALSE}, whether two conduct a one-sided instead of a two-sided test to determine significance in a pairwise comparison.} @@ -47,7 +40,7 @@ both models have made a prediction. It gets called from \code{\link[=pairwise_comparison_one_group]{pairwise_comparison_one_group()}}, which handles the comparison of multiple models on a single set of forecasts (there are no subsets of forecasts to be distinguished). \code{\link[=pairwise_comparison_one_group]{pairwise_comparison_one_group()}} -in turn gets called from from \code{\link[=pairwise_comparison]{pairwise_comparison()}} which can handle +in turn gets called from from \code{\link[=get_pairwise_comparisons]{get_pairwise_comparisons()}} which can handle pairwise comparisons for a set of forecasts with multiple subsets, e.g. pairwise comparisons for one set of forecasts, but done separately for two different forecast targets. 
diff --git a/man/correlation.Rd b/man/correlation.Rd index e4fc55985..b90f41d51 100644 --- a/man/correlation.Rd +++ b/man/correlation.Rd @@ -7,9 +7,6 @@ correlation(scores, metrics = NULL, digits = NULL) } \arguments{ -\item{scores}{An object of class \code{scores} (a data.table with -scores and an additional attribute \code{metrics} as produced by \code{\link[=score]{score()}})} - \item{metrics}{A character vector with the metrics to show. If set to \code{NULL} (default), all metrics present in \code{scores} will be shown} diff --git a/man/geometric_mean.Rd b/man/geometric_mean.Rd index 316715299..4f42b0e3e 100644 --- a/man/geometric_mean.Rd +++ b/man/geometric_mean.Rd @@ -16,6 +16,6 @@ the geometric mean of the values in \code{x}. \code{NA} values are ignored. Calculate Geometric Mean } \details{ -Used in \code{\link[=pairwise_comparison]{pairwise_comparison()}}. +Used in \code{\link[=get_pairwise_comparisons]{get_pairwise_comparisons()}}. } \keyword{internal} diff --git a/man/pairwise_comparison.Rd b/man/get_pairwise_comparisons.Rd similarity index 93% rename from man/pairwise_comparison.Rd rename to man/get_pairwise_comparisons.Rd index 0a1b06abd..dcfa18d45 100644 --- a/man/pairwise_comparison.Rd +++ b/man/get_pairwise_comparisons.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/pairwise-comparisons.R -\name{pairwise_comparison} -\alias{pairwise_comparison} +\name{get_pairwise_comparisons} +\alias{get_pairwise_comparisons} \title{Do Pairwise Comparisons of Scores} \usage{ -pairwise_comparison( +get_pairwise_comparisons( scores, by = "model", metric = intersect(c("wis", "crps", "brier_score"), names(scores)), @@ -71,10 +71,10 @@ Andrea Riebler and Michaela Paul. } scores <- score(as_forecast(example_quantile)) -pairwise <- pairwise_comparison(scores, by = "target_type") +pairwise <- get_pairwise_comparisons(scores, by = "target_type") library(ggplot2) -plot_pairwise_comparison(pairwise, type = "mean_scores_ratio") + +plot_pairwise_comparisons(pairwise, type = "mean_scores_ratio") + facet_wrap(~target_type) } \author{ diff --git a/man/pairwise_comparison_one_group.Rd b/man/pairwise_comparison_one_group.Rd index ea304eb29..f154aeaef 100644 --- a/man/pairwise_comparison_one_group.Rd +++ b/man/pairwise_comparison_one_group.Rd @@ -36,11 +36,11 @@ A data.table with pairwise comparisons } \description{ This function does the pairwise comparison for one set of forecasts, but -multiple models involved. It gets called from \code{\link[=pairwise_comparison]{pairwise_comparison()}}. -\code{\link[=pairwise_comparison]{pairwise_comparison()}} splits the data into arbitrary subgroups specified -by the user (e.g. if pairwise comparison should be done separately for -different forecast targets) and then the actual pairwise comparison for that -subgroup is managed from \code{\link[=pairwise_comparison_one_group]{pairwise_comparison_one_group()}}. In order to +multiple models involved. It gets called from \code{\link[=get_pairwise_comparisons]{get_pairwise_comparisons()}}. +\code{\link[=get_pairwise_comparisons]{get_pairwise_comparisons()}} splits the data into arbitrary subgroups +specified by the user (e.g. if pairwise comparison should be done separately +for different forecast targets) and then the actual pairwise comparison for +that subgroup is managed from \code{\link[=pairwise_comparison_one_group]{pairwise_comparison_one_group()}}. 
In order to actually do the comparison between two models over a subset of common forecasts it calls \code{\link[=compare_two_models]{compare_two_models()}}. } diff --git a/man/permutation_test.Rd b/man/permutation_test.Rd index 56a26ecb9..5398025bf 100644 --- a/man/permutation_test.Rd +++ b/man/permutation_test.Rd @@ -40,6 +40,6 @@ vector independently and then takes either the difference or the ratio of the two. This observed difference or ratio is compared against the same test statistic based on permutations of the original data. -Used in \code{\link[=pairwise_comparison]{pairwise_comparison()}}. +Used in \code{\link[=get_pairwise_comparisons]{get_pairwise_comparisons()}}. } \keyword{internal} diff --git a/man/plot_pairwise_comparison.Rd b/man/plot_pairwise_comparisons.Rd similarity index 72% rename from man/plot_pairwise_comparison.Rd rename to man/plot_pairwise_comparisons.Rd index 8695861d9..517644916 100644 --- a/man/plot_pairwise_comparison.Rd +++ b/man/plot_pairwise_comparisons.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.R -\name{plot_pairwise_comparison} -\alias{plot_pairwise_comparison} +\name{plot_pairwise_comparisons} +\alias{plot_pairwise_comparisons} \title{Plot Heatmap of Pairwise Comparisons} \usage{ -plot_pairwise_comparison( +plot_pairwise_comparisons( comparison_result, type = c("mean_scores_ratio", "pval") ) } \arguments{ \item{comparison_result}{A data.frame as produced by -\code{\link[=pairwise_comparison]{pairwise_comparison()}}} +\code{\link[=get_pairwise_comparisons]{get_pairwise_comparisons()}}} \item{type}{character vector of length one that is either "mean_scores_ratio" or "pval". This denotes whether to @@ -29,7 +29,7 @@ between models \examples{ library(ggplot2) scores <- score(as_forecast(example_quantile)) -pairwise <- pairwise_comparison(scores, by = "target_type") -plot_pairwise_comparison(pairwise, type = "mean_scores_ratio") + +pairwise <- get_pairwise_comparisons(scores, by = "target_type") +plot_pairwise_comparisons(pairwise, type = "mean_scores_ratio") + facet_wrap(~target_type) } diff --git a/man/plot_score_table.Rd b/man/plot_score_table.Rd index 8c9506a0d..2acdf1702 100644 --- a/man/plot_score_table.Rd +++ b/man/plot_score_table.Rd @@ -7,9 +7,6 @@ plot_score_table(scores, y = "model", by = NULL, metrics = NULL) } \arguments{ -\item{scores}{An object of class \code{scores} (a data.table with -scores and an additional attribute \code{metrics} as produced by \code{\link[=score]{score()}})} - \item{y}{the variable to be shown on the y-axis. Instead of a single character string, you can also specify a vector with column names, e.g. \code{y = c("model", "location")}. 
These column names will be concatenated diff --git a/tests/testthat/test-pairwise_comparison.R b/tests/testthat/test-pairwise_comparison.R index 6a69369bc..eb91ccc49 100644 --- a/tests/testthat/test-pairwise_comparison.R +++ b/tests/testthat/test-pairwise_comparison.R @@ -1,4 +1,4 @@ -test_that("pairwise_comparison() works", { +test_that("get_pairwise_comparisons() works", { # define some toy data using a format suitable for github.com/reichlab/covidHubUtils test_truth <- data.frame( model = rep("truth_source", 30), @@ -217,7 +217,7 @@ test_that("pairwise_comparison() works", { expect_equal(relative_skills_with$relative_skill, ratios_scaled) }) -test_that("pairwise_comparison() work in score() with integer data", { +test_that("get_pairwise_comparisons() work in score() with integer data", { eval <- suppressMessages(score(data = as_forecast(example_integer))) eval_summarised <- summarise_scores(eval, by = c("model", "target_type")) eval <- add_relative_skill(eval_summarised) @@ -225,7 +225,7 @@ test_that("pairwise_comparison() work in score() with integer data", { }) -test_that("pairwise_comparison() work in score() with binary data", { +test_that("get_pairwise_comparisons() work in score() with binary data", { eval <- suppressMessages(score(data = as_forecast(example_binary))) eval_summarised <- summarise_scores(eval, by = c("model", "target_type")) eval <- add_relative_skill(eval_summarised) @@ -235,7 +235,7 @@ test_that("pairwise_comparison() work in score() with binary data", { # tests for pairwise comparison function --------------------------------------- -test_that("pairwise_comparison() works", { +test_that("get_pairwise_comparisons() works", { df <- data.frame( model = rep(c("model1", "model2", "model3"), each = 10), date = as.Date("2020-01-01") + rep(1:5, each = 2), @@ -245,7 +245,7 @@ test_that("pairwise_comparison() works", { ) attr(df, "metrics") <- c("wis", "ae_median") - res <- suppressMessages(pairwise_comparison(df, baseline = "model1")) + res <- suppressMessages(get_pairwise_comparisons(df, baseline = "model1")) colnames <- c( "model", "compare_against", "mean_scores_ratio", @@ -256,10 +256,10 @@ test_that("pairwise_comparison() works", { }) -test_that("pairwise_comparison() and `add_relative_skill()` give same result", { +test_that("get_pairwise_comparisons() and `add_relative_skill()` give same result", { eval <- scores_continuous - pairwise <- pairwise_comparison(eval, + pairwise <- get_pairwise_comparisons(eval, by = "model", metric = "crps" ) @@ -272,9 +272,9 @@ test_that("pairwise_comparison() and `add_relative_skill()` give same result", { ) }) -test_that("pairwise_comparison() realises when there is no baseline model", { +test_that("get_pairwise_comparisons() realises when there is no baseline model", { expect_error( - pairwise_comparison(scores_quantile, baseline = "missing_model"), + get_pairwise_comparisons(scores_quantile, baseline = "missing_model"), "Assertion on 'baseline' failed: Must be a subset of" ) }) @@ -394,13 +394,13 @@ test_that("Basic input checks for `add_relative_skill() work", { ) }) -test_that("pairwise_comparison() throws errors with wrong inputs", { +test_that("get_pairwise_comparisons() throws errors with wrong inputs", { test <- data.table::copy(scores_continuous) test <- test[, "model" := NULL] # expect error if no model column is found expect_error( - pairwise_comparison(test, by = "location", metric = "crps"), + get_pairwise_comparisons(test, by = "location", metric = "crps"), "Assertion on 'scores' failed: Column 'model' not found in 
data." ) }) @@ -457,7 +457,7 @@ test_that("add_relative_skill() works with point forecasts", { ) pw_point <- summarise_scores(pw_point, by = "model") - pw_manual <- pairwise_comparison( + pw_manual <- get_pairwise_comparisons( scores_point, by = "model", metric = "se_point" ) diff --git a/tests/testthat/test-plot_pairwise_comparison.R b/tests/testthat/test-plot_pairwise_comparison.R index ffbf15374..88bac4fa5 100644 --- a/tests/testthat/test-plot_pairwise_comparison.R +++ b/tests/testthat/test-plot_pairwise_comparison.R @@ -1,17 +1,17 @@ pairwise <- suppressMessages( - pairwise_comparison(scores_quantile, by = "target_type") + get_pairwise_comparisons(scores_quantile, by = "target_type") ) -test_that("plot_pairwise_comparison() works as expected", { - p <- plot_pairwise_comparison(pairwise) + +test_that("plot_pairwise_comparisons() works as expected", { + p <- plot_pairwise_comparisons(pairwise) + ggplot2::facet_wrap(~target_type) expect_s3_class(p, "ggplot") skip_on_cran() vdiffr::expect_doppelganger("plot_pairwise_comparison", p) }) -test_that("plot_pairwise_comparison() works when showing p values", { - p <- plot_pairwise_comparison(pairwise, type = "pval") + +test_that("plot_pairwise_comparisons() works when showing p values", { + p <- plot_pairwise_comparisons(pairwise, type = "pval") + ggplot2::facet_wrap(~target_type) expect_s3_class(p, "ggplot") skip_on_cran() diff --git a/vignettes/scoringutils.Rmd b/vignettes/scoringutils.Rmd index 03388c4a4..1b8585426 100644 --- a/vignettes/scoringutils.Rmd +++ b/vignettes/scoringutils.Rmd @@ -283,12 +283,12 @@ example_quantile %>% Relative scores for different models can be computed using pairwise comparisons, a sort of pairwise tournament where all combinations of two models are compared against each other based on the overlapping set of available forecasts common to both models. Internally, a ratio of the mean scores of both models is computed. The relative score of a model is then the geometric mean of all mean score ratios which involve that model. When a baseline is provided, then that baseline is excluded from the relative scores for individual models (which therefore differ slightly from relative scores without a baseline) and all relative scores are scaled by (i.e. divided by) the relative score of the baseline model. -In `scoringutils`, pairwise comparisons can be made in two ways: Through the standalone function `pairwise_comparison()` or from within `summarise_scores()` which simply adds relative scores to an existing set of scores. +In `scoringutils`, pairwise comparisons can be made in two ways: Through the standalone function `get_pairwise_comparisons()` or from within `summarise_scores()` which simply adds relative scores to an existing set of scores. ```{r} forecast_quantile %>% score() %>% - pairwise_comparison(by = "model", baseline = "EuroCOVIDhub-baseline") + get_pairwise_comparisons(by = "model", baseline = "EuroCOVIDhub-baseline") ``` ```{r} @@ -298,13 +298,13 @@ forecast_quantile %>% add_relative_skill(baseline = "EuroCOVIDhub-baseline") ``` -If using the `pairwise_comparison()` function, we can also visualise pairwise comparisons by showing the mean score ratios between models. By default, smaller values are better and the model we care about is showing on the y axis on the left, while the model against it is compared is shown on the x-axis on the bottom. 
In the example above, the EuroCOVIDhub-ensemble performs best (it only has values smaller 1), while the EuroCOVIDhub-baseline performs worst (and only has values larger than 1). For cases, the UMass-MechBayes model is of course excluded as there are no case forecasts available and therefore the set of overlapping forecasts is empty. +If using the `get_pairwise_comparisons()` function, we can also visualise pairwise comparisons by showing the mean score ratios between models. By default, smaller values are better and the model we care about is showing on the y axis on the left, while the model against it is compared is shown on the x-axis on the bottom. In the example above, the EuroCOVIDhub-ensemble performs best (it only has values smaller 1), while the EuroCOVIDhub-baseline performs worst (and only has values larger than 1). For cases, the UMass-MechBayes model is of course excluded as there are no case forecasts available and therefore the set of overlapping forecasts is empty. ```{r, fig.width=9, fig.height=7} forecast_quantile %>% score() %>% - pairwise_comparison(by = c("model", "target_type")) %>% - plot_pairwise_comparison() + + get_pairwise_comparisons(by = c("model", "target_type")) %>% + plot_pairwise_comparisons() + facet_wrap(~ target_type) ``` From 87348bcd3314c1e094e5cd1d471bc32c611a10b3 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Wed, 20 Mar 2024 13:53:46 +0100 Subject: [PATCH 3/6] fix linting issues --- R/pairwise-comparisons.R | 10 +++++----- R/plot.R | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R index 43264f786..e4ec2519d 100644 --- a/R/pairwise-comparisons.R +++ b/R/pairwise-comparisons.R @@ -67,11 +67,11 @@ #' facet_wrap(~target_type) get_pairwise_comparisons <- function( - scores, - by = "model", - metric = intersect(c("wis", "crps", "brier_score"), names(scores)), - baseline = NULL, - ... + scores, + by = "model", + metric = intersect(c("wis", "crps", "brier_score"), names(scores)), + baseline = NULL, + ... ) { # input checks --------------------------------------------------------------- diff --git a/R/plot.R b/R/plot.R index 29036c48e..3482b7447 100644 --- a/R/plot.R +++ b/R/plot.R @@ -422,7 +422,7 @@ plot_quantile_coverage <- function(coverage, #' facet_wrap(~target_type) plot_pairwise_comparisons <- function(comparison_result, - type = c("mean_scores_ratio", "pval")) { + type = c("mean_scores_ratio", "pval")) { comparison_result <- data.table::as.data.table(comparison_result) relative_skill_metric <- grep( From ee94a77616ea4343d287bc5005399a7f1964b3ab Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Wed, 20 Mar 2024 14:04:54 +0100 Subject: [PATCH 4/6] fix inherit statements --- R/correlations.R | 2 +- R/pairwise-comparisons.R | 4 ++-- R/plot.R | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/correlations.R b/R/correlations.R index ec249688d..e1f8d1c8e 100644 --- a/R/correlations.R +++ b/R/correlations.R @@ -9,7 +9,7 @@ #' be shown #' @param digits A number indicating how many decimal places the result should #' be rounded to. By default (`digits = NULL`) no rounding takes place. 
-#' @inheritParams pairwise_comparison +#' @inheritParams get_pairwise_comparisons #' @return An object of class `scores` (a data.table with an additional #' attribute `metrics` holding the names of the scores) with correlations #' between different metrics diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R index e4ec2519d..3e44cd416 100644 --- a/R/pairwise-comparisons.R +++ b/R/pairwise-comparisons.R @@ -346,7 +346,7 @@ pairwise_comparison_one_group <- function(scores, #' pairwise comparisons for a set of forecasts with multiple subsets, e.g. #' pairwise comparisons for one set of forecasts, but done separately for two #' different forecast targets. -#' @inheritParams pairwise_comparison +#' @inheritParams get_pairwise_comparisons #' @param name_model1 character, name of the first model #' @param name_model2 character, name of the model to compare against #' @param one_sided Boolean, default is `FALSE`, whether two conduct a one-sided @@ -512,7 +512,7 @@ permutation_test <- function(scores1, #' the computation of relative skill, see [get_pairwise_comparisons()]. #' Relative skill will be calculated for the aggregation level specified in #' `by`. -#' @inheritParams pairwise_comparison +#' @inheritParams get_pairwise_comparisons #' @export #' @keywords keyword scoring add_relative_skill <- function( diff --git a/R/plot.R b/R/plot.R index 3482b7447..6633afb69 100644 --- a/R/plot.R +++ b/R/plot.R @@ -15,7 +15,7 @@ #' `NULL` (default), all metrics present in `scores` will be shown. #' #' @return A ggplot object with a coloured table of summarised scores -#' @inheritParams pairwise_comparison +#' @inheritParams get_pairwise_comparisons #' @importFrom ggplot2 ggplot aes element_blank element_text labs coord_cartesian coord_flip #' @importFrom data.table setDT melt #' @importFrom stats sd From 6712f0d927e4601aa799e117bc928d237c9a8d06 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Wed, 20 Mar 2024 14:05:44 +0100 Subject: [PATCH 5/6] update docs --- man/add_relative_skill.Rd | 22 ++++++++++++++++++++++ man/compare_two_models.Rd | 7 +++++++ man/correlation.Rd | 3 +++ man/plot_score_table.Rd | 3 +++ 4 files changed, 35 insertions(+) diff --git a/man/add_relative_skill.Rd b/man/add_relative_skill.Rd index 83716456b..c7c38dbba 100644 --- a/man/add_relative_skill.Rd +++ b/man/add_relative_skill.Rd @@ -11,6 +11,28 @@ add_relative_skill( baseline = NULL ) } +\arguments{ +\item{scores}{An object of class \code{scores} (a data.table with +scores and an additional attribute \code{metrics} as produced by \code{\link[=score]{score()}})} + +\item{by}{character vector with column names that define the grouping level +for the pairwise comparisons. By default (\code{model}), there will be one +relative skill score per model. If, for example, +\code{by = c("model", "location")}. Then you will get a +separate relative skill score for every model in every location. Internally, +the data.table with scores will be split according \code{by} (removing "model" +before splitting) and the pairwise comparisons will be computed separately +for the split data.tables.} + +\item{metric}{A string with the name of the metric for which +a relative skill shall be computed. By default this is either "crps", +"wis" or "brier_score" if any of these are available.} + +\item{baseline}{A string with the name of a model. If a baseline is +given, then a scaled relative skill with respect to the baseline will be +returned. 
By default (\code{NULL}), relative skill will not be scaled with +respect to a baseline model.} +} \description{ Adds a columns with relative skills computed by running pairwise comparisons on the scores. diff --git a/man/compare_two_models.Rd b/man/compare_two_models.Rd index 6a595b052..24ceb5441 100644 --- a/man/compare_two_models.Rd +++ b/man/compare_two_models.Rd @@ -15,10 +15,17 @@ compare_two_models( ) } \arguments{ +\item{scores}{An object of class \code{scores} (a data.table with +scores and an additional attribute \code{metrics} as produced by \code{\link[=score]{score()}})} + \item{name_model1}{character, name of the first model} \item{name_model2}{character, name of the model to compare against} +\item{metric}{A string with the name of the metric for which +a relative skill shall be computed. By default this is either "crps", +"wis" or "brier_score" if any of these are available.} + \item{one_sided}{Boolean, default is \code{FALSE}, whether two conduct a one-sided instead of a two-sided test to determine significance in a pairwise comparison.} diff --git a/man/correlation.Rd b/man/correlation.Rd index b90f41d51..e4fc55985 100644 --- a/man/correlation.Rd +++ b/man/correlation.Rd @@ -7,6 +7,9 @@ correlation(scores, metrics = NULL, digits = NULL) } \arguments{ +\item{scores}{An object of class \code{scores} (a data.table with +scores and an additional attribute \code{metrics} as produced by \code{\link[=score]{score()}})} + \item{metrics}{A character vector with the metrics to show. If set to \code{NULL} (default), all metrics present in \code{scores} will be shown} diff --git a/man/plot_score_table.Rd b/man/plot_score_table.Rd index 2acdf1702..8c9506a0d 100644 --- a/man/plot_score_table.Rd +++ b/man/plot_score_table.Rd @@ -7,6 +7,9 @@ plot_score_table(scores, y = "model", by = NULL, metrics = NULL) } \arguments{ +\item{scores}{An object of class \code{scores} (a data.table with +scores and an additional attribute \code{metrics} as produced by \code{\link[=score]{score()}})} + \item{y}{the variable to be shown on the y-axis. Instead of a single character string, you can also specify a vector with column names, e.g. \code{y = c("model", "location")}. These column names will be concatenated From 5bfc3fb0d06b94d9aff6845251cb366ea36bc01d Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Wed, 20 Mar 2024 14:20:35 +0100 Subject: [PATCH 6/6] Update News file --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index f033206e5..0776219c6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,6 +27,7 @@ The update introduces breaking changes. If you want to keep using the older vers - `check_forecasts()` was replaced by a different workflow. There now is a function, `as_forecast()`, that determines forecast type of the data, constructs a forecasting object and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`. - Users can test whether an object is of class `forecast_*()` using the function `is_forecast()`. Users can also test for a specific `forecast_*` class using the appropriate `is_forecast.forecast_*` method. For example, to check whether an object is of class `forecast_quantile`, you would use you would use `scoringutils:::is_forecast.forecast_quantile()`. 
- The functionality for computing pairwise comparisons was now split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_relative_skill()`, was introduced that takes summarised scores as an input and adds columns with relative skill scores and scaled relative skill scores. +- The function `pairwise_comparison()` was renamed to `get_pairwise_comparisons()`, in line with other `get_`-functions. Analogously, `plot_pairwise_comparison()` was renamed to `plot_pairwise_comparisons()`. - `add_coverage()` was replaced by a new function, `get_coverage()`. This function comes with an updated workflow where coverage values are computed directly based on the original data and can then be visualised using `plot_interval_coverage()` or `plot_quantile_coverage()`. An example worfklow would be `example_quantile |> as_forecast() |> get_coverage(by = "model") |> plot_interval_coverage()`. - Support for the interval format was mostly dropped (see PR #525 by @nikosbosse and reviewed by @seabbs) - The function `bias_range()` was removed (users should now use `bias_quantile()` instead)
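
---

Taken together, the series renames three exported functions: `add_pairwise_comparison()` becomes `add_relative_skill()`, `pairwise_comparison()` becomes `get_pairwise_comparisons()`, and `plot_pairwise_comparison()` becomes `plot_pairwise_comparisons()`. A rough migration sketch for downstream code, assembled from the examples touched in these patches (illustrative only; see the updated documentation for argument details):

```r
library(scoringutils)
library(ggplot2)

scores <- score(as_forecast(example_quantile))

# add_pairwise_comparison() -> add_relative_skill():
# adds relative skill (and, with a baseline, scaled relative skill) columns
scores_rel <- add_relative_skill(
  scores,
  by = "model",
  baseline = "EuroCOVIDhub-baseline"
)

# pairwise_comparison() -> get_pairwise_comparisons():
# returns the table of pairwise mean score ratios and p-values
pairwise <- get_pairwise_comparisons(scores, by = "target_type")

# plot_pairwise_comparison() -> plot_pairwise_comparisons():
# heatmap of mean score ratios between models
plot_pairwise_comparisons(pairwise, type = "mean_scores_ratio") +
  facet_wrap(~target_type)
```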