Merge pull request #722 from epiforecasts/rename-pairwise-comparison
Rename pairwise comparisons
nikosbosse authored Mar 22, 2024
2 parents 0982955 + 004b1f3 commit f7b5283
Showing 17 changed files with 108 additions and 107 deletions.
6 changes: 3 additions & 3 deletions NAMESPACE
@@ -23,7 +23,7 @@ S3method(validate_forecast,forecast_binary)
S3method(validate_forecast,forecast_point)
S3method(validate_forecast,forecast_quantile)
S3method(validate_forecast,forecast_sample)
export(add_pairwise_comparison)
export(add_relative_skill)
export(ae_median_quantile)
export(ae_median_sample)
export(as_forecast)
@@ -40,6 +40,7 @@ export(get_forecast_counts)
export(get_forecast_type)
export(get_forecast_unit)
export(get_metrics)
export(get_pairwise_comparisons)
export(get_pit)
export(interval_coverage)
export(interval_coverage_deviation)
@@ -55,13 +56,12 @@ export(metrics_quantile)
export(metrics_sample)
export(new_forecast)
export(overprediction)
export(pairwise_comparison)
export(pit_sample)
export(plot_correlations)
export(plot_forecast_counts)
export(plot_heatmap)
export(plot_interval_coverage)
export(plot_pairwise_comparison)
export(plot_pairwise_comparisons)
export(plot_pit)
export(plot_quantile_coverage)
export(plot_score_table)
3 changes: 2 additions & 1 deletion NEWS.md
@@ -26,7 +26,8 @@ The update introduces breaking changes. If you want to keep using the older vers
- `score()` now returns objects of class `scores` with a stored attribute `metrics` that holds the names of the scoring rules that were used. Users can call `get_metrics()` to access the names of those scoring rules.
- `check_forecasts()` was replaced by a different workflow. There now is a function, `as_forecast()`, that determines the forecast type of the data, constructs a forecasting object and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`.
- Users can test whether an object is of class `forecast_*()` using the function `is_forecast()`. Users can also test for a specific `forecast_*` class using the appropriate `is_forecast.forecast_*` method. For example, to check whether an object is of class `forecast_quantile`, you would use `scoringutils:::is_forecast.forecast_quantile()`.
- The functionality for computing pairwise comparisons was now split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_pairwise_comparison()`, was introduced that takes summarised scores as an input and adds columns with relative skil scores and scaled relative skill scores.
- The functionality for computing pairwise comparisons was now split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_relative_skill()`, was introduced that takes summarised scores as an input and adds columns with relative skill scores and scaled relative skill scores.
- The function `pairwise_comparison()` was renamed to `get_pairwise_comparisons()`, in line with other `get_`-functions. Analogously, `plot_pairwise_comparison()` was renamed to `plot_pairwise_comparisons()`.
- `add_coverage()` was replaced by a new function, `get_coverage()`. This function comes with an updated workflow where coverage values are computed directly based on the original data and can then be visualised using `plot_interval_coverage()` or `plot_quantile_coverage()`. An example workflow would be `example_quantile |> as_forecast() |> get_coverage(by = "model") |> plot_interval_coverage()`.
- Support for the interval format was mostly dropped (see PR #525 by @nikosbosse and reviewed by @seabbs)
- The function `bias_range()` was removed (users should now use `bias_quantile()` instead)
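
For orientation, here is a minimal usage sketch of the renamed functions, adapted from the roxygen examples further down in this diff; it assumes the packaged `example_quantile` data, and the `add_pairwise_comparison()` → `add_relative_skill()` rename follows the same pattern:

library(scoringutils)
library(ggplot2)

# Score the validated example forecasts, then run pairwise comparisons
# (formerly pairwise_comparison()) separately per target type.
scores <- score(as_forecast(example_quantile))
pairwise <- get_pairwise_comparisons(scores, by = "target_type")

# Plot mean score ratios between models (formerly plot_pairwise_comparison()).
plot_pairwise_comparisons(pairwise, type = "mean_scores_ratio") +
  facet_wrap(~target_type)
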
2 changes: 1 addition & 1 deletion R/correlations.R
@@ -9,7 +9,7 @@
#' be shown
#' @param digits A number indicating how many decimal places the result should
#' be rounded to. By default (`digits = NULL`) no rounding takes place.
#' @inheritParams pairwise_comparison
#' @inheritParams get_pairwise_comparisons
#' @return An object of class `scores` (a data.table with an additional
#' attribute `metrics` holding the names of the scores) with correlations
#' between different metrics
36 changes: 18 additions & 18 deletions R/pairwise-comparisons.R
@@ -60,13 +60,13 @@
#' }
#'
#' scores <- score(as_forecast(example_quantile))
#' pairwise <- pairwise_comparison(scores, by = "target_type")
#' pairwise <- get_pairwise_comparisons(scores, by = "target_type")
#'
#' library(ggplot2)
#' plot_pairwise_comparison(pairwise, type = "mean_scores_ratio") +
#' plot_pairwise_comparisons(pairwise, type = "mean_scores_ratio") +
#' facet_wrap(~target_type)

pairwise_comparison <- function(
get_pairwise_comparisons <- function(
scores,
by = "model",
metric = intersect(c("wis", "crps", "brier_score"), names(scores)),
@@ -204,14 +204,14 @@ pairwise_comparison <- function(
#' @description
#'
#' This function does the pairwise comparison for one set of forecasts, but
#' multiple models involved. It gets called from [pairwise_comparison()].
#' [pairwise_comparison()] splits the data into arbitrary subgroups specified
#' by the user (e.g. if pairwise comparison should be done separately for
#' different forecast targets) and then the actual pairwise comparison for that
#' subgroup is managed from [pairwise_comparison_one_group()]. In order to
#' multiple models involved. It gets called from [get_pairwise_comparisons()].
#' [get_pairwise_comparisons()] splits the data into arbitrary subgroups
#' specified by the user (e.g. if pairwise comparison should be done separately
#' for different forecast targets) and then the actual pairwise comparison for
#' that subgroup is managed from [pairwise_comparison_one_group()]. In order to
#' actually do the comparison between two models over a subset of common
#' forecasts it calls [compare_two_models()].
#' @inherit pairwise_comparison params return
#' @inherit get_pairwise_comparisons params return
#' @importFrom cli cli_abort
#' @keywords internal
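
As a purely conceptual illustration of the call hierarchy described in the roxygen block above (hypothetical helper names and assumed column names, not the package internals):

# Simplified sketch: split the scores into subgroups, then compare every pair
# of models within each subgroup on the forecasts they have in common.
sketch_pairwise <- function(scores, subgroup, forecast_unit, metric = "wis") {
  groups <- split(as.data.frame(scores), scores[[subgroup]])
  lapply(groups, function(group) {
    pairs <- utils::combn(unique(group$model), 2, simplify = FALSE)
    ratios <- lapply(pairs, function(p) {
      sketch_compare_two(group, p[1], p[2], forecast_unit, metric)
    })
    names(ratios) <- vapply(pairs, paste, character(1), collapse = " vs ")
    ratios
  })
}

sketch_compare_two <- function(group, model1, model2, forecast_unit, metric) {
  s1 <- group[group$model == model1, ]
  s2 <- group[group$model == model2, ]
  common <- merge(s1, s2, by = forecast_unit)  # forecasts both models made
  mean(common[[paste0(metric, ".x")]]) / mean(common[[paste0(metric, ".y")]])
}

# e.g. (column names assumed for illustration):
# sketch_pairwise(scores, subgroup = "target_type",
#                 forecast_unit = c("location", "target_end_date", "horizon"))
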

@@ -342,11 +342,11 @@ pairwise_comparison_one_group <- function(scores,
#' from [pairwise_comparison_one_group()], which handles the
#' comparison of multiple models on a single set of forecasts (there are no
#' subsets of forecasts to be distinguished). [pairwise_comparison_one_group()]
#' in turn gets called from [pairwise_comparison()] which can handle
#' in turn gets called from [get_pairwise_comparisons()] which can handle
#' pairwise comparisons for a set of forecasts with multiple subsets, e.g.
#' pairwise comparisons for one set of forecasts, but done separately for two
#' different forecast targets.
#' @inheritParams pairwise_comparison
#' @inheritParams get_pairwise_comparisons
#' @param name_model1 character, name of the first model
#' @param name_model2 character, name of the model to compare against
#' @param one_sided Boolean, default is `FALSE`, whether to conduct a one-sided
@@ -430,7 +430,7 @@ compare_two_models <- function(scores,
#' @title Calculate Geometric Mean
#'
#' @details
#' Used in [pairwise_comparison()].
#' Used in [get_pairwise_comparisons()].
#'
#' @param x numeric vector of values for which to calculate the geometric mean
#' @return the geometric mean of the values in `x`. `NA` values are ignored.
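
The computation referenced here is the standard geometric mean; a one-line sketch consistent with the note above that `NA` values are ignored:

# exponential of the mean log value, with NAs dropped before averaging
geometric_mean_sketch <- function(x) exp(mean(log(x), na.rm = TRUE))
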
@@ -452,7 +452,7 @@ geometric_mean <- function(x) {
#' the two. This observed difference or ratio is compared against the same
#' test statistic based on permutations of the original data.
#'
#' Used in [pairwise_comparison()].
#' Used in [get_pairwise_comparisons()].
#'
#' @param scores1 vector of scores to compare against another vector of scores
#' @param scores2 A second vector of scores to compare against the first
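
A self-contained sketch of the permutation test idea described above (hypothetical function name; the package's implementation, defaults and sidedness handling may differ):

# Rank the observed mean-score ratio against ratios obtained after randomly
# swapping each pair of scores between the two models.
sketch_permutation_test <- function(scores1, scores2, n_permutation = 999) {
  observed <- mean(scores1) / mean(scores2)
  permuted <- replicate(n_permutation, {
    swap <- runif(length(scores1)) < 0.5
    m1 <- mean(ifelse(swap, scores2, scores1))
    m2 <- mean(ifelse(swap, scores1, scores2))
    m1 / m2
  })
  # two-sided p-value on the log scale, counting the observed value itself
  (sum(abs(log(permuted)) >= abs(log(observed))) + 1) / (n_permutation + 1)
}
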
@@ -509,22 +509,22 @@ permutation_test <- function(scores1,
#' @description Adds columns with relative skill scores computed by running
#' pairwise comparisons on the scores.
#' For more information on
#' the computation of relative skill, see [pairwise_comparison()].
#' the computation of relative skill, see [get_pairwise_comparisons()].
#' Relative skill will be calculated for the aggregation level specified in
#' `by`.
#' @inheritParams pairwise_comparison
#' @inheritParams get_pairwise_comparisons
#' @export
#' @keywords keyword scoring
add_pairwise_comparison <- function(
add_relative_skill <- function(
scores,
by = "model",
metric = intersect(c("wis", "crps", "brier_score"), names(scores)),
baseline = NULL
) {

# input checks are done in `pairwise_comparison()`
# input checks are done in `get_pairwise_comparisons()`
# do pairwise comparisons ----------------------------------------------------
pairwise <- pairwise_comparison(
pairwise <- get_pairwise_comparisons(
scores = scores,
metric = metric,
baseline = baseline,
12 changes: 6 additions & 6 deletions R/plot.R
@@ -15,7 +15,7 @@
#' `NULL` (default), all metrics present in `scores` will be shown.
#'
#' @return A ggplot object with a coloured table of summarised scores
#' @inheritParams pairwise_comparison
#' @inheritParams get_pairwise_comparisons
#' @importFrom ggplot2 ggplot aes element_blank element_text labs coord_cartesian coord_flip
#' @importFrom data.table setDT melt
#' @importFrom stats sd
@@ -400,7 +400,7 @@ plot_quantile_coverage <- function(coverage,
#' between models
#'
#' @param comparison_result A data.frame as produced by
#' [pairwise_comparison()]
#' [get_pairwise_comparisons()]
#' @param type character vector of length one that is either
#' "mean_scores_ratio" or "pval". This denotes whether to
#' visualise the ratio or the p-value of the pairwise comparison.
@@ -417,12 +417,12 @@ plot_quantile_coverage <- function(coverage,
#' @examples
#' library(ggplot2)
#' scores <- score(as_forecast(example_quantile))
#' pairwise <- pairwise_comparison(scores, by = "target_type")
#' plot_pairwise_comparison(pairwise, type = "mean_scores_ratio") +
#' pairwise <- get_pairwise_comparisons(scores, by = "target_type")
#' plot_pairwise_comparisons(pairwise, type = "mean_scores_ratio") +
#' facet_wrap(~target_type)

plot_pairwise_comparison <- function(comparison_result,
type = c("mean_scores_ratio", "pval")) {
plot_pairwise_comparisons <- function(comparison_result,
type = c("mean_scores_ratio", "pval")) {
comparison_result <- data.table::as.data.table(comparison_result)

relative_skill_metric <- grep(
6 changes: 3 additions & 3 deletions inst/create-metric-tables.R
@@ -189,7 +189,7 @@ pit <- list(
mean_score_ratio <- list(
`Metric` = "Mean score ratio",
`Name` = r"(mean_scores_ratio)",
`Functions` = r"(pairwise_comparison())",
`Functions` = r"(get_pairwise_comparisons())",
`D` = r"($\sim$)",
`C` = r"($\sim$)",
`B` = r"($\sim$)",
@@ -201,7 +201,7 @@ mean_score_ratio <- list(
relative_skill <- list(
`Metric` = "Relative skill",
`Name` = list("relative_skill"),
`Functions` = r"(score(), pairwise_comparison())",
`Functions` = r"(score(), get_pairwise_comparisons())",
`D` = r"($\sim$)",
`C` = r"($\sim$)",
`B` = r"($\sim$)",
@@ -213,7 +213,7 @@ relative_skill <- list(
scaled_relative_skill <- list(
`Metric` = "Scaled relative skill",
`Name` = "scaled_rel_skill",
`Functions` = r"(score(), pairwise_comparison())",
`Functions` = r"(score(), get_pairwise_comparisons())",
`D` = r"($\sim$)",
`C` = r"($\sim$)",
`B` = r"($\sim$)",
6 changes: 3 additions & 3 deletions inst/manuscript/R/00-standalone-Figure-replication.R
@@ -575,9 +575,9 @@ score(example_quantile) |>
# Figure 9
# =============================================================================#
score(example_quantile) |>
pairwise_comparison(by = c("model", "target_type"),
baseline = "EuroCOVIDhub-baseline") |>
plot_pairwise_comparison() +
get_pairwise_comparisons(by = c("model", "target_type"),
baseline = "EuroCOVIDhub-baseline") |>
plot_pairwise_comparisons() +
facet_wrap(~ target_type)


8 changes: 4 additions & 4 deletions man/add_pairwise_comparison.Rd → man/add_relative_skill.Rd


2 changes: 1 addition & 1 deletion man/compare_two_models.Rd


2 changes: 1 addition & 1 deletion man/geometric_mean.Rd


10 changes: 5 additions & 5 deletions man/pairwise_comparison.Rd → man/get_pairwise_comparisons.Rd


10 changes: 5 additions & 5 deletions man/pairwise_comparison_one_group.Rd


2 changes: 1 addition & 1 deletion man/permutation_test.Rd
