Make set_forecast_unit() internal

epiforecasts · Sep 11, 2024 · 939278f · 939278f
1 parent c011628
commit 939278f
Show file tree

Hide file tree

Showing 11 changed files with 36 additions and 12 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -19,3 +19,4 @@
 ^CODE_OF_CONDUCT\.md$
 ^inst/manuscript/output$
 ^CRAN-SUBMISSION$
+^.vscode
diff --git a/NAMESPACE b/NAMESPACE
@@ -82,7 +82,6 @@ export(quantile_score)
 export(score)
 export(se_mean_sample)
 export(select_metrics)
-export(set_forecast_unit)
 export(summarise_scores)
 export(summarize_scores)
 export(theme_scoringutils)

diff --git a/NEWS.md b/NEWS.md
@@ -82,6 +82,7 @@ of our [original](https://doi.org/10.48550/arXiv.2205.07090) `scoringutils` pape
 - Removed the function `plot_score_table()`. You can find the code in the Deprecated-visualisations Vignette. 
 - Removed the function `merge_pred_and_obs()` that was used to merge two separate data frames with forecasts and observations. We moved its contents to a new "Deprecated functions"-vignette.
 - Removed `interval_coverage_sample()` as users are now expected to convert to a quantile format first before scoring.
+- Function `set_forecast_unit()` is no longer exported and is now an internal function. Instead, there is now a `forecast_unit` argument in `as_forecast_<type>()` as well as in `get_duplicate_forecasts()`.
 
 ### Function changes
 - `bias_quantile()` changed the way it handles forecasts where the median is missing: The median is now imputed by linear interpolation between the innermost quantiles. Previously, we imputed the median by simply taking the mean of the innermost quantiles.

diff --git a/R/convenience-functions.R b/R/convenience-functions.R
@@ -266,12 +266,11 @@ log_shift <- function(x, offset = 0, base = exp(1)) {
 #'   scoring or denote the unit of a single forecast as specified by the user.
 #' @importFrom data.table ':=' is.data.table copy
 #' @importFrom checkmate assert_character assert_subset
-#' @export
 #' @keywords as_forecast
 #' @examples
 #' library(magrittr) # pipe operator
 #' example_quantile %>%
-#'   set_forecast_unit(
+#'   scoringutils:::set_forecast_unit(
 #'     c("location", "target_end_date", "target_type", "horizon", "model")
 #'   )
 set_forecast_unit <- function(data, forecast_unit) {

diff --git a/R/forecast.R b/R/forecast.R
@@ -16,7 +16,7 @@
 #' existing columns of their input data to match the required columns for a
 #' forecast object. Using the argument `forecast_unit`, users can specify the
 #' columns that uniquely identify a single forecast (and remove the others,
-#' see [set_forecast_unit()] for details).
+#' see docs for the internal [set_forecast_unit()] for details).
 #'
 #' The following functions are available:
 #' - [as_forecast_point()]

diff --git a/R/get_-functions.R b/R/get_-functions.R
@@ -279,11 +279,11 @@ get_protected_columns <- function(data = NULL) {
 #' @title Find duplicate forecasts
 #'
 #' @description
-#' Helper function to identify duplicate forecasts, i.e.
+#' Internal helper function to identify duplicate forecasts, i.e.
 #' instances where there is more than one forecast for the same prediction
 #' target.
 #'
-#' @param data A data.frame as used for [score()]
+#' @inheritParams as_forecast
 #' @param counts Should the output show the number of duplicates per forecast
 #'   unit instead of the individual duplicated rows? Default is `FALSE`.
 #' @return A data.frame with all rows for which a duplicate forecast was found
@@ -297,10 +297,15 @@ get_protected_columns <- function(data = NULL) {
 
 get_duplicate_forecasts <- function(
   data,
+  forecast_unit = NULL,
   counts = FALSE
 ) {
   assert_data_frame(data)
   data <- ensure_data.table(data)
+
+  if (!is.null(forecast_unit)) {
+    data <- set_forecast_unit(data, forecast_unit)
+  }
   forecast_unit <- get_forecast_unit(data)
   available_type <- c("sample_id", "quantile_level", "predicted_label") %in% colnames(data)
   type <- c("sample_id", "quantile_level", "predicted_label")[available_type]

diff --git a/man/as_forecast.Rd b/man/as_forecast.Rd
diff --git a/man/check_duplicates.Rd b/man/check_duplicates.Rd
diff --git a/man/get_duplicate_forecasts.Rd b/man/get_duplicate_forecasts.Rd
diff --git a/man/set_forecast_unit.Rd b/man/set_forecast_unit.Rd
diff --git a/tests/testthat/test-get_-functions.R b/tests/testthat/test-get_-functions.R
@@ -165,6 +165,14 @@ test_that("get_type() handles `NA` values", {
 # get_duplicate_forecasts()
 # ==============================================================================
 test_that("get_duplicate_forecasts() works as expected for quantile", {
+  expect_no_condition(get_duplicate_forecasts(
+    example_quantile,
+    forecast_unit =
+      c("location", "target_end_date", "target_type", "location_name",
+        "forecast_date", "model")
+    )
+  )
+
   expect_equal(nrow(get_duplicate_forecasts(example_quantile)), 0)
   expect_equal(
     nrow(