epiforecasts · nikosbosse · Dec 9, 2024 · Dec 3, 2024 · Dec 3, 2024 · Dec 3, 2024
diff --git a/NAMESPACE b/NAMESPACE
@@ -12,11 +12,13 @@ S3method(as_forecast_quantile,forecast_sample)
 S3method(assert_forecast,default)
 S3method(assert_forecast,forecast_binary)
 S3method(assert_forecast,forecast_nominal)
+S3method(assert_forecast,forecast_ordinal)
 S3method(assert_forecast,forecast_point)
 S3method(assert_forecast,forecast_quantile)
 S3method(assert_forecast,forecast_sample)
 S3method(get_metrics,forecast_binary)
 S3method(get_metrics,forecast_nominal)
+S3method(get_metrics,forecast_ordinal)
 S3method(get_metrics,forecast_point)
 S3method(get_metrics,forecast_quantile)
 S3method(get_metrics,forecast_sample)
@@ -29,6 +31,7 @@ S3method(print,forecast)
 S3method(score,default)
 S3method(score,forecast_binary)
 S3method(score,forecast_nominal)
+S3method(score,forecast_ordinal)
 S3method(score,forecast_point)
 S3method(score,forecast_quantile)
 S3method(score,forecast_sample)
@@ -38,6 +41,7 @@ export(ae_median_quantile)
 export(ae_median_sample)
 export(as_forecast_binary)
 export(as_forecast_nominal)
+export(as_forecast_ordinal)
 export(as_forecast_point)
 export(as_forecast_quantile)
 export(as_forecast_sample)
@@ -61,12 +65,14 @@ export(interval_coverage)
 export(is_forecast)
 export(is_forecast_binary)
 export(is_forecast_nominal)
+export(is_forecast_ordinal)
 export(is_forecast_point)
 export(is_forecast_quantile)
 export(is_forecast_sample)
 export(log_shift)
 export(logs_binary)
 export(logs_nominal)
+export(logs_ordinal)
 export(logs_sample)
 export(mad_sample)
 export(new_forecast)
@@ -81,6 +87,7 @@ export(plot_pairwise_comparisons)
 export(plot_quantile_coverage)
 export(plot_wis)
 export(quantile_score)
+export(rps_ordinal)
 export(score)
 export(se_mean_sample)
 export(select_metrics)
@@ -183,6 +190,7 @@ importFrom(purrr,partial)
 importFrom(scoringRules,crps_sample)
 importFrom(scoringRules,dss_sample)
 importFrom(scoringRules,logs_sample)
+importFrom(scoringRules,rps_probs)
 importFrom(stats,cor)
 importFrom(stats,mad)
 importFrom(stats,median)

diff --git a/NEWS.md b/NEWS.md
diff --git a/R/class-forecast-nominal.R b/R/class-forecast-nominal.R
@@ -72,6 +72,9 @@ assert_forecast.forecast_nominal <- function(
   )
   assert_forecast_type(forecast, actual = "nominal", desired = forecast_type)
 
+  assert_factor(forecast$observed, ordered = FALSE)
+  assert_factor(forecast$predicted_label, ordered = FALSE)
+
   # levels need to be the same
   outcomes <- levels(forecast$observed)
   assert_set_equal(levels(forecast$predicted_label), outcomes)

diff --git a/R/class-forecast-ordinal.R b/R/class-forecast-ordinal.R
@@ -0,0 +1,195 @@
+#' @title Create a `forecast` object for ordinal forecasts
+#' @inherit as_forecast_doc_template params description
+#' @details
+#' Ordinal forecasts are a form of categorical forecasts and represent a
+#' generalisation of binary forecasts to multiple outcomes. The possible
+#' outcomes that the observed values can assume are ordered.
+#'
+#' # Required input
+#'
+#' The input needs to be a data.frame or similar with the following columns:
+#' - `observed`: Column with observed values of type `factor` with N ordered
+#'   levels, where N is the number of possible outcomes.
+#'   The levels of the factor represent the possible outcomes that
+#'   the observed values can assume.
+#' - `predicted`: `numeric` column with predicted probabilities. The values
+#'    represent the probability that the observed value is equal to the factor
+#'    level denoted in `predicted_label`. Note that forecasts must be complete,
+#'    i.e. there must be a probability assigned to every possible outcome and
+#'    those probabilities must sum to one.
+#' - `predicted_label`: `factor` with N levels, denoting the outcome that the
+#'    probabilities in `predicted` correspond to.
+#'
+#' For convenience, we recommend an additional column `model` holding the name
+#' of the forecaster or model that produced a prediction, but this is not
+#' strictly necessary.
+#'
+#' See the [example_ordinal] data set for an example.
+#' @inheritSection forecast_types Forecast unit
+#' @param predicted_label (optional) Name of the column in `data` that denotes
+#'   the outcome to which a predicted probability corresponds to.
+#'   This column will be renamed to "predicted_label".
+#' @returns A `forecast` object of class `forecast_ordinal`
+#' @family functions to create forecast objects
+#' @keywords as_forecast
+#' @export
+#' @examples
+#' as_forecast_ordinal(
+#'   na.omit(example_ordinal),
+#'   predicted = "predicted",
+#'   forecast_unit = c("model", "target_type", "target_end_date",
+#'                     "horizon", "location")
+#' )
+as_forecast_ordinal <- function(data,
+                                forecast_unit = NULL,
+                                observed = NULL,
+                                predicted = NULL,
+                                predicted_label = NULL) {
+  assert_character(predicted_label, len = 1, null.ok = TRUE)
+  assert_subset(predicted_label, names(data), empty.ok = TRUE)
+  if (!is.null(predicted_label)) {
+    setnames(data, old = predicted_label, new = "predicted_label")
+  }
+
+  data <- as_forecast_generic(data, forecast_unit, observed, predicted)
+  data <- new_forecast(data, "forecast_ordinal")
+  assert_forecast(data)
+  return(data)
+}
+
+
+#' @export
+#' @keywords check-forecasts
+#' @importFrom checkmate assert_names assert_set_equal test_set_equal assert_factor
+assert_forecast.forecast_ordinal <- function(
+  forecast, forecast_type = NULL, verbose = TRUE, ...
+) {
+  forecast <- assert_forecast_generic(forecast, verbose)
+  assert(check_columns_present(forecast, "predicted_label"))
+  assert_names(
+    colnames(forecast),
+    disjunct.from = c("sample_id", "quantile_level")
+  )
+  assert_forecast_type(forecast, actual = "ordinal", desired = forecast_type)
+
+  assert_factor(forecast$observed, ordered = TRUE)
+  assert_factor(forecast$predicted_label, ordered = TRUE)
+
+  observed_levels <- levels(forecast$observed)
+  predicted_label_levels <- levels(forecast$predicted_label)
+  if (!identical(predicted_label_levels, observed_levels)) {
+    cli_abort(
+      "Levels of `predicted_label` and `observed` must be identical
+      and in the same order. Found levels {.val {predicted_label_levels}}
+      and {.val {observed_levels}}."
+    )
+  }
+
+  # forecasts need to be complete
+  forecast_unit <- get_forecast_unit(forecast)
+  complete <- as.data.table(forecast)[, .(
+    correct = test_set_equal(as.character(predicted_label), observed_levels)
+  ), by = forecast_unit]
+
+  if (!all(complete$correct)) {
+    first_issue <- complete[(correct), ..forecast_unit][1]
+    first_issue <- lapply(first_issue, FUN = as.character)
+    #nolint start: keyword_quote_linter object_usage_linter duplicate_argument_linter
+    issue_location <- paste(names(first_issue), "==", first_issue)
+    cli_abort(
+      c(`!` = "Found incomplete forecasts",
+        `i` = "For an ordinal forecast, all possible outcomes must be assigned
+        a probability explicitly.",
+        `i` = "Found first missing probabilities in the forecast identified by
+        {.emph {issue_location}}")
+    )
+    #nolint end
+  }
+  return(forecast[])
+}
+
+
+#' @export
+#' @rdname is_forecast
+is_forecast_ordinal <- function(x) {
+  inherits(x, "forecast_ordinal") && inherits(x, "forecast")
+}
+
+
+#' @importFrom stats na.omit
+#' @importFrom data.table setattr
+#' @rdname score
+#' @export
+score.forecast_ordinal <- function(forecast, metrics = get_metrics(forecast), ...) {
+  forecast <- clean_forecast(forecast, copy = TRUE, na.omit = TRUE)
+  forecast_unit <- get_forecast_unit(forecast)
+  metrics <- validate_metrics(metrics)
+  forecast <- as.data.table(forecast)
+
+  # transpose the forecasts that belong to the same forecast unit
+  # make sure the labels and predictions are ordered in the same way
+  f_transposed <- forecast[, .(
+    predicted = list(predicted[order(predicted_label)]),
+    observed = unique(observed)
+  ), by = forecast_unit]
+
+  observed <- f_transposed$observed
+  predicted <- do.call(rbind, f_transposed$predicted)
+  predicted_label <- sort(unique(forecast$predicted_label, na.last = TRUE))
+  f_transposed[, c("observed", "predicted") := NULL]
+
+  scores <- apply_metrics(
+    f_transposed, metrics,
+    observed, predicted, predicted_label, ...
+  )
+  scores <- as_scores(scores, metrics = names(metrics))
+  return(scores[])
+}
+
+
+#' Get default metrics for nominal forecasts
+#' @inheritParams get_metrics.forecast_binary
+#' @description
+#' For nominal forecasts, the default scoring rule is:
+#' - "log_score" = [logs_nominal()]
+#' - "rps" = [rps_ordinal()]
+#' @export
+#' @family get_metrics functions
+#' @keywords handle-metrics
+#' @examples
+#' get_metrics(example_ordinal)
+get_metrics.forecast_ordinal <- function(x, select = NULL, exclude = NULL, ...) {
+  all <- list(
+    log_score = logs_nominal,
+    rps = rps_ordinal
+  )
+  select_metrics(all, select, exclude)
+}
+
+
+#' Ordinal example data
+#'
+#' A data set with predictions for COVID-19 cases and deaths submitted to the
+#' European Forecast Hub.
+#'
+#' The data was created using the script create-example-data.R in the inst/
+#' folder (or the top level folder in a compiled package).
+#'
+#' @format An object of class `forecast_ordinal`
+#' (see [as_forecast_ordinal()]) with the following columns:
+#' \describe{
+#'   \item{location}{the country for which a prediction was made}
+#'   \item{target_end_date}{the date for which a prediction was made}
+#'   \item{target_type}{the target to be predicted (cases or deaths)}
+#'   \item{observed}{Numeric: observed values}
+#'   \item{location_name}{name of the country for which a prediction was made}
+#'   \item{forecast_date}{the date on which a prediction was made}
+#'   \item{predicted_label}{outcome that a probabilty corresponds to}
+#'   \item{predicted}{predicted value}
+#'   \item{model}{name of the model that generated the forecasts}
+#'   \item{horizon}{forecast horizon in weeks}
+#' }
+# nolint start
+#' @source \url{https://github.com/european-modelling-hubs/covid19-forecast-hub-europe_archive/commit/a42867b1ea152c57e25b04f9faa26cfd4bfd8fa6/}
+# nolint end
+"example_ordinal"
diff --git a/R/documentation-templates.R b/R/documentation-templates.R
@@ -148,3 +148,18 @@ NULL
 #' @name illustration-input-metric-nominal
 #' @keywords internal
 NULL
+
+#' Illustration of required inputs for ordinal forecasts
+#' @details # Input format
+#' \if{html}{
+#'   \out{<div style="text-align: left">}
+#'   \figure{metrics-ordinal.png}{options: style="width:750px;max-width:100\%;"}
+#'   \out{</div><p>}
+#'   Overview of required input format for ordinal forecasts
+#' }
+#' \if{latex}{
+#'   \figure{metrics-ordinal.png}
+#' }
+#' @name illustration-input-metric-ordinal
+#' @keywords internal
+NULL
diff --git a/R/metrics-nominal.R b/R/metrics-nominal.R
@@ -4,18 +4,14 @@
 #' @param observed Input to be checked. Should be a factor of length n with
 #'   N levels holding the observed values. n is the number of observations and
 #'   N is the number of possible outcomes the observed values can assume.
-#'   output)
-#' @param predicted Input to be checked. Should be nxN matrix of predictive
-#'   quantiles, n (number of rows) being the number of data points and N
+#' @param predicted Input to be checked. Should be nxN matrix of predicted
+#'   probabilities, n (number of rows) being the number of data points and N
 #'   (number of columns) the number of possible outcomes the observed values
 #'   can assume.
 #'   If `observed` is just a single number, then predicted can just be a
 #'   vector of size N.
-#' @param predicted Input to be checked. `predicted` should be a vector of
-#'   length n, holding probabilities. Alternatively, `predicted` can be a matrix
-#'   of size n x 1. Values represent the probability that
-#'   the corresponding value in `observed` will be equal to the highest
-#'   available factor level.
+#'   Values represent the probability that the corresponding value
+#'   in `observed` will be equal to the highest available factor level.
 #' @param predicted_label Factor of length N with N levels, where N is the
 #'   number of possible outcomes the observed values can assume.
 #' @importFrom checkmate assert_factor assert_numeric assert_set_equal