From af25a3413859e6eca35f951e8fbe34db794bf29e Mon Sep 17 00:00:00 2001
From: "Konrad H. Stopsack" <35557324+stopsack@users.noreply.github.com>
Date: Sun, 25 Feb 2024 20:56:39 -0500
Subject: [PATCH] handle competing events

estimate cumulative incidence for the event of interest in cuminc and cumincdiff; censor remaining events in other approaches
---
 R/estimate_event_time.R           | 60 ++++++++++++++++++++-----
 R/estimate_survdiff.R             |  7 ++-
 R/fill_cells.R                    |  2 +
 R/prepare_data.R                  | 73 ++++++++++++++++++++++++++++++-
 R/survdiff_ci.R                   | 34 ++++++++++----
 man/survdiff_ci.Rd                |  6 ++-
 vignettes/estimators_survival.Rmd | 29 ++++++++----
 7 files changed, 177 insertions(+), 34 deletions(-)

diff --git a/R/estimate_event_time.R b/R/estimate_event_time.R
index 84827ca..45fa2c0 100644
--- a/R/estimate_event_time.R
+++ b/R/estimate_event_time.R
@@ -39,6 +39,7 @@ estimate_event_time <- function(
     to,
     arguments,
     is_trend,
+    event_type = NULL,
     ...) {
   if(is_trend)
     return(tibble::tibble())
@@ -169,6 +170,10 @@ estimate_event_time <- function(
     },
     "medsurv" =,
     "medsurv (ci)" = {
+      if(!is.null(event_type)) {
+        warning(paste(
+          "type = 'medsurv': Note the presence of competing events."))
+      }
       data %>%
         dplyr::summarize(
           res = {
@@ -254,6 +259,15 @@ estimate_event_time <- function(
     "surv (ci)" =,
     "cuminc" =,
     "cuminc (ci)" = {
+      if(!is.null(event_type) &
+         stringr::str_detect(
+           string = type,
+           pattern = "surv")
+      ) {
+        stop(paste(
+          "Survival (type = 'surv') is not estimated with competing risks.",
+          "Use type = 'cuminc'."))
+      }
       data %>%
         dplyr::summarize(
           res = {
@@ -261,14 +275,14 @@ estimate_event_time <- function(
               fit <- survival::survfit(
                 formula = survival::Surv(
                   time  = .data$.time,
-                  event = .data$.event) ~ 1,
+                  event = .data$.event_compete) ~ 1,
                 conf.int = ci)
             } else {
               fit <- survival::survfit(
                 formula = survival::Surv(
                   time  = .data$.time_orig,
                   time2 = .data$.time2,
-                  event = .data$.event) ~ 1,
+                  event = .data$.event_compete) ~ 1,
                 conf.int = ci)
             }
             if(is.na(timepoint))
@@ -281,7 +295,9 @@ estimate_event_time <- function(
 
             if(stringr::str_detect(
               string = type,
-              pattern = "cuminc")) {
+              pattern = "cuminc") &
+              is.null(event_type)
+            ) {
               added <- 1
               multiply <- -1
               first_limit <- "upper"
@@ -293,13 +309,37 @@ estimate_event_time <- function(
               second_limit <- "upper"
             }
 
+            if(is.null(event_type)) {
+              est <- fit %>%
+                purrr::pluck("surv") %>%
+                dplyr::last()
+              ci_first <- fit %>%
+                purrr::pluck(first_limit) %>%
+                dplyr::last()
+              ci_second <- fit %>%
+                purrr::pluck(second_limit) %>%
+                dplyr::last()
+            } else {
+              est <- utils::tail(
+                fit$pstate[, which(fit$states == event_type)],
+                n = 1)
+              ci_first <- fit %>%
+                purrr::pluck(first_limit)
+              ci_first <- utils::tail(
+                ci_first[, which(fit$states == event_type)],
+                n = 1)
+              ci_second <- fit %>%
+                purrr::pluck(second_limit)
+              ci_second <- utils::tail(
+                ci_second[, which(fit$states == event_type)],
+                n = 1)
+            }
+
             paste0(
               format_round(
                 (added +
                    multiply *
-                   (fit %>%
-                      purrr::pluck("surv") %>%
-                      dplyr::last())) *
+                   est) *
                   percent_100,
                 digits = digits),
               percent_symbol,
@@ -311,18 +351,14 @@ estimate_event_time <- function(
                   format_round(
                     (added +
                        multiply *
-                       (fit %>%
-                          purrr::pluck(first_limit) %>%
-                          dplyr::last())) *
+                       ci_first) *
                       percent_100,
                     digits = digits),
                   to,
                   format_round(
                     (added +
                        multiply *
-                       (fit %>%
-                          purrr::pluck(second_limit) %>%
-                          dplyr::last())) *
+                       ci_second) *
                       percent_100,
                     digits = digits),
                   ")"),
diff --git a/R/estimate_survdiff.R b/R/estimate_survdiff.R
index 8784883..ca906c2 100644
--- a/R/estimate_survdiff.R
+++ b/R/estimate_survdiff.R
@@ -19,6 +19,7 @@
 #' @param event Name of event variable
 #' @param time Name of time variable
 #' @param time2 Name of second time variable, if any
+#' @param event_type Level of event variable with competing risks, if any
 #' @param ...
 #'
 #' @return Tibble
@@ -43,6 +44,7 @@ estimate_survdiff <- function(
     to,
     reference,
     arguments,
+    event_type,
     ...) {
   if(is_trend)
     return(tibble::tibble())
@@ -79,7 +81,7 @@ estimate_survdiff <- function(
           is.na(time2),
           true = "survival::Surv(time = .time, ",
           false = "survival::Surv(time = .time_orig, time2 = .time2, "),
-        "event = .event) ~ .exposure")),
+        "event = .event_compete) ~ .exposure")),
     data = data,
     time = timepoint,
     estimand = dplyr::if_else(
@@ -88,7 +90,8 @@ estimate_survdiff <- function(
         pattern = "survdiff"),
       true = "survival",
       false = "cuminc"),
-    conf.level = ci) %>%
+    conf.level = ci,
+    event_type = event_type) %>%
     dplyr::mutate(
       term = paste0(".exposure", .data$term)) %>%
     format_regression_results(
diff --git a/R/fill_cells.R b/R/fill_cells.R
index e9ad14e..312cf47 100644
--- a/R/fill_cells.R
+++ b/R/fill_cells.R
@@ -263,6 +263,7 @@ fill_cells <- function(
       reference = reference,
       factor = factor,
       arguments = arguments,
+      event_type = data_prep$event_type,
       is_trend = FALSE))
 
   if(setequal(
@@ -320,6 +321,7 @@ fill_cells <- function(
         reference = reference,
         factor = factor,
         arguments = arguments,
+        event_type = data_prep$event_type,
         is_trend = TRUE))
     if(setequal(
       res_cat,
diff --git a/R/prepare_data.R b/R/prepare_data.R
index def0604..0b09c4c 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -118,8 +118,22 @@ prepare_data <- function(
       data$.outcome <- data[[outcome]]
     }
   }
+  event_type <- NULL
   if(!is.na(time) & !is.na(event)) {
     if(time != "" & event != "") {
+      if(stringr::str_detect(
+        string = event,
+        pattern = "@")
+      ) {
+        event_type <- stringr::str_split_i(
+          string = event,
+          pattern = "@",
+          i = 2)
+        event <- stringr::str_split_i(
+          string = event,
+          pattern = "@",
+          i = 1)
+      }
       if(event != outcome | is.na(outcome)) {
         if(!(event %in% names(data)))
           stop(
@@ -135,6 +149,62 @@ prepare_data <- function(
         data <- data %>%
           dplyr::mutate(.event = .data$.outcome)
       }
+      if(!is.null(event_type)) {
+        if(!any(event_type %in% unique(data$.event))) {
+          stop(paste0(
+            "For event variable '",
+            event,
+            "', the specified event type '",
+            event_type,
+            "' is not available in the data. Available are: ",
+            paste(
+              unique(data$.event),
+              collapse = " "
+            )
+          ))
+        }
+        if(!length(stats::na.omit(unique(data$.event))) > 2)
+          stop(paste0(
+            "For event variable '",
+            event,
+            "', the event type '",
+            event_type,
+            "' has been specified. However, the event variable does not ",
+            "appear to have more than two levels to encode competing events ",
+            "and censoring. Available levels are only: ",
+            paste(
+              unique(data$.event),
+              collapse = " "
+            )
+          ))
+      } else {
+        if(length(stats::na.omit(unique(data$.event))) > 2)
+          stop(paste0(
+            "The event variable '",
+            event,
+            "' has more than two non-missing levels, suggesting that ",
+            "competing events may be encoded, but no specific event type ",
+            "(variable level) has been requested via ",
+            "event = 'event_variable@level'. Available levels are: ",
+            paste(
+              unique(data$.event),
+              collapse = " "
+            )
+          ))
+      }
+      # Recode event variable for estimators that only handle one event type
+      data$.event_compete <- data$.event
+      if(!is.null(event_type)) {
+        data <- data %>%
+          dplyr::mutate(
+            .event = dplyr::if_else(
+              condition = .data$.event == event_type,
+              true = 1,
+              false = 0
+            )
+          )
+      }
+
       if(!(time %in% names(data)))
         stop(
           paste0(
@@ -207,5 +277,6 @@ prepare_data <- function(
   list(
     data = data,
     pattern = pattern,
-    xlevels = xlevels)
+    xlevels = xlevels,
+    event_type = event_type)
 }
diff --git a/R/survdiff_ci.R b/R/survdiff_ci.R
index 0887fa5..20c0e84 100644
--- a/R/survdiff_ci.R
+++ b/R/survdiff_ci.R
@@ -14,6 +14,8 @@
 #'   or cumulative incidence (\code{"cuminc"})? This parameter affects the
 #'   sign of the differences. Defaults to \code{"survival"}.
 #' @param conf.level Optional. Confidence level. Defaults to \code{0.95}.
+#' @param event_type Optional. Event type (level) for event variable with
+#'   competing events. Defaults to \code{NULL}.
 #'
 #' @references
 #' Com-Nougue C, Rodary C, Patte C. How to establish equivalence when data are
@@ -65,7 +67,8 @@ survdiff_ci <- function(
     data,
     time,
     estimand = c("survival", "cuminc"),
-    conf.level = 0.95
+    conf.level = 0.95,
+    event_type = NULL
 ) {
   zval <- stats::qnorm(1 - (1 - conf.level) / 2)
   estimand <- match.arg(estimand)
@@ -73,14 +76,27 @@ survdiff_ci <- function(
     survival::survfit(
       formula = formula,
       data = data),
-    time = time)
-  res <- tibble::tibble(
-    term = res$strata,
-    surv = res$surv,
-    se = res$std.err
-  )
-  if(estimand == "cuminc")
-    res$surv <- 1 - res$surv
+    time = time,
+    extend = TRUE)
+  if(is.null(event_type)) {
+    res <- tibble::tibble(
+      term = res$strata,
+      surv = res$surv,
+      se = res$std.err
+    )
+    if(estimand == "cuminc")
+      res$surv <- 1 - res$surv
+  } else {
+    if(estimand == "survival")
+      stop(paste(
+        "type = 'survdiff' may not be meaningful with competing events.",
+        "Use: type = 'cumincdiff'."))
+    res <- tibble::tibble(
+      term = res$strata,
+      surv = res$pstate[, which(res$states == event_type)],
+      se = res$std.err[, which(res$states == event_type)]
+    )
+  }
   res %>%
     dplyr::transmute(
       term = stringr::str_remove_all(
diff --git a/man/survdiff_ci.Rd b/man/survdiff_ci.Rd
index 5467ca8..8bd2015 100644
--- a/man/survdiff_ci.Rd
+++ b/man/survdiff_ci.Rd
@@ -9,7 +9,8 @@ survdiff_ci(
   data,
   time,
   estimand = c("survival", "cuminc"),
-  conf.level = 0.95
+  conf.level = 0.95,
+  event_type = NULL
 )
 }
 \arguments{
@@ -27,6 +28,9 @@ or cumulative incidence (\code{"cuminc"})? This parameter affects the
 sign of the differences. Defaults to \code{"survival"}.}
 
 \item{conf.level}{Optional. Confidence level. Defaults to \code{0.95}.}
+
+\item{event_type}{Optional. Event type (level) for event variable with
+competing events. Defaults to \code{NULL}.}
 }
 \value{
 Tibble in \code{\link[broom]{tidy}} format:
diff --git a/vignettes/estimators_survival.Rmd b/vignettes/estimators_survival.Rmd
index 5da9728..f8ffc5b 100644
--- a/vignettes/estimators_survival.Rmd
+++ b/vignettes/estimators_survival.Rmd
@@ -78,20 +78,20 @@ tibble::tribble(
 
 `type` | Description | Options (`arguments = `)
 -----+-----------------+---------
-`"cuminc"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator. If no time point is provided, returns cumulative incidence at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show cumulative incidence at 2.5 years.
-`"cuminc (ci)"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator with confidence intervals (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"cuminc"`.
+`"cuminc"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator or, if competing risks are present, its generalized form, the Aalen-Johansen estimator. If no time point is provided, returns cumulative incidence at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show cumulative incidence at 2.5 years.
+`"cuminc (ci)"` | Cumulative incidence ("risk"), as above, with confidence intervals (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"cuminc"`.
 `"events"` | Event count.
 `"events/time"` | Events slash person-time.
 `"events/time (rate)"` | A combination: Events slash time followed by rate in parentheses.
 `"events/total"` | Events slash number of observations.
 `"rate"` | Event rate: event count divided by person-time, multiplied by the `rifttable()` parameter `factor`.
 `"rate (ci)"` | Event rate with confidence interval (default: 95%; Poisson-type large-sample interval).
-`"surv"` | Survival from the Kaplan-Meier estimator. If no time point is provided, returns survival at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show survival at 2.5 years.
-`"surv (ci)"` | Survival from the Kaplan-Meier estimator with confidence interval (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"surv"`.
+`"surv"` | Survival from the Kaplan-Meier estimator. Not estimated if competing risks are present. If no time point is provided, returns survival at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show survival at 2.5 years.
+`"surv (ci)"` | Survival from the Kaplan-Meier estimator, as above, with confidence interval (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"surv"`.
 `"time"` | Person-time.
 `"maxfu"` | Maximum follow-up time.
-`"medfu"` | Median follow-up (reverse Kaplan-Meier), equals median survival for censoring.
-`"medfu (iqr)"` | Median and interquartile range for follow-up.
+`"medfu"` | Median follow-up ("reverse Kaplan-Meier"), equals median survival for censoring. If competing risks are present, events other than the event of interest are also considered censoring to estimate the median follow-up for the event of interest.
+`"medfu (iqr)"` | Median and interquartile range for follow-up, as above.
 `"medsurv"` | Median survival.
 `"medsurv (ci)"` | Median survival with confidence interval (default: 95%).
 
@@ -100,6 +100,17 @@ tibble::tribble(
 
 `type` | Description | Options (`arguments = `)
 -----+-----------------+-------------
-`"cumincdiff"` | Difference in cumulative incidence from Kaplan-Meier estimator, with confidence interval (default: 95%). Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in cumulative incidence at 2.5 years.
-`"hr"`    | Hazard ratio from Cox proportional hazards regression, with confidence interval (default: 95%). | `list(robust = TRUE)` for robust (sandwich) standard errors; `list(weights = "weight_variable", robust = TRUE)` for sampling (e.g., inverse-probability) weights and robust errors. Use `"+ cluster(id_variable)"` in `confounders` to obtain clustering.
-`"survdiff"` | Difference in survival from Kaplan-Meier estimator, with confidence interval (default: 95%).  Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in survival at 2.5 years.
+`"cumincdiff"` | Difference in cumulative incidence from Kaplan-Meier estimator or, if competing risks are present, its generalized form, the Aalen-Johansen estimator, with confidence interval (default: 95%). Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in cumulative incidence at 2.5 years.
+`"hr"`    | Hazard ratio from Cox proportional hazards regression, with confidence interval (default: 95%). If competing events are present, hazard ratios are cause-specific for the event of interest. | `list(robust = TRUE)` for robust (sandwich) standard errors; `list(weights = "weight_variable", robust = TRUE)` for sampling (e.g., inverse-probability) weights and robust errors. Use `"+ cluster(id_variable)"` in `confounders` to obtain clustering.
+`"survdiff"` | Difference in survival from Kaplan-Meier estimator, with confidence interval (default: 95%). Not estimated if competing risks are present. Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in survival at 2.5 years.
+
+
+# Competing events
+
+With only one event type, the `event` variable only has two levels: censoring, typically encoded as `0`, and the event, typically encoded as `1`. 
+
+With competing events, the `event` variable will have additional levels. The `survival::Surv()` function used by rifttable assumes that the first-ordered level represents censoring and the others are different non-censoring events. For example, if the event variable is a factor, then `"Censoring"` needs to be the first of the factor's `levels()`.
+
+It is necessary to specify the event of interest in the `design` if competing events are present. For example, if the `event` variable is a factor variable `status`, with levels `"Censored"`, `"Outcome of interest"`, and `"Other-cause death"`, then specify `type = "status@Outcome of interest"` in the table design.
+
+See the tables above for how competing events are handled. When no details are noted, the event of interest is recoded as the sole event and other events are considered censoring.