handle competing events

estimate cumulative incidence for the event of interest in cuminc and cumincdiff; censor remaining events in other approaches
stopsack · Feb 26, 2024 · af25a34 · af25a34
1 parent 7c6aff6
commit af25a34
Show file tree

Hide file tree

Showing 7 changed files with 177 additions and 34 deletions.
diff --git a/R/estimate_event_time.R b/R/estimate_event_time.R
@@ -39,6 +39,7 @@ estimate_event_time <- function(
     to,
     arguments,
     is_trend,
+    event_type = NULL,
     ...) {
   if(is_trend)
     return(tibble::tibble())
@@ -169,6 +170,10 @@ estimate_event_time <- function(
     },
     "medsurv" =,
     "medsurv (ci)" = {
+      if(!is.null(event_type)) {
+        warning(paste(
+          "type = 'medsurv': Note the presence of competing events."))
+      }
       data %>%
         dplyr::summarize(
           res = {
@@ -254,21 +259,30 @@ estimate_event_time <- function(
     "surv (ci)" =,
     "cuminc" =,
     "cuminc (ci)" = {
+      if(!is.null(event_type) &
+         stringr::str_detect(
+           string = type,
+           pattern = "surv")
+      ) {
+        stop(paste(
+          "Survival (type = 'surv') is not estimated with competing risks.",
+          "Use type = 'cuminc'."))
+      }
       data %>%
         dplyr::summarize(
           res = {
             if(is.na(time2[1])) {
               fit <- survival::survfit(
                 formula = survival::Surv(
                   time  = .data$.time,
-                  event = .data$.event) ~ 1,
+                  event = .data$.event_compete) ~ 1,
                 conf.int = ci)
             } else {
               fit <- survival::survfit(
                 formula = survival::Surv(
                   time  = .data$.time_orig,
                   time2 = .data$.time2,
-                  event = .data$.event) ~ 1,
+                  event = .data$.event_compete) ~ 1,
                 conf.int = ci)
             }
             if(is.na(timepoint))
@@ -281,7 +295,9 @@ estimate_event_time <- function(
 
             if(stringr::str_detect(
               string = type,
-              pattern = "cuminc")) {
+              pattern = "cuminc") &
+              is.null(event_type)
+            ) {
               added <- 1
               multiply <- -1
               first_limit <- "upper"
@@ -293,13 +309,37 @@ estimate_event_time <- function(
               second_limit <- "upper"
             }
 
+            if(is.null(event_type)) {
+              est <- fit %>%
+                purrr::pluck("surv") %>%
+                dplyr::last()
+              ci_first <- fit %>%
+                purrr::pluck(first_limit) %>%
+                dplyr::last()
+              ci_second <- fit %>%
+                purrr::pluck(second_limit) %>%
+                dplyr::last()
+            } else {
+              est <- utils::tail(
+                fit$pstate[, which(fit$states == event_type)],
+                n = 1)
+              ci_first <- fit %>%
+                purrr::pluck(first_limit)
+              ci_first <- utils::tail(
+                ci_first[, which(fit$states == event_type)],
+                n = 1)
+              ci_second <- fit %>%
+                purrr::pluck(second_limit)
+              ci_second <- utils::tail(
+                ci_second[, which(fit$states == event_type)],
+                n = 1)
+            }
+
             paste0(
               format_round(
                 (added +
                    multiply *
-                   (fit %>%
-                      purrr::pluck("surv") %>%
-                      dplyr::last())) *
+                   est) *
                   percent_100,
                 digits = digits),
               percent_symbol,
@@ -311,18 +351,14 @@ estimate_event_time <- function(
                   format_round(
                     (added +
                        multiply *
-                       (fit %>%
-                          purrr::pluck(first_limit) %>%
-                          dplyr::last())) *
+                       ci_first) *
                       percent_100,
                     digits = digits),
                   to,
                   format_round(
                     (added +
                        multiply *
-                       (fit %>%
-                          purrr::pluck(second_limit) %>%
-                          dplyr::last())) *
+                       ci_second) *
                       percent_100,
                     digits = digits),
                   ")"),

diff --git a/R/estimate_survdiff.R b/R/estimate_survdiff.R
@@ -19,6 +19,7 @@
 #' @param event Name of event variable
 #' @param time Name of time variable
 #' @param time2 Name of second time variable, if any
+#' @param event_type Level of event variable with competing risks, if any
 #' @param ...
 #'
 #' @return Tibble
@@ -43,6 +44,7 @@ estimate_survdiff <- function(
     to,
     reference,
     arguments,
+    event_type,
     ...) {
   if(is_trend)
     return(tibble::tibble())
@@ -79,7 +81,7 @@ estimate_survdiff <- function(
           is.na(time2),
           true = "survival::Surv(time = .time, ",
           false = "survival::Surv(time = .time_orig, time2 = .time2, "),
-        "event = .event) ~ .exposure")),
+        "event = .event_compete) ~ .exposure")),
     data = data,
     time = timepoint,
     estimand = dplyr::if_else(
@@ -88,7 +90,8 @@ estimate_survdiff <- function(
         pattern = "survdiff"),
       true = "survival",
       false = "cuminc"),
-    conf.level = ci) %>%
+    conf.level = ci,
+    event_type = event_type) %>%
     dplyr::mutate(
       term = paste0(".exposure", .data$term)) %>%
     format_regression_results(

diff --git a/R/fill_cells.R b/R/fill_cells.R
@@ -263,6 +263,7 @@ fill_cells <- function(
       reference = reference,
       factor = factor,
       arguments = arguments,
+      event_type = data_prep$event_type,
       is_trend = FALSE))
 
   if(setequal(
@@ -320,6 +321,7 @@ fill_cells <- function(
         reference = reference,
         factor = factor,
         arguments = arguments,
+        event_type = data_prep$event_type,
         is_trend = TRUE))
     if(setequal(
       res_cat,

diff --git a/R/prepare_data.R b/R/prepare_data.R
@@ -118,8 +118,22 @@ prepare_data <- function(
       data$.outcome <- data[[outcome]]
     }
   }
+  event_type <- NULL
   if(!is.na(time) & !is.na(event)) {
     if(time != "" & event != "") {
+      if(stringr::str_detect(
+        string = event,
+        pattern = "@")
+      ) {
+        event_type <- stringr::str_split_i(
+          string = event,
+          pattern = "@",
+          i = 2)
+        event <- stringr::str_split_i(
+          string = event,
+          pattern = "@",
+          i = 1)
+      }
       if(event != outcome | is.na(outcome)) {
         if(!(event %in% names(data)))
           stop(
@@ -135,6 +149,62 @@ prepare_data <- function(
         data <- data %>%
           dplyr::mutate(.event = .data$.outcome)
       }
+      if(!is.null(event_type)) {
+        if(!any(event_type %in% unique(data$.event))) {
+          stop(paste0(
+            "For event variable '",
+            event,
+            "', the specified event type '",
+            event_type,
+            "' is not available in the data. Available are: ",
+            paste(
+              unique(data$.event),
+              collapse = " "
+            )
+          ))
+        }
+        if(!length(stats::na.omit(unique(data$.event))) > 2)
+          stop(paste0(
+            "For event variable '",
+            event,
+            "', the event type '",
+            event_type,
+            "' has been specified. However, the event variable does not ",
+            "appear to have more than two levels to encode competing events ",
+            "and censoring. Available levels are only: ",
+            paste(
+              unique(data$.event),
+              collapse = " "
+            )
+          ))
+      } else {
+        if(length(stats::na.omit(unique(data$.event))) > 2)
+          stop(paste0(
+            "The event variable '",
+            event,
+            "' has more than two non-missing levels, suggesting that ",
+            "competing events may be encoded, but no specific event type ",
+            "(variable level) has been requested via ",
+            "event = 'event_variable@level'. Available levels are: ",
+            paste(
+              unique(data$.event),
+              collapse = " "
+            )
+          ))
+      }
+      # Recode event variable for estimators that only handle one event type
+      data$.event_compete <- data$.event
+      if(!is.null(event_type)) {
+        data <- data %>%
+          dplyr::mutate(
+            .event = dplyr::if_else(
+              condition = .data$.event == event_type,
+              true = 1,
+              false = 0
+            )
+          )
+      }
+
       if(!(time %in% names(data)))
         stop(
           paste0(
@@ -207,5 +277,6 @@ prepare_data <- function(
   list(
     data = data,
     pattern = pattern,
-    xlevels = xlevels)
+    xlevels = xlevels,
+    event_type = event_type)
 }
diff --git a/R/survdiff_ci.R b/R/survdiff_ci.R
@@ -14,6 +14,8 @@
 #'   or cumulative incidence (\code{"cuminc"})? This parameter affects the
 #'   sign of the differences. Defaults to \code{"survival"}.
 #' @param conf.level Optional. Confidence level. Defaults to \code{0.95}.
+#' @param event_type Optional. Event type (level) for event variable with
+#'   competing events. Defaults to \code{NULL}.
 #'
 #' @references
 #' Com-Nougue C, Rodary C, Patte C. How to establish equivalence when data are
@@ -65,22 +67,36 @@ survdiff_ci <- function(
     data,
     time,
     estimand = c("survival", "cuminc"),
-    conf.level = 0.95
+    conf.level = 0.95,
+    event_type = NULL
 ) {
   zval <- stats::qnorm(1 - (1 - conf.level) / 2)
   estimand <- match.arg(estimand)
   res <- summary(
     survival::survfit(
       formula = formula,
       data = data),
-    time = time)
-  res <- tibble::tibble(
-    term = res$strata,
-    surv = res$surv,
-    se = res$std.err
-  )
-  if(estimand == "cuminc")
-    res$surv <- 1 - res$surv
+    time = time,
+    extend = TRUE)
+  if(is.null(event_type)) {
+    res <- tibble::tibble(
+      term = res$strata,
+      surv = res$surv,
+      se = res$std.err
+    )
+    if(estimand == "cuminc")
+      res$surv <- 1 - res$surv
+  } else {
+    if(estimand == "survival")
+      stop(paste(
+        "type = 'survdiff' may not be meaningful with competing events.",
+        "Use: type = 'cumincdiff'."))
+    res <- tibble::tibble(
+      term = res$strata,
+      surv = res$pstate[, which(res$states == event_type)],
+      se = res$std.err[, which(res$states == event_type)]
+    )
+  }
   res %>%
     dplyr::transmute(
       term = stringr::str_remove_all(

diff --git a/man/survdiff_ci.Rd b/man/survdiff_ci.Rd
diff --git a/vignettes/estimators_survival.Rmd b/vignettes/estimators_survival.Rmd
@@ -78,20 +78,20 @@ tibble::tribble(
 
 `type` | Description | Options (`arguments = `)
 -----+-----------------+---------
-`"cuminc"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator. If no time point is provided, returns cumulative incidence at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show cumulative incidence at 2.5 years.
-`"cuminc (ci)"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator with confidence intervals (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"cuminc"`.
+`"cuminc"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator or, if competing risks are present, its generalized form, the Aalen-Johansen estimator. If no time point is provided, returns cumulative incidence at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show cumulative incidence at 2.5 years.
+`"cuminc (ci)"` | Cumulative incidence ("risk"), as above, with confidence intervals (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"cuminc"`.
 `"events"` | Event count.
 `"events/time"` | Events slash person-time.
 `"events/time (rate)"` | A combination: Events slash time followed by rate in parentheses.
 `"events/total"` | Events slash number of observations.
 `"rate"` | Event rate: event count divided by person-time, multiplied by the `rifttable()` parameter `factor`.
 `"rate (ci)"` | Event rate with confidence interval (default: 95%; Poisson-type large-sample interval).
-`"surv"` | Survival from the Kaplan-Meier estimator. If no time point is provided, returns survival at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show survival at 2.5 years.
-`"surv (ci)"` | Survival from the Kaplan-Meier estimator with confidence interval (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"surv"`.
+`"surv"` | Survival from the Kaplan-Meier estimator. Not estimated if competing risks are present. If no time point is provided, returns survival at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show survival at 2.5 years.
+`"surv (ci)"` | Survival from the Kaplan-Meier estimator, as above, with confidence interval (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"surv"`.
 `"time"` | Person-time.
 `"maxfu"` | Maximum follow-up time.
-`"medfu"` | Median follow-up (reverse Kaplan-Meier), equals median survival for censoring.
-`"medfu (iqr)"` | Median and interquartile range for follow-up.
+`"medfu"` | Median follow-up ("reverse Kaplan-Meier"), equals median survival for censoring. If competing risks are present, events other than the event of interest are also considered censoring to estimate the median follow-up for the event of interest.
+`"medfu (iqr)"` | Median and interquartile range for follow-up, as above.
 `"medsurv"` | Median survival.
 `"medsurv (ci)"` | Median survival with confidence interval (default: 95%).
 
@@ -100,6 +100,17 @@ tibble::tribble(
 
 `type` | Description | Options (`arguments = `)
 -----+-----------------+-------------
-`"cumincdiff"` | Difference in cumulative incidence from Kaplan-Meier estimator, with confidence interval (default: 95%). Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in cumulative incidence at 2.5 years.
-`"hr"`    | Hazard ratio from Cox proportional hazards regression, with confidence interval (default: 95%). | `list(robust = TRUE)` for robust (sandwich) standard errors; `list(weights = "weight_variable", robust = TRUE)` for sampling (e.g., inverse-probability) weights and robust errors. Use `"+ cluster(id_variable)"` in `confounders` to obtain clustering.
-`"survdiff"` | Difference in survival from Kaplan-Meier estimator, with confidence interval (default: 95%).  Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in survival at 2.5 years.
+`"cumincdiff"` | Difference in cumulative incidence from Kaplan-Meier estimator or, if competing risks are present, its generalized form, the Aalen-Johansen estimator, with confidence interval (default: 95%). Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in cumulative incidence at 2.5 years.
+`"hr"`    | Hazard ratio from Cox proportional hazards regression, with confidence interval (default: 95%). If competing events are present, hazard ratios are cause-specific for the event of interest. | `list(robust = TRUE)` for robust (sandwich) standard errors; `list(weights = "weight_variable", robust = TRUE)` for sampling (e.g., inverse-probability) weights and robust errors. Use `"+ cluster(id_variable)"` in `confounders` to obtain clustering.
+`"survdiff"` | Difference in survival from Kaplan-Meier estimator, with confidence interval (default: 95%). Not estimated if competing risks are present. Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in survival at 2.5 years.
+
+
+# Competing events
+
+With only one event type, the `event` variable only has two levels: censoring, typically encoded as `0`, and the event, typically encoded as `1`. 
+
+With competing events, the `event` variable will have additional levels. The `survival::Surv()` function used by rifttable assumes that the first-ordered level represents censoring and the others are different non-censoring events. For example, if the event variable is a factor, then `"Censoring"` needs to be the first of the factor's `levels()`.
+
+It is necessary to specify the event of interest in the `design` if competing events are present. For example, if the `event` variable is a factor variable `status`, with levels `"Censored"`, `"Outcome of interest"`, and `"Other-cause death"`, then specify `type = "status@Outcome of interest"` in the table design.
+
+See the tables above for how competing events are handled. When no details are noted, the event of interest is recoded as the sole event and other events are considered censoring.