Skip to content

Commit

Permalink
handle competing events
Browse files Browse the repository at this point in the history
estimate cumulative incidence for the event of interest in cuminc and cumincdiff; censor remaining events in other approaches
  • Loading branch information
stopsack committed Feb 26, 2024
1 parent 7c6aff6 commit af25a34
Show file tree
Hide file tree
Showing 7 changed files with 177 additions and 34 deletions.
60 changes: 48 additions & 12 deletions R/estimate_event_time.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ estimate_event_time <- function(
to,
arguments,
is_trend,
event_type = NULL,
...) {
if(is_trend)
return(tibble::tibble())
Expand Down Expand Up @@ -169,6 +170,10 @@ estimate_event_time <- function(
},
"medsurv" =,
"medsurv (ci)" = {
if(!is.null(event_type)) {
warning(paste(
"type = 'medsurv': Note the presence of competing events."))
}
data %>%
dplyr::summarize(
res = {
Expand Down Expand Up @@ -254,21 +259,30 @@ estimate_event_time <- function(
"surv (ci)" =,
"cuminc" =,
"cuminc (ci)" = {
if(!is.null(event_type) &
stringr::str_detect(
string = type,
pattern = "surv")
) {
stop(paste(
"Survival (type = 'surv') is not estimated with competing risks.",
"Use type = 'cuminc'."))
}
data %>%
dplyr::summarize(
res = {
if(is.na(time2[1])) {
fit <- survival::survfit(
formula = survival::Surv(
time = .data$.time,
event = .data$.event) ~ 1,
event = .data$.event_compete) ~ 1,
conf.int = ci)
} else {
fit <- survival::survfit(
formula = survival::Surv(
time = .data$.time_orig,
time2 = .data$.time2,
event = .data$.event) ~ 1,
event = .data$.event_compete) ~ 1,
conf.int = ci)
}
if(is.na(timepoint))
Expand All @@ -281,7 +295,9 @@ estimate_event_time <- function(

if(stringr::str_detect(
string = type,
pattern = "cuminc")) {
pattern = "cuminc") &
is.null(event_type)
) {
added <- 1
multiply <- -1
first_limit <- "upper"
Expand All @@ -293,13 +309,37 @@ estimate_event_time <- function(
second_limit <- "upper"
}

if(is.null(event_type)) {
est <- fit %>%
purrr::pluck("surv") %>%
dplyr::last()
ci_first <- fit %>%
purrr::pluck(first_limit) %>%
dplyr::last()
ci_second <- fit %>%
purrr::pluck(second_limit) %>%
dplyr::last()
} else {
est <- utils::tail(
fit$pstate[, which(fit$states == event_type)],
n = 1)
ci_first <- fit %>%
purrr::pluck(first_limit)
ci_first <- utils::tail(
ci_first[, which(fit$states == event_type)],
n = 1)
ci_second <- fit %>%
purrr::pluck(second_limit)
ci_second <- utils::tail(
ci_second[, which(fit$states == event_type)],
n = 1)
}

paste0(
format_round(
(added +
multiply *
(fit %>%
purrr::pluck("surv") %>%
dplyr::last())) *
est) *
percent_100,
digits = digits),
percent_symbol,
Expand All @@ -311,18 +351,14 @@ estimate_event_time <- function(
format_round(
(added +
multiply *
(fit %>%
purrr::pluck(first_limit) %>%
dplyr::last())) *
ci_first) *
percent_100,
digits = digits),
to,
format_round(
(added +
multiply *
(fit %>%
purrr::pluck(second_limit) %>%
dplyr::last())) *
ci_second) *
percent_100,
digits = digits),
")"),
Expand Down
7 changes: 5 additions & 2 deletions R/estimate_survdiff.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#' @param event Name of event variable
#' @param time Name of time variable
#' @param time2 Name of second time variable, if any
#' @param event_type Level of event variable with competing risks, if any
#' @param ...
#'
#' @return Tibble
Expand All @@ -43,6 +44,7 @@ estimate_survdiff <- function(
to,
reference,
arguments,
event_type,
...) {
if(is_trend)
return(tibble::tibble())
Expand Down Expand Up @@ -79,7 +81,7 @@ estimate_survdiff <- function(
is.na(time2),
true = "survival::Surv(time = .time, ",
false = "survival::Surv(time = .time_orig, time2 = .time2, "),
"event = .event) ~ .exposure")),
"event = .event_compete) ~ .exposure")),
data = data,
time = timepoint,
estimand = dplyr::if_else(
Expand All @@ -88,7 +90,8 @@ estimate_survdiff <- function(
pattern = "survdiff"),
true = "survival",
false = "cuminc"),
conf.level = ci) %>%
conf.level = ci,
event_type = event_type) %>%
dplyr::mutate(
term = paste0(".exposure", .data$term)) %>%
format_regression_results(
Expand Down
2 changes: 2 additions & 0 deletions R/fill_cells.R
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ fill_cells <- function(
reference = reference,
factor = factor,
arguments = arguments,
event_type = data_prep$event_type,
is_trend = FALSE))

if(setequal(
Expand Down Expand Up @@ -320,6 +321,7 @@ fill_cells <- function(
reference = reference,
factor = factor,
arguments = arguments,
event_type = data_prep$event_type,
is_trend = TRUE))
if(setequal(
res_cat,
Expand Down
73 changes: 72 additions & 1 deletion R/prepare_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,22 @@ prepare_data <- function(
data$.outcome <- data[[outcome]]
}
}
event_type <- NULL
if(!is.na(time) & !is.na(event)) {
if(time != "" & event != "") {
if(stringr::str_detect(
string = event,
pattern = "@")
) {
event_type <- stringr::str_split_i(
string = event,
pattern = "@",
i = 2)
event <- stringr::str_split_i(
string = event,
pattern = "@",
i = 1)
}
if(event != outcome | is.na(outcome)) {
if(!(event %in% names(data)))
stop(
Expand All @@ -135,6 +149,62 @@ prepare_data <- function(
data <- data %>%
dplyr::mutate(.event = .data$.outcome)
}
if(!is.null(event_type)) {
if(!any(event_type %in% unique(data$.event))) {
stop(paste0(
"For event variable '",
event,
"', the specified event type '",
event_type,
"' is not available in the data. Available are: ",
paste(
unique(data$.event),
collapse = " "
)
))
}
if(!length(stats::na.omit(unique(data$.event))) > 2)
stop(paste0(
"For event variable '",
event,
"', the event type '",
event_type,
"' has been specified. However, the event variable does not ",
"appear to have more than two levels to encode competing events ",
"and censoring. Available levels are only: ",
paste(
unique(data$.event),
collapse = " "
)
))
} else {
if(length(stats::na.omit(unique(data$.event))) > 2)
stop(paste0(
"The event variable '",
event,
"' has more than two non-missing levels, suggesting that ",
"competing events may be encoded, but no specific event type ",
"(variable level) has been requested via ",
"event = 'event_variable@level'. Available levels are: ",
paste(
unique(data$.event),
collapse = " "
)
))
}
# Recode event variable for estimators that only handle one event type
data$.event_compete <- data$.event
if(!is.null(event_type)) {
data <- data %>%
dplyr::mutate(
.event = dplyr::if_else(
condition = .data$.event == event_type,
true = 1,
false = 0
)
)
}

if(!(time %in% names(data)))
stop(
paste0(
Expand Down Expand Up @@ -207,5 +277,6 @@ prepare_data <- function(
list(
data = data,
pattern = pattern,
xlevels = xlevels)
xlevels = xlevels,
event_type = event_type)
}
34 changes: 25 additions & 9 deletions R/survdiff_ci.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#' or cumulative incidence (\code{"cuminc"})? This parameter affects the
#' sign of the differences. Defaults to \code{"survival"}.
#' @param conf.level Optional. Confidence level. Defaults to \code{0.95}.
#' @param event_type Optional. Event type (level) for event variable with
#' competing events. Defaults to \code{NULL}.
#'
#' @references
#' Com-Nougue C, Rodary C, Patte C. How to establish equivalence when data are
Expand Down Expand Up @@ -65,22 +67,36 @@ survdiff_ci <- function(
data,
time,
estimand = c("survival", "cuminc"),
conf.level = 0.95
conf.level = 0.95,
event_type = NULL
) {
zval <- stats::qnorm(1 - (1 - conf.level) / 2)
estimand <- match.arg(estimand)
res <- summary(
survival::survfit(
formula = formula,
data = data),
time = time)
res <- tibble::tibble(
term = res$strata,
surv = res$surv,
se = res$std.err
)
if(estimand == "cuminc")
res$surv <- 1 - res$surv
time = time,
extend = TRUE)
if(is.null(event_type)) {
res <- tibble::tibble(
term = res$strata,
surv = res$surv,
se = res$std.err
)
if(estimand == "cuminc")
res$surv <- 1 - res$surv
} else {
if(estimand == "survival")
stop(paste(
"type = 'survdiff' may not be meaningful with competing events.",
"Use: type = 'cumincdiff'."))
res <- tibble::tibble(
term = res$strata,
surv = res$pstate[, which(res$states == event_type)],
se = res$std.err[, which(res$states == event_type)]
)
}
res %>%
dplyr::transmute(
term = stringr::str_remove_all(
Expand Down
6 changes: 5 additions & 1 deletion man/survdiff_ci.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 20 additions & 9 deletions vignettes/estimators_survival.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -78,20 +78,20 @@ tibble::tribble(

`type` | Description | Options (`arguments = `)
-----+-----------------+---------
`"cuminc"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator. If no time point is provided, returns cumulative incidence at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show cumulative incidence at 2.5 years.
`"cuminc (ci)"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator with confidence intervals (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"cuminc"`.
`"cuminc"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator or, if competing risks are present, its generalized form, the Aalen-Johansen estimator. If no time point is provided, returns cumulative incidence at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show cumulative incidence at 2.5 years.
`"cuminc (ci)"` | Cumulative incidence ("risk"), as above, with confidence intervals (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"cuminc"`.
`"events"` | Event count.
`"events/time"` | Events slash person-time.
`"events/time (rate)"` | A combination: Events slash time followed by rate in parentheses.
`"events/total"` | Events slash number of observations.
`"rate"` | Event rate: event count divided by person-time, multiplied by the `rifttable()` parameter `factor`.
`"rate (ci)"` | Event rate with confidence interval (default: 95%; Poisson-type large-sample interval).
`"surv"` | Survival from the Kaplan-Meier estimator. If no time point is provided, returns survival at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show survival at 2.5 years.
`"surv (ci)"` | Survival from the Kaplan-Meier estimator with confidence interval (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"surv"`.
`"surv"` | Survival from the Kaplan-Meier estimator. Not estimated if competing risks are present. If no time point is provided, returns survival at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show survival at 2.5 years.
`"surv (ci)"` | Survival from the Kaplan-Meier estimator, as above, with confidence interval (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"surv"`.
`"time"` | Person-time.
`"maxfu"` | Maximum follow-up time.
`"medfu"` | Median follow-up (reverse Kaplan-Meier), equals median survival for censoring.
`"medfu (iqr)"` | Median and interquartile range for follow-up.
`"medfu"` | Median follow-up ("reverse Kaplan-Meier"), equals median survival for censoring. If competing risks are present, events other than the event of interest are also considered censoring to estimate the median follow-up for the event of interest.
`"medfu (iqr)"` | Median and interquartile range for follow-up, as above.
`"medsurv"` | Median survival.
`"medsurv (ci)"` | Median survival with confidence interval (default: 95%).

Expand All @@ -100,6 +100,17 @@ tibble::tribble(

`type` | Description | Options (`arguments = `)
-----+-----------------+-------------
`"cumincdiff"` | Difference in cumulative incidence from Kaplan-Meier estimator, with confidence interval (default: 95%). Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in cumulative incidence at 2.5 years.
`"hr"` | Hazard ratio from Cox proportional hazards regression, with confidence interval (default: 95%). | `list(robust = TRUE)` for robust (sandwich) standard errors; `list(weights = "weight_variable", robust = TRUE)` for sampling (e.g., inverse-probability) weights and robust errors. Use `"+ cluster(id_variable)"` in `confounders` to obtain clustering.
`"survdiff"` | Difference in survival from Kaplan-Meier estimator, with confidence interval (default: 95%). Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in survival at 2.5 years.
`"cumincdiff"` | Difference in cumulative incidence from Kaplan-Meier estimator or, if competing risks are present, its generalized form, the Aalen-Johansen estimator, with confidence interval (default: 95%). Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in cumulative incidence at 2.5 years.
`"hr"` | Hazard ratio from Cox proportional hazards regression, with confidence interval (default: 95%). If competing events are present, hazard ratios are cause-specific for the event of interest. | `list(robust = TRUE)` for robust (sandwich) standard errors; `list(weights = "weight_variable", robust = TRUE)` for sampling (e.g., inverse-probability) weights and robust errors. Use `"+ cluster(id_variable)"` in `confounders` to obtain clustering.
`"survdiff"` | Difference in survival from Kaplan-Meier estimator, with confidence interval (default: 95%). Not estimated if competing risks are present. Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in survival at 2.5 years.


# Competing events

With only one event type, the `event` variable only has two levels: censoring, typically encoded as `0`, and the event, typically encoded as `1`.

With competing events, the `event` variable will have additional levels. The `survival::Surv()` function used by rifttable assumes that the first-ordered level represents censoring and the others are different non-censoring events. For example, if the event variable is a factor, then `"Censoring"` needs to be the first of the factor's `levels()`.

It is necessary to specify the event of interest in the `design` if competing events are present. For example, if the `event` variable is a factor variable `status`, with levels `"Censored"`, `"Outcome of interest"`, and `"Other-cause death"`, then specify `type = "status@Outcome of interest"` in the table design.

See the tables above for how competing events are handled. When no details are noted, the event of interest is recoded as the sole event and other events are considered censoring.

0 comments on commit af25a34

Please sign in to comment.