1301 feature request add confidence intervals for quantiles in surv t…

…ime (#1306) # Pull Request Fixes #1301 @Melkiades I'm not sure about the impact of adding new stats to an analyze function on the remainder of the code/packages. Could you review and share your thoughts? This is the first function for which we'd like to add extra stats, there will be more functions for which we'd like to combine the stat and it's confidence interval into 1 line. Once we know the approach we can follow, more of these types of updates might follow. --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: 27856297+dependabot-preview[bot]@users.noreply.github.com <27856297+dependabot-preview[bot]@users.noreply.github.com> Co-authored-by: Davide Garolini <[email protected]> Co-authored-by: Joe Zhu <[email protected]> Co-authored-by: shajoezhu <[email protected]>
insightsengineering · Nov 20, 2024 · e6b11ed · e6b11ed
1 parent f5f706c
commit e6b11ed
Show file tree

Hide file tree

Showing 19 changed files with 482 additions and 111 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -20,6 +20,11 @@
 # tern 0.9.6
 
 ### Enhancements
+* Added `median_ci_3d` to `s_summary` which includes estimate and confidence interval in one statistic.
+* Added `median_ci_3d`, `quantiles_lower` and `quantiles_upper` to `s_surv_time` which includes estimate and confidence interval in one statistic.
+* Added `hr_ci_3d` to `s_coxph_pairwise` which includes estimate and confidence interval in one statistic.
+* Added `event_free_rate_3d` to `s_surv_timepoint` which includes estimate and confidence interval in one statistic.
+* Added `rate_diff_ci_3d` to `s_surv_timepoint_diff` which includes estimate and confidence interval in one statistic.
 * Added `errorbar_width` and `linetype` parameters to `g_lineplot`.
 * Added the `.formats` argument to `tabulate_rsp_subgroups` and `tabulate_survival_subgroups` to allow users to specify formats.
 * Added the `riskdiff` argument to `tabulate_rsp_subgroups` and `tabulate_survival_subgroups` to allow users to add a risk difference table column, and function `control_riskdiff` to specify settings for the risk difference column.

diff --git a/R/analyze_variables.R b/R/analyze_variables.R
@@ -190,6 +190,8 @@ s_summary.numeric <- function(x,
   mean_sdi <- y$mean[[1]] + c(-1, 1) * stats::sd(x, na.rm = FALSE)
   names(mean_sdi) <- c("mean_sdi_lwr", "mean_sdi_upr")
   y$mean_sdi <- formatters::with_label(mean_sdi, "Mean -/+ 1xSD")
+  mean_ci_3d <- c(y$mean, y$mean_ci)
+  y$mean_ci_3d <- formatters::with_label(mean_ci_3d, paste0("Mean (", f_conf_level(control$conf_level), ")"))
 
   mean_pval <- stat_mean_pval(x, test_mean = control$test_mean, na.rm = FALSE, n_min = 2)
   y$mean_pval <- formatters::with_label(mean_pval, paste("Mean", f_pval(control$test_mean)))
@@ -201,6 +203,9 @@ s_summary.numeric <- function(x,
   median_ci <- stat_median_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE)
   y$median_ci <- formatters::with_label(median_ci, paste("Median", f_conf_level(control$conf_level)))
 
+  median_ci_3d <- c(y$median, median_ci)
+  y$median_ci_3d <- formatters::with_label(median_ci_3d, paste0("Median (", f_conf_level(control$conf_level), ")"))
+
   q <- control$quantiles
   if (any(is.na(x))) {
     qnts <- rep(NA_real_, length(q))
@@ -233,6 +238,12 @@ s_summary.numeric <- function(x,
 
   y$geom_cv <- c("geom_cv" = sqrt(exp(stats::sd(log(x_no_negative_vals), na.rm = FALSE) ^ 2) - 1) * 100) # styler: off
 
+  geom_mean_ci_3d <- c(y$geom_mean, y$geom_mean_ci)
+  y$geom_mean_ci_3d <- formatters::with_label(
+    geom_mean_ci_3d,
+    paste0("Geometric Mean (", f_conf_level(control$conf_level), ")")
+  )
+
   y
 }
 

diff --git a/R/survival_coxph_pairwise.R b/R/survival_coxph_pairwise.R
@@ -66,6 +66,7 @@ s_coxph_pairwise <- function(df,
         pvalue = formatters::with_label("", paste0("p-value (", pval_method, ")")),
         hr = formatters::with_label("", "Hazard Ratio"),
         hr_ci = formatters::with_label("", f_conf_level(conf_level)),
+        hr_ci_3d = formatters::with_label("", paste0("Hazard Ratio (", f_conf_level(conf_level), ")")),
         n_tot = formatters::with_label("", "Total n"),
         n_tot_events = formatters::with_label("", "Total events")
       )
@@ -112,6 +113,10 @@ s_coxph_pairwise <- function(df,
     pvalue = formatters::with_label(unname(pval), paste0("p-value (", pval_method, ")")),
     hr = formatters::with_label(sum_cox$conf.int[1, 1], "Hazard Ratio"),
     hr_ci = formatters::with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)),
+    hr_ci_3d = formatters::with_label(
+      c(sum_cox$conf.int[1, 1], unname(sum_cox$conf.int[1, 3:4])),
+      paste0("Hazard Ratio (", f_conf_level(conf_level), ")")
+    ),
     n_tot = formatters::with_label(sum_cox$n, "Total n"),
     n_tot_events = formatters::with_label(sum_cox$nevent, "Total events")
   )
@@ -125,11 +130,12 @@ s_coxph_pairwise <- function(df,
 #' @keywords internal
 a_coxph_pairwise <- make_afun(
   s_coxph_pairwise,
-  .indent_mods = c(pvalue = 0L, hr = 0L, hr_ci = 1L, n_tot = 0L, n_tot_events = 0L),
+  .indent_mods = c(pvalue = 0L, hr = 0L, hr_ci = 1L, n_tot = 0L, n_tot_events = 0L, hr_ci_3d = 0L),
   .formats = c(
     pvalue = "x.xxxx | (<0.0001)",
     hr = "xx.xx",
     hr_ci = "(xx.xx, xx.xx)",
+    hr_ci_3d = "xx.xx (xx.xx - xx.xx)",
     n_tot = "xx.xx",
     n_tot_events = "xx.xx"
   )

diff --git a/R/survival_time.R b/R/survival_time.R
@@ -44,7 +44,10 @@ NULL
 #' * `s_surv_time()` returns the statistics:
 #'   * `median`: Median survival time.
 #'   * `median_ci`: Confidence interval for median time.
+#'   * `median_ci_3d`: Median with confidence interval for median time.
 #'   * `quantiles`: Survival time for two specified quantiles.
+#'   * `quantiles_lower`: quantile with confidence interval for the first specified quantile.
+#'   * `quantiles_upper`: quantile with confidence interval for the second specified quantile.
 #'   * `range_censor`: Survival time range for censored observations.
 #'   * `range_event`: Survival time range for observations with events.
 #'   * `range`: Survival time range for all observations.
@@ -71,10 +74,24 @@ s_surv_time <- function(df,
     conf.type = conf_type
   )
   srv_tab <- summary(srv_fit, extend = TRUE)$table
-  srv_qt_tab <- stats::quantile(srv_fit, probs = quantiles)$quantile
+  srv_qt_tab_pre <- stats::quantile(srv_fit, probs = quantiles)
+  srv_qt_tab <- srv_qt_tab_pre$quantile
   range_censor <- range_noinf(df[[.var]][!df[[is_event]]], na.rm = TRUE)
   range_event <- range_noinf(df[[.var]][df[[is_event]]], na.rm = TRUE)
   range <- range_noinf(df[[.var]], na.rm = TRUE)
+
+  names(quantiles) <- as.character(100 * quantiles)
+  srv_qt_tab_pre <- unlist(srv_qt_tab_pre)
+  srv_qt_ci <- lapply(quantiles, function(x) {
+    name <- as.character(100 * x)
+
+    c(
+      srv_qt_tab_pre[[paste0("quantile.", name)]],
+      srv_qt_tab_pre[[paste0("lower.", name)]],
+      srv_qt_tab_pre[[paste0("upper.", name)]]
+    )
+  })
+
   list(
     median = formatters::with_label(unname(srv_tab["median"]), "Median"),
     median_ci = formatters::with_label(
@@ -85,7 +102,20 @@ s_surv_time <- function(df,
     ),
     range_censor = formatters::with_label(range_censor, "Range (censored)"),
     range_event = formatters::with_label(range_event, "Range (event)"),
-    range = formatters::with_label(range, "Range")
+    range = formatters::with_label(range, "Range"),
+    median_ci_3d = formatters::with_label(
+      c(
+        unname(srv_tab["median"]),
+        unname(srv_tab[paste0(srv_fit$conf.int, c("LCL", "UCL"))])
+      ),
+      paste0("Median (", f_conf_level(conf_level), ")")
+    ),
+    quantiles_lower = formatters::with_label(
+      unname(srv_qt_ci[[1]]), paste0(quantiles[1] * 100, "%-ile (", f_conf_level(conf_level), ")")
+    ),
+    quantiles_upper = formatters::with_label(
+      unname(srv_qt_ci[[2]]), paste0(quantiles[2] * 100, "%-ile (", f_conf_level(conf_level), ")")
+    )
   )
 }
 
@@ -122,8 +152,17 @@ a_surv_time <- function(df,
   rng_censor_upr <- x_stats[["range_censor"]][2]
 
   # Use method-specific defaults
-  fmts <- c(median_ci = "(xx.x, xx.x)", quantiles = "xx.x, xx.x", range = "xx.x to xx.x")
-  lbls <- c(median_ci = "95% CI", range = "Range", range_censor = "Range (censored)", range_event = "Range (event)")
+  fmts <- c(
+    median_ci = "(xx.x, xx.x)", quantiles = "xx.x, xx.x", range = "xx.x to xx.x",
+    median_ci_3d = "xx.x (xx.x - xx.x)",
+    quantiles_lower = "xx.x (xx.x - xx.x)", quantiles_upper = "xx.x (xx.x - xx.x)"
+  )
+  lbls <- c(
+    median_ci = "95% CI", range = "Range", range_censor = "Range (censored)", range_event = "Range (event)",
+    median_ci_3d = "Median (95% CI)",
+    quantiles_lower = "25%-ile (95% CI)",
+    quantiles_upper = "75%-ile (95% CI)"
+  )
   lbls_custom <- .labels
   .formats <- c(.formats, fmts[setdiff(names(fmts), names(.formats))])
   .labels <- c(.labels, lbls[setdiff(names(lbls), names(lbls_custom))])
@@ -156,7 +195,6 @@ a_surv_time <- function(df,
     .names = .labels,
     .labels = .labels,
     .indent_mods = .indent_mods,
-    .format_na_strs = na_str,
     .cell_footnotes = cell_fns
   )
 }
@@ -199,7 +237,7 @@ surv_time <- function(lyt,
                       .labels = NULL,
                       .indent_mods = c(median_ci = 1L)) {
   extra_args <- list(
-    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str,
+    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods,
     is_event = is_event, control = control, ref_fn_censor = ref_fn_censor, ...
   )
 

diff --git a/R/survival_timepoint.R b/R/survival_timepoint.R
@@ -40,6 +40,7 @@ NULL
 #'   * `event_free_rate`: Event-free rate (%).
 #'   * `rate_se`: Standard error of event free rate.
 #'   * `rate_ci`: Confidence interval for event free rate.
+#'   * `event_free_rate_3d`: Event-free rate (%) with Confidence interval.
 #'
 #' @keywords internal
 s_surv_timepoint <- function(df,
@@ -74,11 +75,15 @@ s_surv_timepoint <- function(df,
     rate_se <- df_srv_fit$std.err
     rate_ci <- c(df_srv_fit$lower, df_srv_fit$upper)
   }
+  event_free_rate_3d <- c(event_free_rate, rate_ci)
   list(
     pt_at_risk = formatters::with_label(pt_at_risk, "Patients remaining at risk"),
     event_free_rate = formatters::with_label(event_free_rate * 100, "Event Free Rate (%)"),
     rate_se = formatters::with_label(rate_se * 100, "Standard Error of Event Free Rate"),
-    rate_ci = formatters::with_label(rate_ci * 100, f_conf_level(conf_level))
+    rate_ci = formatters::with_label(rate_ci * 100, f_conf_level(conf_level)),
+    event_free_rate_3d = formatters::with_label(
+      event_free_rate_3d * 100, paste0("Event Free Rate (", f_conf_level(conf_level), ")")
+    )
   )
 }
 
@@ -111,6 +116,7 @@ a_surv_timepoint <- make_afun(
 #' * `s_surv_timepoint_diff()` returns the statistics:
 #'   * `rate_diff`: Event-free rate difference between two groups.
 #'   * `rate_diff_ci`: Confidence interval for the difference.
+#'   * `rate_diff_ci_3d`: Event-free rate difference and confidence interval between two groups.
 #'   * `ztest_pval`: p-value to test the difference is 0.
 #'
 #' @keywords internal
@@ -126,6 +132,9 @@ s_surv_timepoint_diff <- function(df,
       list(
         rate_diff = formatters::with_label("", "Difference in Event Free Rate"),
         rate_diff_ci = formatters::with_label("", f_conf_level(control$conf_level)),
+        rate_diff_ci_3d = formatters::with_label(
+          "", paste0("Difference in Event Free Rate", f_conf_level(control$conf_level))
+        ),
         ztest_pval = formatters::with_label("", "p-value (Z-test)")
       )
     )
@@ -143,6 +152,7 @@ s_surv_timepoint_diff <- function(df,
 
   qs <- c(-1, 1) * stats::qnorm(1 - (1 - control$conf_level) / 2)
   rate_diff_ci <- rate_diff + qs * se_diff
+  rate_diff_ci_3d <- c(rate_diff, rate_diff_ci)
   ztest_pval <- if (is.na(rate_diff)) {
     NA
   } else {
@@ -151,6 +161,9 @@ s_surv_timepoint_diff <- function(df,
   list(
     rate_diff = formatters::with_label(rate_diff, "Difference in Event Free Rate"),
     rate_diff_ci = formatters::with_label(rate_diff_ci, f_conf_level(control$conf_level)),
+    rate_diff_ci_3d = formatters::with_label(
+      rate_diff_ci_3d, paste0("Difference in Event Free Rate", f_conf_level(control$conf_level))
+    ),
     ztest_pval = formatters::with_label(ztest_pval, "p-value (Z-test)")
   )
 }
@@ -167,6 +180,7 @@ a_surv_timepoint_diff <- make_afun(
   .formats = c(
     rate_diff = "xx.xx",
     rate_diff_ci = "(xx.xx, xx.xx)",
+    rate_diff_ci_3d = format_xx("xx.xx (xx.xx, xx.xx)"),
     ztest_pval = "x.xxxx | (<0.0001)"
   )
 )
@@ -260,8 +274,8 @@ surv_timepoint <- function(lyt,
   extra_args <- list(time_point = time_point, is_event = is_event, control = control, ...)
 
   f <- list(
-    surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
-    surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
+    surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "event_free_rate_3d"),
+    surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval", "rate_diff_ci_3d")
   )
   .stats <- h_split_param(.stats, .stats, f = f)
   .formats <- h_split_param(.formats, names(.formats), f = f)

diff --git a/R/utils_default_stats_formats_labels.R b/R/utils_default_stats_formats_labels.R
@@ -395,6 +395,20 @@ labels_use_control <- function(labels_default, control, labels_custom = NULL) {
       labels_default["quantiles"]
     )
   }
+  if ("quantiles" %in% names(control) && "quantiles_lower" %in% names(labels_default) &&
+    !"quantiles_lower" %in% names(labels_custom)) { # nolint
+    labels_default["quantiles_lower"] <- gsub(
+      "[0-9]+%-ile", paste0(control[["quantiles"]][1] * 100, "%-ile", ""),
+      labels_default["quantiles_lower"]
+    )
+  }
+  if ("quantiles" %in% names(control) && "quantiles_upper" %in% names(labels_default) &&
+    !"quantiles_upper" %in% names(labels_custom)) { # nolint
+    labels_default["quantiles_upper"] <- gsub(
+      "[0-9]+%-ile", paste0(control[["quantiles"]][2] * 100, "%-ile", ""),
+      labels_default["quantiles_upper"]
+    )
+  }
   if ("test_mean" %in% names(control) && "mean_pval" %in% names(labels_default) &&
     !"mean_pval" %in% names(labels_custom)) { # nolint
     labels_default["mean_pval"] <- gsub(
@@ -423,7 +437,9 @@ tern_default_stats <- list(
   analyze_vars_numeric = c(
     "n", "sum", "mean", "sd", "se", "mean_sd", "mean_se", "mean_ci", "mean_sei", "mean_sdi", "mean_pval",
     "median", "mad", "median_ci", "quantiles", "iqr", "range", "min", "max", "median_range", "cv",
-    "geom_mean", "geom_mean_ci", "geom_cv"
+    "geom_mean", "geom_mean_ci", "geom_cv",
+    "median_ci_3d",
+    "mean_ci_3d", "geom_mean_ci_3d"
   ),
   count_cumulative = c("count_fraction", "count_fraction_fixed_dp"),
   count_missed_doses = c("n", "count_fraction", "count_fraction_fixed_dp"),
@@ -443,8 +459,14 @@ tern_default_stats <- list(
   summarize_glm_count = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
   summarize_num_patients = c("unique", "nonunique", "unique_count"),
   summarize_patients_events_in_cols = c("unique", "all"),
-  surv_time = c("median", "median_ci", "quantiles", "range_censor", "range_event", "range"),
-  surv_timepoint = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "rate_diff", "rate_diff_ci", "ztest_pval"),
+  surv_time = c(
+    "median", "median_ci", "median_ci_3d", "quantiles",
+    "quantiles_lower", "quantiles_upper", "range_censor", "range_event", "range"
+  ),
+  surv_timepoint = c(
+    "pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "rate_diff", "rate_diff_ci", "ztest_pval",
+    "event_free_rate_3d"
+  ),
   tabulate_rsp_biomarkers = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
   tabulate_rsp_subgroups = c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval"),
   tabulate_survival_biomarkers = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
@@ -479,10 +501,14 @@ tern_default_formats <- c(
   mean_sei = "(xx.xx, xx.xx)",
   mean_sdi = "(xx.xx, xx.xx)",
   mean_pval = "x.xxxx | (<0.0001)",
+  mean_ci_3d = "xx.xx (xx.xx - xx.xx)",
   median = "xx.x",
   mad = "xx.x",
   median_ci = "(xx.xx, xx.xx)",
+  median_ci_3d = "xx.xx (xx.xx - xx.xx)",
   quantiles = "xx.x - xx.x",
+  quantiles_lower = "xx.xx (xx.xx - xx.xx)",
+  quantiles_upper = "xx.xx (xx.xx - xx.xx)",
   iqr = "xx.x",
   range = "xx.x - xx.x",
   min = "xx.x",
@@ -491,6 +517,7 @@ tern_default_formats <- c(
   cv = "xx.x",
   geom_mean = "xx.x",
   geom_mean_ci = "(xx.xx, xx.xx)",
+  geom_mean_ci_3d = "xx.xx (xx.xx - xx.xx)",
   geom_cv = "xx.x",
   pval = "x.xxxx | (<0.0001)",
   pval_counts = "x.xxxx | (<0.0001)",
@@ -528,10 +555,14 @@ tern_default_labels <- c(
   mean_sei = "Mean -/+ 1xSE",
   mean_sdi = "Mean -/+ 1xSD",
   mean_pval = "Mean p-value (H0: mean = 0)",
+  mean_ci_3d = "Mean (95% CI)",
   median = "Median",
   mad = "Median Absolute Deviation",
   median_ci = "Median 95% CI",
+  median_ci_3d = "Median (95% CI)",
   quantiles = "25% and 75%-ile",
+  quantiles_lower = "25%-ile (95% CI)",
+  quantiles_upper = "75%-ile (95% CI)",
   iqr = "IQR",
   range = "Min - Max",
   min = "Minimum",
@@ -540,6 +571,7 @@ tern_default_labels <- c(
   cv = "CV (%)",
   geom_mean = "Geometric Mean",
   geom_mean_ci = "Geometric Mean 95% CI",
+  geom_mean_ci_3d = "Geometric Mean (95% CI)",
   geom_cv = "CV % Geometric Mean",
   pval = "p-value (t-test)", # Default for numeric
   pval_counts = "p-value (chi-squared test)", # Default for counts

diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd
diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
diff --git a/man/summarize_change.Rd b/man/summarize_change.Rd
diff --git a/man/survival_time.Rd b/man/survival_time.Rd