Merge pull request #316 from openpharma/310update

310update
openpharma · Feb 25, 2022 · 1445407 · 1445407
2 parents 3c9c2c1 + 6a94b2b
commit 1445407
Show file tree

Hide file tree

Showing 13 changed files with 291 additions and 188 deletions.
diff --git a/.gitignore b/.gitignore
@@ -39,3 +39,4 @@ inst/doc
 README.html
 docs/
 tests/testthat/_snaps/
+.Rprofile
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -49,13 +49,10 @@ Suggests:
     rmarkdown,
     testthat (>= 2.1.0),
     tibble,
-    tidycmprsk,
+    tidycmprsk (>= 0.1.1),
     vdiffr
 VignetteBuilder: 
     knitr
-Remotes: 
-    MSKCC-Epi-Bio/tidycmprsk,
-    tidymodels/hardhat
 biocViews:
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)

diff --git a/R/add_risktable.R b/R/add_risktable.R
@@ -65,14 +65,14 @@ add_risktable <- function(gg, ...){
 add_risktable.ggsurvfit <- function(
   gg
   ,times = NULL
-  ,statlist = c("n.risk")
+  ,statlist = "n.risk"
   ,label = NULL
   ,group = "strata"
   ,collapse = FALSE
   ,...
 ){
 
-  # Obtain the relevant table -----------------------------------------------
+  # Obtain the relevant table --------------------------------------------------
   tidy_object <- gg$data
   estimate_object <- .extract_estimate_object(gg)
 

diff --git a/R/estimate_cuminc.R b/R/estimate_cuminc.R
@@ -28,25 +28,16 @@
 #' ) %>%
 #'   visr() %>%
 #'   add_CI() %>%
-#'   add_risktable(statlist = c("n.risk", "cumulative.event"))
+#'   add_risktable(statlist = c("n.risk", "cum.event"))
 
 estimate_cuminc <- function(data
                             ,strata = NULL
                             ,CNSR = "CNSR"
                             ,AVAL = "AVAL"
                             ,conf.int = 0.95
-                            ,...){
+                            ,...) {
   # check for installation of tidycmprsk package
-  if (!"tidycmprsk" %in% rownames(utils::installed.packages()) ||
-      utils::packageVersion("tidycmprsk") < "0.1.0.9003") {
-    message("Install updated version of 'tidycmprsk' with `devtools::install_github('MSKCC-Epi-Bio/tidycmprsk')`")
-    return(invisible())
-  }
-  if (!"hardhat" %in% rownames(utils::installed.packages()) ||
-      utils::packageVersion("hardhat") <= "0.1.6") {
-    message("Install updated version of 'hardhat' with `devtools::install_github('tidymodels/hardhat')`")
-    return(invisible())
-  }
+  rlang::check_installed("tidycmprsk", version = "0.1.1")
 
   # checking/prepping inputs ---------------------------------------------------
   strata <- strata %||% "1" %>% paste(collapse = " + ")
@@ -69,12 +60,12 @@ estimate_cuminc <- function(data
 visr_tidy_tidycuminc <- function(x, times = NULL) {
   df_visr_tidy <-
     tidycmprsk::tidy(x, times = times) %>%
-    dplyr::filter(.data$outcome %in% names(x$failcode)[1]) %>%
+    dplyr::filter(.data[["outcome"]] %in% names(x$failcode)[1]) %>%
     # renaming to match column name in the survfit equivalent of these functions
     dplyr::rename(
-      est = .data$estimate,
-      est.lower = .data$conf.low,
-      est.upper = .data$conf.high
+      est = .data[["estimate"]],
+      est.lower = .data[["conf.low"]],
+      est.upper = .data[["conf.high"]]
     )
 
   # adding strata column if not already present

diff --git a/R/get_risktable.R b/R/get_risktable.R
@@ -19,10 +19,11 @@ get_risktable <- function(x, ...){
 
 #' @param x an object of class `survfit` or `tidycuminc`
 #' @param times Numeric vector indicating the times at which the risk set, censored subjects, events are calculated.
-#' @param statlist Character vector indicating which summary data to present. Current choices are "n.risk" "n.event" "n.censor".
-#'   Default is "n.risk". Competing risk models also have the option of "cumulative.event" and "cumulative.censor"
-#' @param label Character vector with labels for the statlist. Default matches "n.risk" with "At risk", "n.event" with "Events" and "n.censor"
-#'   with "Censored".
+#' @param statlist Character vector indicating which summary data to present. Current choices are "n.risk" "n.event"
+#'   "n.censor", "cum.event", "cum.censor".
+#'   Default is "n.risk".
+#' @param label Character vector with labels for the statlist. Default matches "n.risk" with "At risk", "n.event" with
+#'   "Events", "n.censor" with "Censored", "cum.event" with "Cum. Event", and "cum.censor" with "Cum. Censor".
 #' @param group String indicating the grouping variable for the risk tables.
 #'   Current options are:
 #'     \itemize{
@@ -47,17 +48,20 @@ get_risktable <- function(x, ...){
 get_risktable.survfit <- function(
   x
   ,times = NULL
-  ,statlist = c("n.risk")
+  ,statlist = "n.risk"
   ,label = NULL
-  ,group = "strata"
+  ,group = c("strata", "statlist")
   ,collapse = FALSE
   ,...
 ){
-  
+
 # User input validation ---------------------------------------------------
+  group <- match.arg(group)
 
-  if (!base::all(statlist %in% c("n.risk", "n.censor", "n.event")))
-    stop("statlist argument not valid. Current options are n.risk, n.censor and n.event.")
+  if (!base::all(statlist %in% c("n.risk", "n.censor", "n.event",
+                                 "cum.censor", "cum.event")))
+    stop("statlist argument not valid. Current options are n.risk, n.censor,
+         n.event, cum.event, cum.censor")
 
   if (!is.null(label) & !base::all(is.character(label)) & !base::inherits(label, "factor"))
     stop("label arguments should be of class `character` or `factor`.")
@@ -68,52 +72,51 @@ get_risktable.survfit <- function(
   if (base::any(times < 0))
     stop("Negative times are not valid.")
 
-  if (length(group)>1 | !(base::all(group %in% c("statlist", "strata"))))
-    stop("group should equal statlist or strata.")
-
 # Clean input ------------------------------------------------------------
 
   tidy_object <- tidyme(x)
   statlist <- unique(statlist)
-  
+
 # Match amount of elements in label with statlist -------------------------
-  
+
   if (length(label) <= length(statlist)) {
-
-    vlookup <- data.frame( statlist = c("n.risk", "n.censor", "n.event")
-                          ,label = c("At risk", "Censored", "Events")
+
+    vlookup <- data.frame(statlist = c("n.risk", "n.censor", "n.event",
+                                       "cum.censor", "cum.event")
+                          ,label = c("At risk", "Censored", "Events",
+                                     "Cum. Censored", "Cum. Events")
                           ,check.names = FALSE
                           ,stringsAsFactors = FALSE)
-    
+
 
     label <- c(label, rep(NA, length(statlist)-length(label)))
-    
+
     have <- data.frame( cbind(label, statlist)
                        ,check.names = FALSE
                        ,stringsAsFactors = TRUE)
-    
+
     label_lookup <- vlookup %>%
       dplyr::right_join(have, by = "statlist") %>%
       dplyr::mutate(label = dplyr::coalesce(label.y, label.x)) %>%
       dplyr::select(-label.x, -label.y) %>%
       as.data.frame()
 
   } else if (length(label) > length(statlist)) {
-    
+
     label_lookup <- data.frame( statlist = statlist
                                ,label = label[1:length(statlist)]
                                ,check.names = FALSE
                                ,stringsAsFactors = TRUE)
-    
+
   }
-   
+
 
 # Ensure the order of the label corresponds to statlist order-------------
-  
+
   statlist_order <- factor(statlist, levels = statlist)
   label_lookup[["statlist"]] <- factor(label_lookup[["statlist"]], levels = statlist)
   label_lookup <- label_lookup[order(label_lookup[["statlist"]]), ]
-  
+
 # Generate time ticks ----------------------------------------------------
 
   if (is.null(times)) {
@@ -129,18 +132,27 @@ get_risktable.survfit <- function(
   # Risk table per statlist -------------------------------------------------
 
   ## labels of risk table are strata, titles are specified through `label
-
- per_statlist <- data.frame(
-    time = survfit_summary$time,
-    strata = base::factor(.get_strata(survfit_summary[["strata"]]), levels = unique(.get_strata(survfit_summary[["strata"]]))),
-    n.risk = survfit_summary$n.risk,
-    n.event = survfit_summary$n.event,
-    n.censor = survfit_summary$n.censor
-  ) %>%
-    dplyr::arrange(strata, time)%>%
-    dplyr::rename(y_values = strata)%>%
+
+ per_statlist <-
+    data.frame(
+      time = survfit_summary$time,
+      strata =
+        base::factor(.get_strata(survfit_summary[["strata"]]),
+                     levels = unique(.get_strata(survfit_summary[["strata"]]))),
+      n.risk = survfit_summary[["n.risk"]],
+      n.event = survfit_summary[["n.event"]],
+      n.censor = survfit_summary[["n.censor"]]
+    ) %>%
+    dplyr::arrange(.data[["strata"]], .data[["time"]]) %>%
+    dplyr::group_by(.data[["strata"]]) %>%
+    dplyr::mutate(
+      cum.event = cumsum(.data[["n.event"]]),
+      cum.censor = cumsum(.data[["n.censor"]])
+    ) %>%
+    dplyr::ungroup() %>%
+    dplyr::rename(y_values = strata) %>%
     as.data.frame()
-  
+
   final <- per_statlist[ , c("time", "y_values", levels(statlist_order))]
 
   attr(final, 'time_ticks') <- times
@@ -152,17 +164,21 @@ get_risktable.survfit <- function(
   if (group == "strata" & collapse == FALSE){
     per_strata <- per_statlist %>%
       dplyr::arrange(time) %>%
-      tidyr::pivot_longer( cols = c("n.risk", "n.censor", "n.event")
+      tidyr::pivot_longer( cols = c("n.risk", "n.censor", "n.event",
+                                    "cum.censor", "cum.event")
                           ,names_to = "statlist"
                           ,values_to = "values") %>%
       tidyr::pivot_wider(names_from = "y_values", values_from = values) %>%
       dplyr::rename(y_values = statlist) %>%
       dplyr::filter(y_values %in% statlist)%>%
       as.data.frame()
-
-    per_strata[["y_values"]] <- factor(per_strata[["y_values"]], levels = levels(label_lookup[["statlist"]]), labels = label_lookup[["label"]])
+
+    per_strata[["y_values"]] <-
+      factor(per_strata[["y_values"]],
+             levels = levels(label_lookup[["statlist"]]),
+             labels = label_lookup[["label"]])
     per_strata <- per_strata[order(per_strata[["y_values"]]), ]
-    
+
     title <- levels(per_statlist[["y_values"]])
 
     final <- per_strata
@@ -182,10 +198,13 @@ get_risktable.survfit <- function(
         n.risk = sum(n.risk)
         ,n.event = sum(n.event)
         ,n.censor = sum(n.censor)
+        ,cum.event = sum(.data[["cum.event"]])
+        ,cum.censor = sum(.data[["cum.censor"]])
       ) %>%
       dplyr::ungroup() %>%
       dplyr::select(-strata) %>%
-      tidyr::pivot_longer( cols = c("n.risk", "n.censor", "n.event")
+      tidyr::pivot_longer( cols = c("n.risk", "n.censor", "n.event",
+                                    "cum.censor", "cum.event")
                            ,names_to = "y_values"
                            ,values_to = "Overall") %>%
       dplyr::filter(y_values %in% statlist) %>%
@@ -215,16 +234,20 @@ get_risktable.tidycuminc <- function(x
                                      ,times = pretty(x$tidy$time, 10)
                                      ,statlist = c("n.risk")
                                      ,label = NULL
-                                     ,group = "strata"
+                                     ,group = c("strata", "statlist")
                                      ,collapse = FALSE
                                      ,...) {
+  # check for installation of tidycmprsk package
+  rlang::check_installed("tidycmprsk", version = "0.1.1")
+  group <- match.arg(group)
+
   # list of statistics and their default labels
   lst_stat_labels_default <-
     list(n.risk = "At Risk",
-         n.event = "N Event",
-         n.censor = "N Censored",
-         cumulative.event = "Cum. N Event",
-         cumulative.censor = "Cum. N Censored")
+         n.event = "Events",
+         n.censor = "Censored",
+         cum.event = "Cum. Events",
+         cum.censor = "Cum. Censored")
 
   label <-
     .reconcile_statlist_and_labels(
@@ -247,41 +270,46 @@ get_risktable.tidycuminc <- function(x
       dplyr::group_by(dplyr::across(dplyr::any_of(c("time", "outcome", "strata")))) %>%
       dplyr::mutate(
         dplyr::across(
-          dplyr::any_of(c("n.risk", "n.event", "cumulative.event",
-                          "n.censor", "cumulative.censor")),
+          dplyr::any_of(c("n.risk", "n.event", "cum.event",
+                          "n.censor", "cum.censor")),
           ~sum(., na.rm = TRUE))
       ) %>%
       dplyr::filter(dplyr::row_number() == 1L) %>%
       dplyr::ungroup()
   }
 
-  if (group %in% "strata" || isTRUE(collapse)) {
+  if (group == "strata" || isTRUE(collapse)) {
+    strata_levels <- unique(tidy[["strata"]]) %>% sort() %>% as.character()
+
     result <-
       tidy %>%
       dplyr::select(dplyr::any_of(c("time", "strata", "n.risk", "n.event",
-                                    "cumulative.event", "n.censor", "cumulative.censor"))) %>%
-      tidyr::pivot_longer(cols = -c(.data$time, .data$strata)) %>%
+                                    "cum.event", "n.censor", "cum.censor"))) %>%
+      tidyr::pivot_longer(cols = -c(.data[["time"]], .data[["strata"]])) %>%
       tidyr::pivot_wider(
-        id_cols = c(.data$time, .data$name),
+        id_cols = c(.data[["time"]], .data[["name"]]),
         values_from = "value",
         names_from = "strata"
       ) %>%
+      dplyr::relocate(dplyr::any_of(strata_levels), .after = dplyr::last_col()) %>%
       dplyr::mutate(
-        y_values = dplyr::recode(.data$name, !!!lst_stat_labels)
+        y_values = dplyr::recode(.data[["name"]], !!!lst_stat_labels)
       ) %>%
-      dplyr::filter(.data$name %in% .env$statlist) %>%
-      dplyr::select(.data$time, .data$y_values, dplyr::everything(), -.data$name) %>%
+      dplyr::filter(.data[["name"]] %in% .env[["statlist"]]) %>%
+      dplyr::select(.data[["time"]], .data[["y_values"]], dplyr::everything(), -.data[["name"]]) %>%
+      dplyr::mutate(y_values = factor(.data[["y_values"]], levels = .env[["label"]])) %>%
+      dplyr::arrange(.data[["y_values"]], .data[["time"]]) %>%
       as.data.frame()
     attr(result, "title") <- names(result) %>% setdiff(c("time", "y_values"))
     attr(result, "statlist") <- names(result) %>% setdiff(c("time", "y_values"))
   }
-  else if (group %in% "statlist") {
+  else if (group == "statlist") {
     result <-
       tidy %>%
-      dplyr::select(.data$time, y_values = .data$strata,
+      dplyr::select(.data[["time"]], y_values = .data[["strata"]],
                     dplyr::any_of(c("n.risk",
-                                    "n.event", "cumulative.event",
-                                    "n.censor", "cumulative.censor"))) %>%
+                                    "n.event", "cum.event",
+                                    "n.censor", "cum.censor"))) %>%
       as.data.frame()
 
     attr(result, "statlist") <- names(lst_stat_labels_default[statlist])

diff --git a/R/tidyme.R b/R/tidyme.R
@@ -22,7 +22,9 @@
 #' lm_tidied <- visR::tidyme(lm_object)
 #' lm_tidied
 #'
-#' @return Tibble containing all list elements of the S3 object as columns
+#' @return Tibble containing all list elements of the S3 object as columns. 
+#'   The column 'strata' is a factor to ensure that the strata are sorted 
+#'   in agreement with the order in the `survfit` object
 #'
 #' @rdname tidyme
 #'
@@ -82,12 +84,12 @@ tidyme.survfit <- function(x, ...) {
 
     if (!is.null(x[["strata"]])) {
       retme[["strata"]] <- rep(names(x[["strata"]]), x[["strata"]])
-
       retme[["n.strata"]] <- rep(x[["n"]], x[["strata"]])
     }
   }
 
   attr(retme, "survfit_object") <- survfit_object
+  retme[["strata"]] <- factor(retme[["strata"]], levels = unique(retme[["strata"]]))
 
   return(as.data.frame(retme))
 }
diff --git a/R/visr.R b/R/visr.R
@@ -527,6 +527,9 @@ visr.tidycuminc <- function(x = NULL
                             ,y_ticks = pretty(c(0, 1), 5)
                             ,legend_position = "right"
                             ,...){
+  # check for installation of tidycmprsk package
+  rlang::check_installed("tidycmprsk", version = "0.1.1")
+
   if (!is.null(x_units)) {
     x_label <- paste0(x_label, " (", x_units, ")")
   }