Skip to content

Commit

Permalink
Merge pull request #316 from openpharma/310update
Browse files Browse the repository at this point in the history
310update
  • Loading branch information
SHAESEN2 authored Feb 25, 2022
2 parents 3c9c2c1 + 6a94b2b commit 1445407
Show file tree
Hide file tree
Showing 13 changed files with 291 additions and 188 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@ inst/doc
README.html
docs/
tests/testthat/_snaps/
.Rprofile
5 changes: 1 addition & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,10 @@ Suggests:
rmarkdown,
testthat (>= 2.1.0),
tibble,
tidycmprsk,
tidycmprsk (>= 0.1.1),
vdiffr
VignetteBuilder:
knitr
Remotes:
MSKCC-Epi-Bio/tidycmprsk,
tidymodels/hardhat
biocViews:
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
Expand Down
4 changes: 2 additions & 2 deletions R/add_risktable.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ add_risktable <- function(gg, ...){
add_risktable.ggsurvfit <- function(
gg
,times = NULL
,statlist = c("n.risk")
,statlist = "n.risk"
,label = NULL
,group = "strata"
,collapse = FALSE
,...
){

# Obtain the relevant table -----------------------------------------------
# Obtain the relevant table --------------------------------------------------
tidy_object <- gg$data
estimate_object <- .extract_estimate_object(gg)

Expand Down
23 changes: 7 additions & 16 deletions R/estimate_cuminc.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,25 +28,16 @@
#' ) %>%
#' visr() %>%
#' add_CI() %>%
#' add_risktable(statlist = c("n.risk", "cumulative.event"))
#' add_risktable(statlist = c("n.risk", "cum.event"))

estimate_cuminc <- function(data
,strata = NULL
,CNSR = "CNSR"
,AVAL = "AVAL"
,conf.int = 0.95
,...){
,...) {
# check for installation of tidycmprsk package
if (!"tidycmprsk" %in% rownames(utils::installed.packages()) ||
utils::packageVersion("tidycmprsk") < "0.1.0.9003") {
message("Install updated version of 'tidycmprsk' with `devtools::install_github('MSKCC-Epi-Bio/tidycmprsk')`")
return(invisible())
}
if (!"hardhat" %in% rownames(utils::installed.packages()) ||
utils::packageVersion("hardhat") <= "0.1.6") {
message("Install updated version of 'hardhat' with `devtools::install_github('tidymodels/hardhat')`")
return(invisible())
}
rlang::check_installed("tidycmprsk", version = "0.1.1")

# checking/prepping inputs ---------------------------------------------------
strata <- strata %||% "1" %>% paste(collapse = " + ")
Expand All @@ -69,12 +60,12 @@ estimate_cuminc <- function(data
visr_tidy_tidycuminc <- function(x, times = NULL) {
df_visr_tidy <-
tidycmprsk::tidy(x, times = times) %>%
dplyr::filter(.data$outcome %in% names(x$failcode)[1]) %>%
dplyr::filter(.data[["outcome"]] %in% names(x$failcode)[1]) %>%
# renaming to match column name in the survfit equivalent of these functions
dplyr::rename(
est = .data$estimate,
est.lower = .data$conf.low,
est.upper = .data$conf.high
est = .data[["estimate"]],
est.lower = .data[["conf.low"]],
est.upper = .data[["conf.high"]]
)

# adding strata column if not already present
Expand Down
146 changes: 87 additions & 59 deletions R/get_risktable.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ get_risktable <- function(x, ...){

#' @param x an object of class `survfit` or `tidycuminc`
#' @param times Numeric vector indicating the times at which the risk set, censored subjects, events are calculated.
#' @param statlist Character vector indicating which summary data to present. Current choices are "n.risk" "n.event" "n.censor".
#' Default is "n.risk". Competing risk models also have the option of "cumulative.event" and "cumulative.censor"
#' @param label Character vector with labels for the statlist. Default matches "n.risk" with "At risk", "n.event" with "Events" and "n.censor"
#' with "Censored".
#' @param statlist Character vector indicating which summary data to present. Current choices are "n.risk" "n.event"
#' "n.censor", "cum.event", "cum.censor".
#' Default is "n.risk".
#' @param label Character vector with labels for the statlist. Default matches "n.risk" with "At risk", "n.event" with
#' "Events", "n.censor" with "Censored", "cum.event" with "Cum. Event", and "cum.censor" with "Cum. Censor".
#' @param group String indicating the grouping variable for the risk tables.
#' Current options are:
#' \itemize{
Expand All @@ -47,17 +48,20 @@ get_risktable <- function(x, ...){
get_risktable.survfit <- function(
x
,times = NULL
,statlist = c("n.risk")
,statlist = "n.risk"
,label = NULL
,group = "strata"
,group = c("strata", "statlist")
,collapse = FALSE
,...
){

# User input validation ---------------------------------------------------
group <- match.arg(group)

if (!base::all(statlist %in% c("n.risk", "n.censor", "n.event")))
stop("statlist argument not valid. Current options are n.risk, n.censor and n.event.")
if (!base::all(statlist %in% c("n.risk", "n.censor", "n.event",
"cum.censor", "cum.event")))
stop("statlist argument not valid. Current options are n.risk, n.censor,
n.event, cum.event, cum.censor")

if (!is.null(label) & !base::all(is.character(label)) & !base::inherits(label, "factor"))
stop("label arguments should be of class `character` or `factor`.")
Expand All @@ -68,52 +72,51 @@ get_risktable.survfit <- function(
if (base::any(times < 0))
stop("Negative times are not valid.")

if (length(group)>1 | !(base::all(group %in% c("statlist", "strata"))))
stop("group should equal statlist or strata.")

# Clean input ------------------------------------------------------------

tidy_object <- tidyme(x)
statlist <- unique(statlist)

# Match amount of elements in label with statlist -------------------------

if (length(label) <= length(statlist)) {

vlookup <- data.frame( statlist = c("n.risk", "n.censor", "n.event")
,label = c("At risk", "Censored", "Events")

vlookup <- data.frame(statlist = c("n.risk", "n.censor", "n.event",
"cum.censor", "cum.event")
,label = c("At risk", "Censored", "Events",
"Cum. Censored", "Cum. Events")
,check.names = FALSE
,stringsAsFactors = FALSE)


label <- c(label, rep(NA, length(statlist)-length(label)))

have <- data.frame( cbind(label, statlist)
,check.names = FALSE
,stringsAsFactors = TRUE)

label_lookup <- vlookup %>%
dplyr::right_join(have, by = "statlist") %>%
dplyr::mutate(label = dplyr::coalesce(label.y, label.x)) %>%
dplyr::select(-label.x, -label.y) %>%
as.data.frame()

} else if (length(label) > length(statlist)) {

label_lookup <- data.frame( statlist = statlist
,label = label[1:length(statlist)]
,check.names = FALSE
,stringsAsFactors = TRUE)

}


# Ensure the order of the label corresponds to statlist order-------------

statlist_order <- factor(statlist, levels = statlist)
label_lookup[["statlist"]] <- factor(label_lookup[["statlist"]], levels = statlist)
label_lookup <- label_lookup[order(label_lookup[["statlist"]]), ]

# Generate time ticks ----------------------------------------------------

if (is.null(times)) {
Expand All @@ -129,18 +132,27 @@ get_risktable.survfit <- function(
# Risk table per statlist -------------------------------------------------

## labels of risk table are strata, titles are specified through `label

per_statlist <- data.frame(
time = survfit_summary$time,
strata = base::factor(.get_strata(survfit_summary[["strata"]]), levels = unique(.get_strata(survfit_summary[["strata"]]))),
n.risk = survfit_summary$n.risk,
n.event = survfit_summary$n.event,
n.censor = survfit_summary$n.censor
) %>%
dplyr::arrange(strata, time)%>%
dplyr::rename(y_values = strata)%>%

per_statlist <-
data.frame(
time = survfit_summary$time,
strata =
base::factor(.get_strata(survfit_summary[["strata"]]),
levels = unique(.get_strata(survfit_summary[["strata"]]))),
n.risk = survfit_summary[["n.risk"]],
n.event = survfit_summary[["n.event"]],
n.censor = survfit_summary[["n.censor"]]
) %>%
dplyr::arrange(.data[["strata"]], .data[["time"]]) %>%
dplyr::group_by(.data[["strata"]]) %>%
dplyr::mutate(
cum.event = cumsum(.data[["n.event"]]),
cum.censor = cumsum(.data[["n.censor"]])
) %>%
dplyr::ungroup() %>%
dplyr::rename(y_values = strata) %>%
as.data.frame()

final <- per_statlist[ , c("time", "y_values", levels(statlist_order))]

attr(final, 'time_ticks') <- times
Expand All @@ -152,17 +164,21 @@ get_risktable.survfit <- function(
if (group == "strata" & collapse == FALSE){
per_strata <- per_statlist %>%
dplyr::arrange(time) %>%
tidyr::pivot_longer( cols = c("n.risk", "n.censor", "n.event")
tidyr::pivot_longer( cols = c("n.risk", "n.censor", "n.event",
"cum.censor", "cum.event")
,names_to = "statlist"
,values_to = "values") %>%
tidyr::pivot_wider(names_from = "y_values", values_from = values) %>%
dplyr::rename(y_values = statlist) %>%
dplyr::filter(y_values %in% statlist)%>%
as.data.frame()

per_strata[["y_values"]] <- factor(per_strata[["y_values"]], levels = levels(label_lookup[["statlist"]]), labels = label_lookup[["label"]])

per_strata[["y_values"]] <-
factor(per_strata[["y_values"]],
levels = levels(label_lookup[["statlist"]]),
labels = label_lookup[["label"]])
per_strata <- per_strata[order(per_strata[["y_values"]]), ]

title <- levels(per_statlist[["y_values"]])

final <- per_strata
Expand All @@ -182,10 +198,13 @@ get_risktable.survfit <- function(
n.risk = sum(n.risk)
,n.event = sum(n.event)
,n.censor = sum(n.censor)
,cum.event = sum(.data[["cum.event"]])
,cum.censor = sum(.data[["cum.censor"]])
) %>%
dplyr::ungroup() %>%
dplyr::select(-strata) %>%
tidyr::pivot_longer( cols = c("n.risk", "n.censor", "n.event")
tidyr::pivot_longer( cols = c("n.risk", "n.censor", "n.event",
"cum.censor", "cum.event")
,names_to = "y_values"
,values_to = "Overall") %>%
dplyr::filter(y_values %in% statlist) %>%
Expand Down Expand Up @@ -215,16 +234,20 @@ get_risktable.tidycuminc <- function(x
,times = pretty(x$tidy$time, 10)
,statlist = c("n.risk")
,label = NULL
,group = "strata"
,group = c("strata", "statlist")
,collapse = FALSE
,...) {
# check for installation of tidycmprsk package
rlang::check_installed("tidycmprsk", version = "0.1.1")
group <- match.arg(group)

# list of statistics and their default labels
lst_stat_labels_default <-
list(n.risk = "At Risk",
n.event = "N Event",
n.censor = "N Censored",
cumulative.event = "Cum. N Event",
cumulative.censor = "Cum. N Censored")
n.event = "Events",
n.censor = "Censored",
cum.event = "Cum. Events",
cum.censor = "Cum. Censored")

label <-
.reconcile_statlist_and_labels(
Expand All @@ -247,41 +270,46 @@ get_risktable.tidycuminc <- function(x
dplyr::group_by(dplyr::across(dplyr::any_of(c("time", "outcome", "strata")))) %>%
dplyr::mutate(
dplyr::across(
dplyr::any_of(c("n.risk", "n.event", "cumulative.event",
"n.censor", "cumulative.censor")),
dplyr::any_of(c("n.risk", "n.event", "cum.event",
"n.censor", "cum.censor")),
~sum(., na.rm = TRUE))
) %>%
dplyr::filter(dplyr::row_number() == 1L) %>%
dplyr::ungroup()
}

if (group %in% "strata" || isTRUE(collapse)) {
if (group == "strata" || isTRUE(collapse)) {
strata_levels <- unique(tidy[["strata"]]) %>% sort() %>% as.character()

result <-
tidy %>%
dplyr::select(dplyr::any_of(c("time", "strata", "n.risk", "n.event",
"cumulative.event", "n.censor", "cumulative.censor"))) %>%
tidyr::pivot_longer(cols = -c(.data$time, .data$strata)) %>%
"cum.event", "n.censor", "cum.censor"))) %>%
tidyr::pivot_longer(cols = -c(.data[["time"]], .data[["strata"]])) %>%
tidyr::pivot_wider(
id_cols = c(.data$time, .data$name),
id_cols = c(.data[["time"]], .data[["name"]]),
values_from = "value",
names_from = "strata"
) %>%
dplyr::relocate(dplyr::any_of(strata_levels), .after = dplyr::last_col()) %>%
dplyr::mutate(
y_values = dplyr::recode(.data$name, !!!lst_stat_labels)
y_values = dplyr::recode(.data[["name"]], !!!lst_stat_labels)
) %>%
dplyr::filter(.data$name %in% .env$statlist) %>%
dplyr::select(.data$time, .data$y_values, dplyr::everything(), -.data$name) %>%
dplyr::filter(.data[["name"]] %in% .env[["statlist"]]) %>%
dplyr::select(.data[["time"]], .data[["y_values"]], dplyr::everything(), -.data[["name"]]) %>%
dplyr::mutate(y_values = factor(.data[["y_values"]], levels = .env[["label"]])) %>%
dplyr::arrange(.data[["y_values"]], .data[["time"]]) %>%
as.data.frame()
attr(result, "title") <- names(result) %>% setdiff(c("time", "y_values"))
attr(result, "statlist") <- names(result) %>% setdiff(c("time", "y_values"))
}
else if (group %in% "statlist") {
else if (group == "statlist") {
result <-
tidy %>%
dplyr::select(.data$time, y_values = .data$strata,
dplyr::select(.data[["time"]], y_values = .data[["strata"]],
dplyr::any_of(c("n.risk",
"n.event", "cumulative.event",
"n.censor", "cumulative.censor"))) %>%
"n.event", "cum.event",
"n.censor", "cum.censor"))) %>%
as.data.frame()

attr(result, "statlist") <- names(lst_stat_labels_default[statlist])
Expand Down
6 changes: 4 additions & 2 deletions R/tidyme.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
#' lm_tidied <- visR::tidyme(lm_object)
#' lm_tidied
#'
#' @return Tibble containing all list elements of the S3 object as columns
#' @return Tibble containing all list elements of the S3 object as columns.
#' The column 'strata' is a factor to ensure that the strata are sorted
#' in agreement with the order in the `survfit` object
#'
#' @rdname tidyme
#'
Expand Down Expand Up @@ -82,12 +84,12 @@ tidyme.survfit <- function(x, ...) {

if (!is.null(x[["strata"]])) {
retme[["strata"]] <- rep(names(x[["strata"]]), x[["strata"]])

retme[["n.strata"]] <- rep(x[["n"]], x[["strata"]])
}
}

attr(retme, "survfit_object") <- survfit_object
retme[["strata"]] <- factor(retme[["strata"]], levels = unique(retme[["strata"]]))

return(as.data.frame(retme))
}
3 changes: 3 additions & 0 deletions R/visr.R
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,9 @@ visr.tidycuminc <- function(x = NULL
,y_ticks = pretty(c(0, 1), 5)
,legend_position = "right"
,...){
# check for installation of tidycmprsk package
rlang::check_installed("tidycmprsk", version = "0.1.1")

if (!is.null(x_units)) {
x_label <- paste0(x_label, " (", x_units, ")")
}
Expand Down
Loading

0 comments on commit 1445407

Please sign in to comment.