From d6c0ee955b338f14e6406da455e7741812e75c79 Mon Sep 17 00:00:00 2001 From: Emily de la Rua <59304861+edelarua@users.noreply.github.com> Date: Sat, 2 Nov 2024 05:49:51 -0400 Subject: [PATCH 1/6] Refactor `count_patients_with_event()` and `count_patients_with_flags()` (#1343) # Pull Request Fixes #1342 --------- Co-authored-by: Joe Zhu --- NEWS.md | 2 +- R/count_patients_with_event.R | 95 +++++++----- R/count_patients_with_flags.R | 138 ++++++++++++------ man/count_patients_with_event.Rd | 27 ++-- man/count_patients_with_flags.Rd | 48 +++--- .../_snaps/count_patients_with_event.md | 25 ++++ .../_snaps/count_patients_with_flags.md | 42 ++++++ .../testthat/test-count_patients_with_event.R | 45 ++++++ .../testthat/test-count_patients_with_flags.R | 53 +++++++ 9 files changed, 364 insertions(+), 111 deletions(-) diff --git a/NEWS.md b/NEWS.md index aae0d2b378..e13937335e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,7 @@ ### Enhancements * Added the `denom` parameter to `s_count_cumulative()`, `s_count_missed_doses()`, and `s_count_occurrences_by_grade()`. * Added `"N_row"` as an optional input to `denom` in `s_count_occurrences()`. -* Refactored `a_count_occurrences_by_grade()` to no longer use `make_afun()`. +* Refactored `a_count_occurrences_by_grade()`, `a_count_patients_with_event()`, and `a_count_patients_with_flags()` to no longer use `make_afun()`. ### Enhancements * Added `rel_height_plot` parameter to `g_lineplot()` to control the line plot height relative to annotation table height. diff --git a/R/count_patients_with_event.R b/R/count_patients_with_event.R index 995873bef9..d0ecaa1267 100644 --- a/R/count_patients_with_event.R +++ b/R/count_patients_with_event.R @@ -21,7 +21,7 @@ #' #' Options are: ``r shQuote(get_stats("count_patients_with_event"))`` #' -#' @seealso [count_patients_with_flags] +#' @seealso [count_patients_with_flags()] #' #' @name count_patients_with_event #' @order 1 @@ -37,8 +37,6 @@ NULL #' * `s_count_patients_with_event()` returns the count and fraction of unique identifiers with the defined event. #' #' @examples -#' # `s_count_patients_with_event()` -#' #' s_count_patients_with_event( #' tern_ex_adae, #' .var = "SUBJID", @@ -95,8 +93,6 @@ s_count_patients_with_event <- function(df, #' * `a_count_patients_with_event()` returns the corresponding list with formatted [rtables::CellValue()]. #' #' @examples -#' # `a_count_patients_with_event()` -#' #' a_count_patients_with_event( #' tern_ex_adae, #' .var = "SUBJID", @@ -106,10 +102,48 @@ s_count_patients_with_event <- function(df, #' ) #' #' @export -a_count_patients_with_event <- make_afun( - s_count_patients_with_event, - .formats = c(count_fraction = format_count_fraction_fixed_dp) -) +a_count_patients_with_event <- function(df, + labelstr = "", + filters, + denom = c("n", "N_col", "N_row"), + .N_col, # nolint + .N_row, # nolint + .df_row, + .var = NULL, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = default_na_str()) { + x_stats <- s_count_patients_with_event( + df = df, .var = .var, filters = filters, .N_col = .N_col, .N_row = .N_row, denom = denom + ) + + if (is.null(unlist(x_stats))) { + return(NULL) + } + + # Fill in with formatting defaults if needed + .stats <- get_stats("count_patients_with_event", stats_in = .stats) + .formats <- get_formats_from_stats(.stats, .formats) + .labels <- get_labels_from_stats(.stats, .labels) + .indent_mods <- get_indents_from_stats(.stats, .indent_mods) + + if ("count_fraction_fixed_dp" %in% .stats) x_stats[["count_fraction_fixed_dp"]] <- x_stats[["count_fraction"]] + x_stats <- x_stats[.stats] + + # Auto format handling + .formats <- apply_auto_formatting(.formats, x_stats, .df_row, .var) + + in_rows( + .list = x_stats, + .formats = .formats, + .names = names(.labels), + .labels = unlist(.labels), + .indent_mods = .indent_mods, + .format_na_strs = na_str + ) +} #' @describeIn count_patients_with_event Layout-creating function which can take statistics function #' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()]. @@ -120,8 +154,6 @@ a_count_patients_with_event <- make_afun( #' the statistics from `s_count_patients_with_event()` to the table layout. #' #' @examples -#' # `count_patients_with_event()` -#' #' lyt <- basic_table() %>% #' split_cols_by("ARM") %>% #' add_colcounts() %>% @@ -164,40 +196,35 @@ count_patients_with_event <- function(lyt, ..., table_names = vars, .stats = "count_fraction", - .formats = NULL, + .formats = list(count_fraction = format_count_fraction_fixed_dp), .labels = NULL, .indent_mods = NULL) { checkmate::assert_flag(riskdiff) - - s_args <- list(filters = filters, ...) - - afun <- make_afun( - a_count_patients_with_event, - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods + extra_args <- list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str ) + s_args <- list(filters = filters, ...) - extra_args <- if (isFALSE(riskdiff)) { - s_args + if (isFALSE(riskdiff)) { + extra_args <- c(extra_args, s_args) } else { - list( - afun = list("s_count_patients_with_event" = afun), - .stats = .stats, - .indent_mods = .indent_mods, - s_args = s_args + extra_args <- c( + extra_args, + list( + afun = list("s_count_patients_with_event" = a_count_patients_with_event), + s_args = s_args + ) ) } analyze( - lyt, - vars, - afun = ifelse(isFALSE(riskdiff), afun, afun_riskdiff), + lyt = lyt, + vars = vars, + afun = ifelse(isFALSE(riskdiff), a_count_patients_with_event, afun_riskdiff), + show_labels = ifelse(length(vars) > 1, "visible", "hidden"), + table_names = table_names, na_str = na_str, nested = nested, - extra_args = extra_args, - show_labels = ifelse(length(vars) > 1, "visible", "hidden"), - table_names = table_names + extra_args = extra_args ) } diff --git a/R/count_patients_with_flags.R b/R/count_patients_with_flags.R index 57272b36f0..a8eb6b27a9 100644 --- a/R/count_patients_with_flags.R +++ b/R/count_patients_with_flags.R @@ -14,7 +14,8 @@ #' @inheritParams argument_convention #' @param flag_variables (`character`)\cr a vector specifying the names of `logical` variables from analysis dataset #' used for counting the number of unique identifiers. -#' @param flag_labels (`character`)\cr vector of labels to use for flag variables. +#' @param flag_labels (`character`)\cr vector of labels to use for flag variables. If any labels are also specified via +#' the `.labels` parameter, the `.labels` values will take precedence and replace these labels. #' @param .stats (`character`)\cr statistics to select for the table. #' #' Options are: ``r shQuote(get_stats("count_patients_with_flags"))`` @@ -101,16 +102,7 @@ s_count_patients_with_flags <- function(df, #' * `a_count_patients_with_flags()` returns the corresponding list with formatted [rtables::CellValue()]. #' #' @examples -#' # We need to ungroup `count_fraction` first so that the `rtables` formatting -#' # function `format_count_fraction()` can be applied correctly. -#' -#' # `a_count_patients_with_flags()` -#' -#' afun <- make_afun(a_count_patients_with_flags, -#' .stats = "count_fraction", -#' .ungroup_stats = "count_fraction" -#' ) -#' afun( +#' a_count_patients_with_flags( #' adae, #' .N_col = 10L, #' .N_row = 10L, @@ -119,10 +111,78 @@ s_count_patients_with_flags <- function(df, #' ) #' #' @export -a_count_patients_with_flags <- make_afun( - s_count_patients_with_flags, - .formats = c("count_fraction" = format_count_fraction_fixed_dp) -) +a_count_patients_with_flags <- function(df, + labelstr = "", + flag_variables, + flag_labels = NULL, + denom = c("n", "N_col", "N_row"), + .N_col, # nolint + .N_row, # nolint + .df_row, + .var = NULL, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = default_na_str()) { + x_stats <- s_count_patients_with_flags( + df = df, .var = .var, flag_variables = flag_variables, flag_labels = flag_labels, + .N_col = .N_col, .N_row = .N_row, denom = denom + ) + + if (is.null(unlist(x_stats))) { + return(NULL) + } + x_lvls <- names(x_stats[[1]]) + + # Fill in with formatting defaults if needed + .stats <- get_stats("count_patients_with_flags", stats_in = .stats) + .formats <- get_formats_from_stats(.stats, .formats) + + # label formatting + x_nms <- paste(rep(.stats, each = length(flag_variables)), flag_variables, sep = ".") + new_lbls <- if (!is.null(.labels)) .labels[names(.labels) %in% x_nms] else NULL + .labels <- get_labels_from_stats(.stats, .labels, row_nms = x_lvls) %>% setNames(x_nms) + if (!is.null(new_lbls)) { + which_lbls <- which(names(new_lbls) %in% names(.labels)) + .labels[which_lbls] <- new_lbls + } + + # indent mod formatting + indent_stat_def <- if (any(.stats %in% names(.indent_mods))) { + .indent_mods[.stats[.stats %in% names(.indent_mods)]] + } else { + NULL + } + .indent_mods <- get_indents_from_stats(.stats, .indent_mods, row_nms = flag_variables) + .indent_mods <- sapply(names(.indent_mods), function(x) { + if (.indent_mods[x] == 0 && !is.null(length(indent_stat_def))) { + idx <- which(names(indent_stat_def) == gsub("\\..*", "", x)) + if (length(idx) > 0) .indent_mods[[x]] <- indent_stat_def[[idx]] + } + .indent_mods[x] + }) + + if ("count_fraction_fixed_dp" %in% .stats) x_stats[["count_fraction_fixed_dp"]] <- x_stats[["count_fraction"]] + x_stats <- x_stats[.stats] + + # Ungroup statistics with values for each level of x + x_ungrp <- ungroup_stats(x_stats, .formats, .labels, list()) + x_stats <- x_ungrp[["x"]] %>% setNames(x_nms) + .formats <- x_ungrp[[".formats"]] %>% setNames(x_nms) + + # Auto format handling + .formats <- apply_auto_formatting(.formats, x_stats, .df_row, .var) + + in_rows( + .list = x_stats, + .formats = .formats, + .names = names(.labels), + .labels = unlist(.labels), + .indent_mods = .indent_mods, + .format_na_strs = na_str + ) +} #' @describeIn count_patients_with_flags Layout-creating function which can take statistics function #' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()]. @@ -133,11 +193,9 @@ a_count_patients_with_flags <- make_afun( #' the statistics from `s_count_patients_with_flags()` to the table layout. #' #' @examples -#' library(dplyr) -#' #' # Add labelled flag variables to analysis dataset. #' adae <- tern_ex_adae %>% -#' mutate( +#' dplyr::mutate( #' fl1 = TRUE %>% with_label("Total AEs"), #' fl2 = (TRTEMFL == "Y") %>% #' with_label("Total number of patients with at least one adverse event"), @@ -147,9 +205,7 @@ a_count_patients_with_flags <- make_afun( #' with_label("Total number of patients with related fatal AEs") #' ) #' -#' # `count_patients_with_flags()` -#' -#' lyt2 <- basic_table() %>% +#' lyt <- basic_table() %>% #' split_cols_by("ARM") %>% #' add_colcounts() %>% #' count_patients_with_flags( @@ -158,7 +214,7 @@ a_count_patients_with_flags <- make_afun( #' denom = "N_col" #' ) #' -#' build_table(lyt2, adae, alt_counts_df = tern_ex_adsl) +#' build_table(lyt, adae, alt_counts_df = tern_ex_adsl) #' #' @export #' @order 2 @@ -174,42 +230,36 @@ count_patients_with_flags <- function(lyt, ..., table_names = paste0("tbl_flags_", var), .stats = "count_fraction", - .formats = NULL, - .indent_mods = NULL) { + .formats = list(count_fraction = format_count_fraction_fixed_dp), + .indent_mods = NULL, + .labels = NULL) { checkmate::assert_flag(riskdiff) - - s_args <- list(flag_variables = flag_variables, flag_labels = flag_labels, ...) - - afun <- make_afun( - a_count_patients_with_flags, - .stats = .stats, - .formats = .formats, - .indent_mods = .indent_mods, - .ungroup_stats = .stats + extra_args <- list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str ) + s_args <- list(flag_variables = flag_variables, flag_labels = flag_labels, ...) - extra_args <- if (isFALSE(riskdiff)) { - s_args + if (isFALSE(riskdiff)) { + extra_args <- c(extra_args, s_args) } else { - list( - afun = list("s_count_patients_with_flags" = afun), - .stats = .stats, - .indent_mods = .indent_mods, - s_args = s_args + extra_args <- c( + extra_args, + list( + afun = list("s_count_patients_with_flags" = a_count_patients_with_flags), + s_args = s_args + ) ) } - lyt <- analyze( + analyze( lyt = lyt, vars = var, + afun = ifelse(isFALSE(riskdiff), a_count_patients_with_flags, afun_riskdiff), var_labels = var_labels, show_labels = show_labels, - afun = ifelse(isFALSE(riskdiff), afun, afun_riskdiff), table_names = table_names, na_str = na_str, nested = nested, extra_args = extra_args ) - - lyt } diff --git a/man/count_patients_with_event.Rd b/man/count_patients_with_event.Rd index a9aafc9a6d..3ae18641eb 100644 --- a/man/count_patients_with_event.Rd +++ b/man/count_patients_with_event.Rd @@ -16,7 +16,7 @@ count_patients_with_event( ..., table_names = vars, .stats = "count_fraction", - .formats = NULL, + .formats = list(count_fraction = format_count_fraction_fixed_dp), .labels = NULL, .indent_mods = NULL ) @@ -32,11 +32,18 @@ s_count_patients_with_event( a_count_patients_with_event( df, - .var, + labelstr = "", filters, + denom = c("n", "N_col", "N_row"), .N_col, .N_row, - denom = c("n", "N_col", "N_row") + .df_row, + .var = NULL, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = default_na_str() ) } \arguments{ @@ -93,6 +100,12 @@ passed by \code{rtables}.} \item \code{N_row}: total number of values in this row across columns. \item \code{N_col}: total number of values in this column across rows. }} + +\item{labelstr}{(\code{string})\cr label of the level of the parent split currently being summarized +(must be present as second argument in Content Row Functions). See \code{\link[rtables:summarize_row_groups]{rtables::summarize_row_groups()}} +for more information.} + +\item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.} } \value{ \itemize{ @@ -134,8 +147,6 @@ in \code{count_patients_with_event()}. }} \examples{ -# `count_patients_with_event()` - lyt <- basic_table() \%>\% split_cols_by("ARM") \%>\% add_colcounts() \%>\% @@ -167,8 +178,6 @@ lyt <- basic_table() \%>\% build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl) -# `s_count_patients_with_event()` - s_count_patients_with_event( tern_ex_adae, .var = "SUBJID", @@ -189,8 +198,6 @@ s_count_patients_with_event( .N_col = 456 ) -# `a_count_patients_with_event()` - a_count_patients_with_event( tern_ex_adae, .var = "SUBJID", @@ -201,5 +208,5 @@ a_count_patients_with_event( } \seealso{ -\link{count_patients_with_flags} +\code{\link[=count_patients_with_flags]{count_patients_with_flags()}} } diff --git a/man/count_patients_with_flags.Rd b/man/count_patients_with_flags.Rd index 5cade48d70..6666d74065 100644 --- a/man/count_patients_with_flags.Rd +++ b/man/count_patients_with_flags.Rd @@ -19,8 +19,9 @@ count_patients_with_flags( ..., table_names = paste0("tbl_flags_", var), .stats = "count_fraction", - .formats = NULL, - .indent_mods = NULL + .formats = list(count_fraction = format_count_fraction_fixed_dp), + .indent_mods = NULL, + .labels = NULL ) s_count_patients_with_flags( @@ -35,12 +36,19 @@ s_count_patients_with_flags( a_count_patients_with_flags( df, - .var, + labelstr = "", flag_variables, flag_labels = NULL, + denom = c("n", "N_col", "N_row"), .N_col, .N_row, - denom = c("n", "N_col", "N_row") + .df_row, + .var = NULL, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = default_na_str() ) } \arguments{ @@ -52,7 +60,8 @@ by a statistics function.} \item{flag_variables}{(\code{character})\cr a vector specifying the names of \code{logical} variables from analysis dataset used for counting the number of unique identifiers.} -\item{flag_labels}{(\code{character})\cr vector of labels to use for flag variables.} +\item{flag_labels}{(\code{character})\cr vector of labels to use for flag variables. If any labels are also specified via +the \code{.labels} parameter, the \code{.labels} values will take precedence and replace these labels.} \item{var_labels}{(\code{character})\cr variable labels.} @@ -83,6 +92,8 @@ information on the \code{"auto"} setting.} \item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the unmodified default behavior. Can be negative.} +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + \item{df}{(\code{data.frame})\cr data set containing all analysis variables.} \item{.var}{(\code{string})\cr name of the column that contains the unique identifier.} @@ -99,6 +110,12 @@ passed by \code{rtables}.} \item \code{N_row}: total number of values in this row across columns. \item \code{N_col}: total number of values in this column across rows. }} + +\item{labelstr}{(\code{string})\cr label of the level of the parent split currently being summarized +(must be present as second argument in Content Row Functions). See \code{\link[rtables:summarize_row_groups]{rtables::summarize_row_groups()}} +for more information.} + +\item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.} } \value{ \itemize{ @@ -147,11 +164,9 @@ labeled, variable names will be used instead. Alternatively, a named \code{vecto the label to use for this variable. } \examples{ -library(dplyr) - # Add labelled flag variables to analysis dataset. adae <- tern_ex_adae \%>\% - mutate( + dplyr::mutate( fl1 = TRUE \%>\% with_label("Total AEs"), fl2 = (TRTEMFL == "Y") \%>\% with_label("Total number of patients with at least one adverse event"), @@ -161,9 +176,7 @@ adae <- tern_ex_adae \%>\% with_label("Total number of patients with related fatal AEs") ) -# `count_patients_with_flags()` - -lyt2 <- basic_table() \%>\% +lyt <- basic_table() \%>\% split_cols_by("ARM") \%>\% add_colcounts() \%>\% count_patients_with_flags( @@ -172,7 +185,7 @@ lyt2 <- basic_table() \%>\% denom = "N_col" ) -build_table(lyt2, adae, alt_counts_df = tern_ex_adsl) +build_table(lyt, adae, alt_counts_df = tern_ex_adsl) # `s_count_patients_with_flags()` @@ -184,16 +197,7 @@ s_count_patients_with_flags( .N_col = 1000 ) -# We need to ungroup `count_fraction` first so that the `rtables` formatting -# function `format_count_fraction()` can be applied correctly. - -# `a_count_patients_with_flags()` - -afun <- make_afun(a_count_patients_with_flags, - .stats = "count_fraction", - .ungroup_stats = "count_fraction" -) -afun( +a_count_patients_with_flags( adae, .N_col = 10L, .N_row = 10L, diff --git a/tests/testthat/_snaps/count_patients_with_event.md b/tests/testthat/_snaps/count_patients_with_event.md index ea259c7b9f..2859584d29 100644 --- a/tests/testthat/_snaps/count_patients_with_event.md +++ b/tests/testthat/_snaps/count_patients_with_event.md @@ -34,6 +34,31 @@ [1] 0 +# a_count_patients_with_event works with healthy input. + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 n 3 0 n + 2 count 1 0 count + 3 count_fraction 1 (33.3%) 0 count_fraction + 4 count_fraction_fixed_dp 1 (33.3%) 0 count_fraction + 5 n_blq 0 0 n_blq + +# a_count_patients_with_event works with custom input. + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 count_fraction 1 (33.33%) 1 New label + 2 n 3 3 n + # count_patients_with_event works as expected Code diff --git a/tests/testthat/_snaps/count_patients_with_flags.md b/tests/testthat/_snaps/count_patients_with_flags.md index 4cfcbb9503..0fe626c72b 100644 --- a/tests/testthat/_snaps/count_patients_with_flags.md +++ b/tests/testthat/_snaps/count_patients_with_flags.md @@ -310,6 +310,48 @@ +# a_count_patients_with_flags works with healthy input. + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 n.SER 164 0 Serious AE + 2 n.REL 164 0 Related AE + 3 n.CTC35 164 0 Grade 3-5 AE + 4 n.CTC45 164 0 Grade 4/5 AE + 5 count.SER 128 0 Serious AE + 6 count.REL 137 0 Related AE + 7 count.CTC35 134 0 Grade 3-5 AE + 8 count.CTC45 104 0 Grade 4/5 AE + 9 count_fraction.SER 128 (78%) 0 Serious AE + 10 count_fraction.REL 137 (83.5%) 0 Related AE + 11 count_fraction.CTC35 134 (81.7%) 0 Grade 3-5 AE + 12 count_fraction.CTC45 104 (63.4%) 0 Grade 4/5 AE + 13 count_fraction_fixed_dp.SER 128 (78.0%) 0 Serious AE + 14 count_fraction_fixed_dp.REL 137 (83.5%) 0 Related AE + 15 count_fraction_fixed_dp.CTC35 134 (81.7%) 0 Grade 3-5 AE + 16 count_fraction_fixed_dp.CTC45 104 (63.4%) 0 Grade 4/5 AE + 17 n_blq.SER 0 0 Serious AE + 18 n_blq.REL 0 0 Related AE + 19 n_blq.CTC35 0 0 Grade 3-5 AE + 20 n_blq.CTC45 0 0 Grade 4/5 AE + +# a_count_patients_with_flags works with custom input. + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 count_fraction.SER 128 (78.05%) 2 New label + 2 count_fraction.REL 137 (83.54%) 3 Related AE + 3 count_fraction.CTC35 134 (81.71%) 1 Grade 3-5 AE + 4 count_fraction.CTC45 104 (63.41%) 1 Grade 4/5 AE + # count_patients_with_flags works as expected Code diff --git a/tests/testthat/test-count_patients_with_event.R b/tests/testthat/test-count_patients_with_event.R index 349f415ddd..aeffa45229 100644 --- a/tests/testthat/test-count_patients_with_event.R +++ b/tests/testthat/test-count_patients_with_event.R @@ -31,6 +31,51 @@ testthat::test_that("s_count_patients_with_event handles multiple columns", { testthat::expect_snapshot(res) }) +testthat::test_that("a_count_patients_with_event works with healthy input.", { + test_data <- data.frame( + SUBJID = c("1001", "1001", "1001", "1002", "1002", "1002", "1003", "1003", "1003"), + ARM = c("A", "A", "A", "A", "A", "A", "B", "B", "B"), + TRTEMFL = c("Y", "", "", "NA", "", "", "Y", "", ""), + AEOUT = c("", "", "", "", "", "", "FATAL", "", "FATAL"), + stringsAsFactors = FALSE + ) + + result <- a_count_patients_with_event( + test_data, + .var = "SUBJID", + filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"), + .N_col = 10, .N_row = 10, .df_row = test_data, + .stats = get_stats("count_patients_with_event") + ) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("a_count_patients_with_event works with custom input.", { + test_data <- data.frame( + SUBJID = c("1001", "1001", "1001", "1002", "1002", "1002", "1003", "1003", "1003"), + ARM = c("A", "A", "A", "A", "A", "A", "B", "B", "B"), + TRTEMFL = c("Y", "", "", "NA", "", "", "Y", "", ""), + AEOUT = c("", "", "", "", "", "", "FATAL", "", "FATAL"), + stringsAsFactors = FALSE + ) + + result <- a_count_patients_with_event( + test_data, + .var = "SUBJID", + filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"), + .N_col = 10, .N_row = 10, .df_row = test_data, + .stats = c("count_fraction", "n"), + .formats = c(count_fraction = "xx (xx.xx%)"), + .labels = list("count_fraction" = "New label"), + .indent_mods = list("count_fraction" = 1L, "n" = 3L) + ) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + testthat::test_that("count_patients_with_event works as expected", { test_data <- data.frame( SUBJID = c("1001", "1001", "1001", "1002", "1002", "1002", "1003", "1003", "1003"), diff --git a/tests/testthat/test-count_patients_with_flags.R b/tests/testthat/test-count_patients_with_flags.R index 47b8a226f1..a2c8f7776f 100644 --- a/tests/testthat/test-count_patients_with_flags.R +++ b/tests/testthat/test-count_patients_with_flags.R @@ -88,6 +88,59 @@ testthat::test_that("s_count_patients_with_flags custom variable label behaviour testthat::expect_snapshot(res) }) +testthat::test_that("a_count_patients_with_flags works with healthy input.", { + options("width" = 100) + + adae_local <- tern_ex_adae %>% + dplyr::mutate( + SER = AESER == "Y", + REL = AEREL == "Y", + CTC35 = AETOXGR %in% c("3", "4", "5"), + CTC45 = AETOXGR %in% c("4", "5") + ) + aesi_vars <- c("SER", "REL", "CTC35", "CTC45") + labels <- c("Serious AE", "Related AE", "Grade 3-5 AE", "Grade 4/5 AE") + + result <- a_count_patients_with_flags( + adae_local, + .var = "USUBJID", + flag_variables = aesi_vars, flag_labels = labels, + .N_col = 10, .N_row = 10, .df_row = raw_data, + .stats = get_stats("count_patients_with_flags") + ) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("a_count_patients_with_flags works with custom input.", { + options("width" = 100) + + adae_local <- tern_ex_adae %>% + dplyr::mutate( + SER = AESER == "Y", + REL = AEREL == "Y", + CTC35 = AETOXGR %in% c("3", "4", "5"), + CTC45 = AETOXGR %in% c("4", "5") + ) + aesi_vars <- c("SER", "REL", "CTC35", "CTC45") + labels <- c("Serious AE", "Related AE", "Grade 3-5 AE", "Grade 4/5 AE") + + result <- a_count_patients_with_flags( + adae_local, + .var = "USUBJID", + flag_variables = aesi_vars, flag_labels = labels, + .N_col = 10, .N_row = 10, .df_row = raw_data, + .stats = "count_fraction", + .formats = c(count_fraction = "xx (xx.xx%)"), + .labels = list("count_fraction.SER" = "New label"), + .indent_mods = list("count_fraction" = 1L, "SER" = 2L, "count_fraction.REL" = 3L) + ) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + testthat::test_that("count_patients_with_flags works as expected", { test_data <- tibble::tibble( SUBJID = c("1001", "1001", "1001", "1002", "1002", "1002", "1003", "1003", "1003"), From 435fd51a95e24eb5524826e093706e238df592c4 Mon Sep 17 00:00:00 2001 From: shajoezhu Date: Sat, 2 Nov 2024 09:50:56 +0000 Subject: [PATCH 2/6] [skip actions] Bump version to 0.9.6.9013 --- .pre-commit-config.yaml | 2 +- DESCRIPTION | 4 ++-- NEWS.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8a92b84bf8..541174fec5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ default_language_version: python: python3 repos: - repo: https://github.com/lorenzwalthert/precommit - rev: v0.4.3.9001 + rev: v0.4.3.9003 hooks: - id: style-files name: Style code with `styler` diff --git a/DESCRIPTION b/DESCRIPTION index 3281139a77..58bcbe7d63 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tern Title: Create Common TLGs Used in Clinical Trials -Version: 0.9.6.9012 -Date: 2024-11-01 +Version: 0.9.6.9013 +Date: 2024-11-02 Authors@R: c( person("Joe", "Zhu", , "joe.zhu@roche.com", role = c("aut", "cre")), person("Daniel", "Sabanés Bové", , "daniel.sabanes_bove@roche.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index e13937335e..ce82541bb6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# tern 0.9.6.9012 +# tern 0.9.6.9013 ### Enhancements * Added the `denom` parameter to `s_count_cumulative()`, `s_count_missed_doses()`, and `s_count_occurrences_by_grade()`. From a7b7523a5bab9ee63c3a45b4a4bbe932a4f52f25 Mon Sep 17 00:00:00 2001 From: Emily de la Rua <59304861+edelarua@users.noreply.github.com> Date: Wed, 6 Nov 2024 16:30:18 -0500 Subject: [PATCH 3/6] Fix indent modifier bug in `count_patients_with_flags()` (#1350) Fixes #1349 --- R/count_patients_with_flags.R | 16 +++---- .../_snaps/count_patients_with_flags.md | 11 +++++ .../testthat/test-count_patients_with_flags.R | 42 +++++++++++++++++++ 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/R/count_patients_with_flags.R b/R/count_patients_with_flags.R index a8eb6b27a9..71f6882b23 100644 --- a/R/count_patients_with_flags.R +++ b/R/count_patients_with_flags.R @@ -155,13 +155,15 @@ a_count_patients_with_flags <- function(df, NULL } .indent_mods <- get_indents_from_stats(.stats, .indent_mods, row_nms = flag_variables) - .indent_mods <- sapply(names(.indent_mods), function(x) { - if (.indent_mods[x] == 0 && !is.null(length(indent_stat_def))) { - idx <- which(names(indent_stat_def) == gsub("\\..*", "", x)) - if (length(idx) > 0) .indent_mods[[x]] <- indent_stat_def[[idx]] - } - .indent_mods[x] - }) + if (!is.null(names(.indent_mods))) { + .indent_mods <- sapply(names(.indent_mods), function(x) { + if (.indent_mods[x] == 0 && !is.null(length(indent_stat_def))) { + idx <- which(names(indent_stat_def) == gsub("\\..*", "", x)) + if (length(idx) > 0) .indent_mods[[x]] <- indent_stat_def[[idx]] + } + .indent_mods[x] + }) + } if ("count_fraction_fixed_dp" %in% .stats) x_stats[["count_fraction_fixed_dp"]] <- x_stats[["count_fraction"]] x_stats <- x_stats[.stats] diff --git a/tests/testthat/_snaps/count_patients_with_flags.md b/tests/testthat/_snaps/count_patients_with_flags.md index 0fe626c72b..98f24e4342 100644 --- a/tests/testthat/_snaps/count_patients_with_flags.md +++ b/tests/testthat/_snaps/count_patients_with_flags.md @@ -467,3 +467,14 @@ SAE 53 (26.2%) 49 (27.7%) 39 (24.1%) -1.4 (-10.4 - 7.5) SAE with fatal outcome 50 (24.8%) 47 (26.6%) 42 (25.9%) -1.8 (-10.6 - 7.0) +# count_patients_with_flags works with single indent mod value + + Code + res + Output + A B + (N=6) (N=4) + —————————————————————————————————————————————————————————————————————————————————————— + Total number of patients with at least one adverse event 1 (16.7%) 1 (25.0%) + Total number of patients with fatal AEs 0 1 (25.0%) + diff --git a/tests/testthat/test-count_patients_with_flags.R b/tests/testthat/test-count_patients_with_flags.R index a2c8f7776f..2c615fe4c3 100644 --- a/tests/testthat/test-count_patients_with_flags.R +++ b/tests/testthat/test-count_patients_with_flags.R @@ -386,3 +386,45 @@ testthat::test_that("count_patients_with_flags works as expected with risk diffe res <- testthat::expect_silent(result) testthat::expect_snapshot(res) }) + +testthat::test_that("count_patients_with_flags works with single indent mod value", { + test_data <- tibble::tibble( + SUBJID = c("1001", "1001", "1001", "1002", "1002", "1002", "1003", "1003", "1003"), + ARM = factor(c("A", "A", "A", "A", "A", "A", "B", "B", "B"), levels = c("A", "B")), + TRTEMFL = c("Y", "", "", "NA", "", "", "Y", "", ""), + AEOUT = c("", "", "", "", "", "", "FATAL", "", "FATAL") + ) %>% + dplyr::mutate( + flag1 = TRTEMFL == "Y", + flag2 = TRTEMFL == "Y" & AEOUT == "FATAL", + ) + labels <- c( + "A", + "B", + "C", + "D", + "Total number of patients with at least one adverse event", + "Total number of patients with fatal AEs" + ) + formatters::var_labels(test_data) <- labels + + test_adsl_like <- tibble::tibble( + SUBJID = as.character(1001:1010), + ARM = factor(c("A", "A", "B", "B", "A", "A", "A", "B", "B", "A"), levels = c("A", "B")), + stringsAsFactors = FALSE + ) + + lyt <- basic_table() %>% + split_cols_by("ARM") %>% + add_colcounts() %>% + count_patients_with_flags( + "SUBJID", + flag_variables = formatters::var_labels(test_data[, c("flag1", "flag2")]), + denom = "N_col", + .indent_mods = 3L + ) + result <- build_table(lyt, df = test_data, alt_counts_df = test_adsl_like) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) From 4b9e6a6d5be780265d41b54000bb14ba5de8db7d Mon Sep 17 00:00:00 2001 From: edelarua Date: Wed, 6 Nov 2024 21:31:27 +0000 Subject: [PATCH 4/6] [skip actions] Bump version to 0.9.6.9014 --- DESCRIPTION | 4 ++-- NEWS.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 58bcbe7d63..24f34d2428 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tern Title: Create Common TLGs Used in Clinical Trials -Version: 0.9.6.9013 -Date: 2024-11-02 +Version: 0.9.6.9014 +Date: 2024-11-06 Authors@R: c( person("Joe", "Zhu", , "joe.zhu@roche.com", role = c("aut", "cre")), person("Daniel", "Sabanés Bové", , "daniel.sabanes_bove@roche.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index ce82541bb6..ee34e76c3c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# tern 0.9.6.9013 +# tern 0.9.6.9014 ### Enhancements * Added the `denom` parameter to `s_count_cumulative()`, `s_count_missed_doses()`, and `s_count_occurrences_by_grade()`. From 12de1fb3000342c2ea2d142dfe7c608803627b06 Mon Sep 17 00:00:00 2001 From: Davide Garolini Date: Thu, 7 Nov 2024 22:48:57 +0100 Subject: [PATCH 5/6] Refactor `summarize_change()` (#1347) Fixes #1345 --------- Co-authored-by: shajoezhu Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Joe Zhu --- NEWS.md | 8 +- R/summarize_change.R | 139 ++++++++++++----- R/utils_default_stats_formats_labels.R | 69 +++++++-- R/utils_rtables.R | 65 ++++++++ man/default_stats_formats_labels.Rd | 7 +- man/summarize_change.Rd | 40 +++-- man/util_handling_additional_fun_params.Rd | 45 ++++++ tests/testthat/_snaps/summarize_change.md | 17 +++ tests/testthat/test-summarize_change.R | 58 +++++++ .../test-utils_default_stats_formats_labels.R | 11 +- vignettes/tern_functions_guide.Rmd | 142 ++++++++++++++++++ 11 files changed, 527 insertions(+), 74 deletions(-) create mode 100644 man/util_handling_additional_fun_params.Rd create mode 100644 vignettes/tern_functions_guide.Rmd diff --git a/NEWS.md b/NEWS.md index ee34e76c3c..fdb80c33ad 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,15 +4,19 @@ * Added the `denom` parameter to `s_count_cumulative()`, `s_count_missed_doses()`, and `s_count_occurrences_by_grade()`. * Added `"N_row"` as an optional input to `denom` in `s_count_occurrences()`. * Refactored `a_count_occurrences_by_grade()`, `a_count_patients_with_event()`, and `a_count_patients_with_flags()` to no longer use `make_afun()`. - -### Enhancements * Added `rel_height_plot` parameter to `g_lineplot()` to control the line plot height relative to annotation table height. * Updated the `table_font_size` parameter of `g_lineplot()` to control the size of all text in the annotation table, including labels. * Added `as_list` parameter to `g_lineplot()` to allow users to return the line plot and annotation table elements as a list instead of stacked for more complex customization. +* Refactored `summarize_change()` to work without `make_afun()` and access all additional function parameter. +* Added vignette "Understanding `tern` functions" for future reference. ### Bug Fixes * Fixed bug in `a_summary()` causing non-unique `row_name` values to occur when multiple statistics are selected for count variables. +### Miscellaneous +* Reverted deprecation of quick get functions `summary_formats()` and `summary_labels()`. Added disclaimer about underlying use of `get_stats`. +* Corrected handling of extra arguments and `NA` for `summarize_change()`. + # tern 0.9.6 ### Enhancements diff --git a/R/summarize_change.R b/R/summarize_change.R index bf76ca7424..832d8647b1 100644 --- a/R/summarize_change.R +++ b/R/summarize_change.R @@ -29,11 +29,12 @@ NULL #' an error will be thrown. #' #' @keywords internal -s_change_from_baseline <- function(df, - .var, - variables, - na.rm = TRUE, # nolint - ...) { +s_change_from_baseline <- function(df, ...) { + # s_summary should get na.rm + args_list <- list(...) + .var <- args_list[[".var"]] + variables <- args_list[["variables"]] + checkmate::assert_numeric(df[[variables$value]]) checkmate::assert_numeric(df[[.var]]) checkmate::assert_logical(df[[variables$baseline_flag]]) @@ -48,7 +49,7 @@ s_change_from_baseline <- function(df, if (is.logical(combined) && identical(length(combined), 0L)) { combined <- numeric(0) } - s_summary(combined, na.rm = na.rm, ...) + s_summary(combined, ...) } #' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`. @@ -57,25 +58,56 @@ s_change_from_baseline <- function(df, #' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()]. #' #' @keywords internal -a_change_from_baseline <- make_afun( - s_change_from_baseline, - .formats = c( - n = "xx", - mean_sd = "xx.xx (xx.xx)", - mean_se = "xx.xx (xx.xx)", - median = "xx.xx", - range = "xx.xx - xx.xx", - mean_ci = "(xx.xx, xx.xx)", - median_ci = "(xx.xx, xx.xx)", - mean_pval = "xx.xx" - ), - .labels = c( - mean_sd = "Mean (SD)", - mean_se = "Mean (SE)", - median = "Median", - range = "Min - Max" +a_change_from_baseline <- function(df, + ..., + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL) { + # Check if there are user-defined functions + default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) + .stats <- default_and_custom_stats_list$default_stats + custom_stat_functions <- default_and_custom_stats_list$custom_stats + + # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) + extra_afun_params <- retrieve_extra_afun_params( + names(list(...)$.additional_fun_parameters) + ) + x_stats <- .apply_stat_functions( + default_stat_fnc = s_change_from_baseline, + custom_stat_fnc_list = custom_stat_functions, + args_list = c( + df = list(df), + extra_afun_params, + list(...) + ) + ) + + # Fill in with formatting defaults if needed + .stats <- c( + get_stats("analyze_vars_numeric", stats_in = .stats), + names(custom_stat_functions) # Additional stats from custom functions ) -) + .formats <- get_formats_from_stats(.stats, .formats) + .labels <- get_labels_from_stats(.stats, .labels) + .indent_mods <- get_indents_from_stats(.stats, .indent_mods) + + # Auto format handling + .formats <- apply_auto_formatting( + .formats, + x_stats, + extra_afun_params$.df_row, + extra_afun_params$.var + ) + + in_rows( + .list = x_stats[.stats], + .formats = .formats, + .names = names(.labels), + .labels = .labels, + .indent_mods = .indent_mods + ) +} #' @describeIn summarize_change Layout-creating function which can take statistics function arguments #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. @@ -91,7 +123,7 @@ a_change_from_baseline <- make_afun( #' @examples #' library(dplyr) #' -#' ## Fabricate dataset +#' # Fabricate dataset #' dta_test <- data.frame( #' USUBJID = rep(1:6, each = 3), #' AVISIT = rep(paste0("V", 1:3), 6), @@ -119,31 +151,60 @@ a_change_from_baseline <- make_afun( summarize_change <- function(lyt, vars, variables, + var_labels = vars, na_str = default_na_str(), + na_rm = TRUE, nested = TRUE, - ..., + show_labels = "default", table_names = vars, + section_div = NA_character_, + ..., .stats = c("n", "mean_sd", "median", "range"), - .formats = NULL, - .labels = NULL, + .formats = c( + n = "xx", + mean_sd = "xx.xx (xx.xx)", + mean_se = "xx.xx (xx.xx)", + median = "xx.xx", + range = "xx.xx - xx.xx", + mean_ci = "(xx.xx, xx.xx)", + median_ci = "(xx.xx, xx.xx)", + mean_pval = "xx.xx" + ), + .labels = c( + mean_sd = "Mean (SD)", + mean_se = "Mean (SE)", + median = "Median", + range = "Min - Max" + ), .indent_mods = NULL) { - extra_args <- list(variables = variables, ...) + # Extra args must contain .stats, .formats, .labels, .indent_mods - sent to the analysis level + extra_args <- list(".stats" = .stats) + if (!is.null(.formats)) extra_args[[".formats"]] <- .formats + if (!is.null(.labels)) extra_args[[".labels"]] <- .labels + if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods - afun <- make_afun( - a_change_from_baseline, - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods + # Adding additional arguments to the analysis function (depends on the specific call) + extra_args <- c(extra_args, "variables" = list(variables), ...) + + # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) + extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) + formals(a_change_from_baseline) <- c( + formals(a_change_from_baseline), + extra_args[[".additional_fun_parameters"]] ) + # Main analysis call - Nothing with .* -> these should be dedicated to the analysis function analyze( - lyt, - vars, - afun = afun, + lyt = lyt, + vars = vars, + var_labels = var_labels, + afun = a_change_from_baseline, na_str = na_str, nested = nested, extra_args = extra_args, - table_names = table_names + inclNAs = na_rm, # adds na.rm = TRUE to the analysis function + show_labels = show_labels, + table_names = table_names, + section_div = section_div ) } diff --git a/R/utils_default_stats_formats_labels.R b/R/utils_default_stats_formats_labels.R index bd3871b4b8..b97acf4a08 100644 --- a/R/utils_default_stats_formats_labels.R +++ b/R/utils_default_stats_formats_labels.R @@ -121,6 +121,57 @@ get_stats <- function(method_groups = "analyze_vars_numeric", stats_in = NULL, a out } +# Utility function used to separate custom stats (user-defined functions) from defaults +.split_std_from_custom_stats <- function(stats_in) { + out <- list(default_stats = NULL, custom_stats = NULL) + if (is.list(stats_in)) { + is_custom_fnc <- sapply(stats_in, is.function) + checkmate::assert_list(stats_in[is_custom_fnc], types = "function", names = "named") + out[["custom_stats"]] <- stats_in[is_custom_fnc] + out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc]) + } else { + out[["default_stats"]] <- stats_in + } + + out +} + +# Utility function to apply statistical functions +.apply_stat_functions <- function(default_stat_fnc, custom_stat_fnc_list, args_list) { + # Default checks + checkmate::assert_function(default_stat_fnc) + checkmate::assert_list(custom_stat_fnc_list, types = "function", null.ok = TRUE, names = "named") + checkmate::assert_list(args_list) + + # Checking custom stats have same formals + if (!is.null(custom_stat_fnc_list)) { + fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]] + for (fnc in custom_stat_fnc_list) { + if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) { + stop( + "The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ", + "as the default statistical function. In this case your custom function has ", names(formals(fnc))[[1]], + " as first parameter, while the default function has ", fundamental_call_to_data, "." + ) + } + if (!any(names(formals(fnc)) == "...")) { + stop( + "The custom statistical function needs to have `...` as a parameter to accept additional arguments. ", + "In this case your custom function does not have `...`." + ) + } + } + } + + # Merging + stat_fnc_list <- c(default_stat_fnc, custom_stat_fnc_list) + + # Applying + out <- unlist(lapply(stat_fnc_list, function(fnc) do.call(fnc, args = args_list)), recursive = FALSE) + + out +} + #' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics. #' To check available defaults see `tern::tern_default_formats` list. #' @@ -496,9 +547,7 @@ tern_default_labels <- c( rate_ratio = "Adjusted Rate Ratio" ) -# To deprecate --------- - -#' @describeIn default_stats_formats_labels `r lifecycle::badge("deprecated")` +#' @describeIn default_stats_formats_labels `r lifecycle::badge("stable")` #' Quick function to retrieve default formats for summary statistics: #' [analyze_vars()] and [analyze_vars_in_cols()] principally. #' @@ -513,20 +562,20 @@ tern_default_labels <- c( #' #' @export summary_formats <- function(type = "numeric", include_pval = FALSE) { - lifecycle::deprecate_warn( - "0.9.6", "summary_formats()", - details = 'Use get_formats_from_stats(get_stats("analyze_vars_numeric", add_pval = include_pval)) instead' - ) met_grp <- paste0(c("analyze_vars", type), collapse = "_") get_formats_from_stats(get_stats(met_grp, add_pval = include_pval)) } -#' @describeIn default_stats_formats_labels `r lifecycle::badge("deprecated")` +#' @describeIn default_stats_formats_labels `r lifecycle::badge("stable")` #' Quick function to retrieve default labels for summary statistics. #' Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats`. #' #' @param include_pval (`flag`)\cr same as the `add_pval` argument in [get_stats()]. #' +#' @details +#' `summary_*` quick get functions for labels or formats uses `get_stats` and `get_labels_from_stats` or +#' `get_formats_from_stats` respectively to retrieve relevant information. +#' #' @return #' * `summary_labels` returns a named `vector` of default statistic labels for the given data type. #' @@ -536,10 +585,6 @@ summary_formats <- function(type = "numeric", include_pval = FALSE) { #' #' @export summary_labels <- function(type = "numeric", include_pval = FALSE) { - lifecycle::deprecate_warn( - "0.9.6", "summary_formats()", - details = 'Use get_labels_from_stats(get_stats("analyze_vars_numeric", add_pval = include_pval)) instead' - ) met_grp <- paste0(c("analyze_vars", type), collapse = "_") get_labels_from_stats(get_stats(met_grp, add_pval = include_pval)) } diff --git a/R/utils_rtables.R b/R/utils_rtables.R index 61dd4b5f3e..26c45543db 100644 --- a/R/utils_rtables.R +++ b/R/utils_rtables.R @@ -467,3 +467,68 @@ set_default_na_str <- function(na_str) { checkmate::assert_character(na_str, len = 1, null.ok = TRUE) options("tern_default_na_str" = na_str) } + + +#' Utilities to handle extra arguments in analysis functions +#' +#' @description `r lifecycle::badge("stable")` +#' Important additional parameters, useful to modify behavior of analysis and summary +#' functions are listed in [rtables::additional_fun_params]. With these utility functions +#' we can retrieve a curated list of these parameters from the environment, and pass them +#' to the analysis functions with dedicated `...`; notice that the final `s_*` function +#' will get them through argument matching. +#' +#' @param extra_afun_params (`list`)\cr list of additional parameters (`character`) to be +#' retrieved from the environment. Curated list is present in [rtables::additional_fun_params]. +#' @param add_alt_df (`logical`)\cr if `TRUE`, the function will also add `.alt_df` and `.alt_df_row` +#' parameters. +#' +#' @name util_handling_additional_fun_params +NULL + +#' @describeIn util_handling_additional_fun_params Retrieve additional parameters from the environment. +#' +#' @return +#' * `retrieve_extra_afun_params` returns a list of the values of the parameters in the environment. +#' +#' @keywords internal +retrieve_extra_afun_params <- function(extra_afun_params) { + out <- list() + for (extra_param in extra_afun_params) { + out <- c(out, list(get(extra_param, envir = parent.frame()))) + } + setNames(out, extra_afun_params) +} + +#' @describeIn util_handling_additional_fun_params Curated list of additional parameters for +#' analysis functions. Please check [rtables::additional_fun_params] for precise descriptions. +#' +#' @return +#' * `get_additional_afun_params` returns a list of additional parameters. +#' +#' @keywords internal +get_additional_afun_params <- function(add_alt_df = FALSE) { + out_list <- list( + .N_col = integer(), + .N_total = integer(), + .N_row = integer(), + .df_row = data.frame(), + .var = character(), + .ref_group = character(), + .ref_full = vector(mode = "numeric"), + .in_ref_col = logical(), + .spl_context = data.frame(), + .all_col_exprs = vector(mode = "expression"), + .all_col_counts = vector(mode = "integer") + ) + + if (isTRUE(add_alt_df)) { + out_list <- c( + out_list, + .alt_df_row = data.frame(), + .alt_df = data.frame() + ) + } + + out_list +} diff --git a/man/default_stats_formats_labels.Rd b/man/default_stats_formats_labels.Rd index 228288a6f8..3fbcad8fad 100644 --- a/man/default_stats_formats_labels.Rd +++ b/man/default_stats_formats_labels.Rd @@ -120,6 +120,9 @@ seen in \code{\link[=analyze_vars]{analyze_vars()}}. See notes to understand why } \details{ Current choices for \code{type} are \code{counts} and \code{numeric} for \code{\link[=analyze_vars]{analyze_vars()}} and affect \code{get_stats()}. + +\verb{summary_*} quick get functions for labels or formats uses \code{get_stats} and \code{get_labels_from_stats} or +\code{get_formats_from_stats} respectively to retrieve relevant information. } \section{Functions}{ \itemize{ @@ -142,11 +145,11 @@ It defaults to 0L for all values. \item \code{tern_default_labels}: Named \code{character} vector of default labels for \code{tern}. -\item \code{summary_formats()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +\item \code{summary_formats()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Quick function to retrieve default formats for summary statistics: \code{\link[=analyze_vars]{analyze_vars()}} and \code{\link[=analyze_vars_in_cols]{analyze_vars_in_cols()}} principally. -\item \code{summary_labels()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +\item \code{summary_labels()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Quick function to retrieve default labels for summary statistics. Returns labels of descriptive statistics which are understood by \code{rtables}. Similar to \code{summary_formats}. diff --git a/man/summarize_change.Rd b/man/summarize_change.Rd index 40b05f423c..b37cdd761c 100644 --- a/man/summarize_change.Rd +++ b/man/summarize_change.Rd @@ -10,19 +10,33 @@ summarize_change( lyt, vars, variables, + var_labels = vars, na_str = default_na_str(), + na_rm = TRUE, nested = TRUE, - ..., + show_labels = "default", table_names = vars, + section_div = NA_character_, + ..., .stats = c("n", "mean_sd", "median", "range"), - .formats = NULL, - .labels = NULL, + .formats = c(n = "xx", mean_sd = "xx.xx (xx.xx)", mean_se = "xx.xx (xx.xx)", median = + "xx.xx", range = "xx.xx - xx.xx", mean_ci = "(xx.xx, xx.xx)", median_ci = + "(xx.xx, xx.xx)", mean_pval = "xx.xx"), + .labels = c(mean_sd = "Mean (SD)", mean_se = "Mean (SE)", median = "Median", range = + "Min - Max"), .indent_mods = NULL ) -s_change_from_baseline(df, .var, variables, na.rm = TRUE, ...) +s_change_from_baseline(df, ...) -a_change_from_baseline(df, .var, variables, na.rm = TRUE, ...) +a_change_from_baseline( + df, + ..., + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL +) } \arguments{ \item{lyt}{(\code{PreDataTableLayouts})\cr layout that analyses will be added to.} @@ -31,17 +45,24 @@ a_change_from_baseline(df, .var, variables, na.rm = TRUE, ...) \item{variables}{(named \code{list} of \code{string})\cr list of additional analysis variables.} +\item{var_labels}{(\code{character})\cr variable labels.} + \item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} \item{nested}{(\code{flag})\cr whether this layout instruction should be applied within the existing layout structure _if possible (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split. underneath analyses, which is not allowed.} -\item{...}{additional arguments for the lower level functions.} +\item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} \item{table_names}{(\code{character})\cr this can be customized in the case that the same \code{vars} are analyzed multiple times, to avoid warnings from \code{rtables}.} +\item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group +defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} + +\item{...}{additional arguments for the lower level functions.} + \item{.stats}{(\code{character})\cr statistics to select for the table. Options are: \verb{'n', 'sum', 'mean', 'sd', 'se', 'mean_sd', 'mean_se', 'mean_ci', 'mean_sei', 'mean_sdi', 'mean_pval', 'median', 'mad', 'median_ci', 'quantiles', 'iqr', 'range', 'min', 'max', 'median_range', 'cv', 'geom_mean', 'geom_mean_ci', 'geom_cv'}} @@ -55,11 +76,6 @@ information on the \code{"auto"} setting.} unmodified default behavior. Can be negative.} \item{df}{(\code{data.frame})\cr data set containing all analysis variables.} - -\item{.var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested -by a statistics function.} - -\item{na.rm}{(\code{flag})\cr whether \code{NA} values should be removed from \code{x} prior to analysis.} } \value{ \itemize{ @@ -108,7 +124,7 @@ an error will be thrown. \examples{ library(dplyr) -## Fabricate dataset +# Fabricate dataset dta_test <- data.frame( USUBJID = rep(1:6, each = 3), AVISIT = rep(paste0("V", 1:3), 6), diff --git a/man/util_handling_additional_fun_params.Rd b/man/util_handling_additional_fun_params.Rd new file mode 100644 index 0000000000..ce95abe6dc --- /dev/null +++ b/man/util_handling_additional_fun_params.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_rtables.R +\name{util_handling_additional_fun_params} +\alias{util_handling_additional_fun_params} +\alias{retrieve_extra_afun_params} +\alias{get_additional_afun_params} +\title{Utilities to handle extra arguments in analysis functions} +\usage{ +retrieve_extra_afun_params(extra_afun_params) + +get_additional_afun_params(add_alt_df = FALSE) +} +\arguments{ +\item{extra_afun_params}{(\code{list})\cr list of additional parameters (\code{character}) to be +retrieved from the environment. Curated list is present in \link[rtables:additional_fun_params]{rtables::additional_fun_params}.} + +\item{add_alt_df}{(\code{logical})\cr if \code{TRUE}, the function will also add \code{.alt_df} and \code{.alt_df_row} +parameters.} +} +\value{ +\itemize{ +\item \code{retrieve_extra_afun_params} returns a list of the values of the parameters in the environment. +} + +\itemize{ +\item \code{get_additional_afun_params} returns a list of additional parameters. +} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} +Important additional parameters, useful to modify behavior of analysis and summary +functions are listed in \link[rtables:additional_fun_params]{rtables::additional_fun_params}. With these utility functions +we can retrieve a curated list of these parameters from the environment, and pass them +to the analysis functions with dedicated \code{...}; notice that the final \verb{s_*} function +will get them through argument matching. +} +\section{Functions}{ +\itemize{ +\item \code{retrieve_extra_afun_params()}: Retrieve additional parameters from the environment. + +\item \code{get_additional_afun_params()}: Curated list of additional parameters for +analysis functions. Please check \link[rtables:additional_fun_params]{rtables::additional_fun_params} for precise descriptions. + +}} +\keyword{internal} diff --git a/tests/testthat/_snaps/summarize_change.md b/tests/testthat/_snaps/summarize_change.md index b0e8774d89..9752712464 100644 --- a/tests/testthat/_snaps/summarize_change.md +++ b/tests/testthat/_snaps/summarize_change.md @@ -499,3 +499,20 @@ Median -2.00 Min - Max -2.00 - -2.00 +# summarize_change works with custom statistical functions + + Code + res + Output + all obs + ——————————————————— + V1 + n 3 + my_stat 1.00 + V2 + n 3 + my_stat 0.83 + V3 + n 3 + my_stat 0.67 + diff --git a/tests/testthat/test-summarize_change.R b/tests/testthat/test-summarize_change.R index 4c8d13a92e..adf60ffc68 100644 --- a/tests/testthat/test-summarize_change.R +++ b/tests/testthat/test-summarize_change.R @@ -77,3 +77,61 @@ testthat::test_that("summarize_change works as expected", { res <- testthat::expect_silent(result) testthat::expect_snapshot(res) }) + + +testthat::test_that("summarize_change works with custom statistical functions", { + dta_test <- data.frame( + USUBJID = rep(1:6, each = 3), + AVISIT = rep(paste0("V", 1:3), 6), + AVAL = c(9:1, rep(NA, 9)) + ) %>% + dplyr::mutate( + ABLFLL = AVISIT == "V1" + ) %>% + dplyr::group_by(USUBJID) %>% + dplyr::mutate( + BLVAL = AVAL[ABLFLL], + CHG = AVAL - BLVAL + ) %>% + dplyr::ungroup() + + testthat::expect_error( + basic_table() %>% + split_rows_by("AVISIT") %>% + summarize_change( + "CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "my_stat" = function(x) mean(x)) + ) %>% + build_table(dta_test), + "custom function has x as first parameter, while the default function has df" + ) + testthat::expect_error( + basic_table() %>% + split_rows_by("AVISIT") %>% + summarize_change( + "CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "my_stat" = function(df) mean(df$AVAL)) + ) %>% + build_table(dta_test), + "The custom statistical function needs to have " + ) + + result <- basic_table() %>% + split_rows_by("AVISIT") %>% + summarize_change( + "CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "my_stat" = function(df, ...) { + a <- mean(df$AVAL, na.rm = TRUE) + b <- list(...)$.N_row + a / b + }), + .formats = c("my_stat" = function(x, ...) sprintf("%.2f", x)) + ) %>% + build_table(dta_test) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) diff --git a/tests/testthat/test-utils_default_stats_formats_labels.R b/tests/testthat/test-utils_default_stats_formats_labels.R index f040eb0ef2..9583faac18 100644 --- a/tests/testthat/test-utils_default_stats_formats_labels.R +++ b/tests/testthat/test-utils_default_stats_formats_labels.R @@ -199,24 +199,21 @@ testthat::test_that("labels_use_control works as expected", { }) testthat::test_that("summary_formats works as expected", { - testthat::expect_warning( - result <- summary_formats() %>% - unlist() # More compact fruition - ) + result <- summary_formats() %>% unlist() # More compact fruition res <- testthat::expect_silent(result) testthat::expect_snapshot(res) - testthat::expect_warning(result <- summary_formats(type = "counts", include_pval = TRUE)) + result <- summary_formats(type = "counts", include_pval = TRUE) testthat::expect_true(all(result[c("n", "count", "n_blq")] == "xx.")) testthat::expect_identical(result[["pval_counts"]], "x.xxxx | (<0.0001)") }) testthat::test_that("summary_labels works as expected", { - testthat::expect_warning(result <- summary_labels()) + result <- summary_labels() res <- testthat::expect_silent(result) testthat::expect_snapshot(res) - testthat::expect_warning(result <- summary_labels(type = "counts", include_pval = TRUE)) + result <- summary_labels(type = "counts", include_pval = TRUE) res <- testthat::expect_silent(result) testthat::expect_snapshot(res) }) diff --git a/vignettes/tern_functions_guide.Rmd b/vignettes/tern_functions_guide.Rmd new file mode 100644 index 0000000000..6dc8e2377f --- /dev/null +++ b/vignettes/tern_functions_guide.Rmd @@ -0,0 +1,142 @@ +--- +title: "Understanding `tern` functions" +date: "2024-11-04" +output: + rmarkdown::html_document: + theme: "spacelab" + highlight: "kate" + toc: true + toc_float: true +vignette: > + %\VignetteIndexEntry{Understanding `tern` functions} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +editor_options: + markdown: + wrap: 72 +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +## Understanding `tern` functions + +Every function in the `tern` package is designed to have a certain structure that can cooperate well with every user's need, while maintaining a consistent and predictable behavior. This document will guide you through an example function in the package, explaining the purpose of many of its building blocks and how they can be used. + +As we recently worked on it we will consider `summarize_change()` as an example. This function is used to calculate the change from a baseline value for a given variable. A realistic example can be found in [`LBT03`](https://insightsengineering.github.io/tlg-catalog/stable/tables/lab-results/lbt03.html) from the TLG-catalog. + +`summarize_change()` is the main function that is available to the user. You can find lists of these functions in `?tern::analyze_functions`. All of these are build around `rtables::analyze()` function, which is the core analysis function in `rtables`. All these wrapper functions call specific analysis functions (always written as `a_*`) that are meant to handle the statistic functions (always written as `s_*`) and format the results with the `rtables::in_row()` function. We can summarize this structure as follows: + +`summarize_change()` (1)-> `a_change_from_baseline()` (2)-> [`s_change_from_baseline()` + `rtables::in_row()`] + +The main questions that may arise are: + +1. Handling of `NA`. +2. Handling of formats. +3. Additional statistics. + +Data set and library loading. +```{r} +library(dplyr) +library(tern) + +## Fabricate dataset +dta_test <- data.frame( + USUBJID = rep(1:6, each = 3), + AVISIT = rep(paste0("V", 1:3), 6), + ARM = rep(LETTERS[1:3], rep(6, 3)), + AVAL = c(9:1, rep(NA, 9)) +) %>% + mutate(ABLFLL = AVISIT == "V1") %>% + group_by(USUBJID) %>% + mutate( + BLVAL = AVAL[ABLFLL], + CHG = AVAL - BLVAL + ) %>% + ungroup() +``` + +Classic use of `summarize_change()`. +```{r} +fix_layout <- basic_table() %>% + split_cols_by("ARM") %>% + split_rows_by("AVISIT") + + +# Dealing with NAs: na_rm = TRUE +fix_layout %>% + summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>% + build_table(dta_test) %>% + print() + +# Dealing with NAs: na_rm = FALSE +fix_layout %>% + summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL"), na_rm = FALSE) %>% + build_table(dta_test) %>% + print() + +# changing the NA string (it is done on all levels) +fix_layout %>% + summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL"), na_str = "my_na") %>% + build_table(dta_test) %>% + print() +``` + +`.formats`, `.labels`, and `.indent_mods` depend on the names of `.stats`. Here is how you can change the default formatting. + +```{r} +# changing n count format and label and indentation +fix_layout %>% + summarize_change("CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "mean"), # reducing the number of stats for visual appreciation + .formats = c(n = "xx.xx"), + .labels = c(n = "NnNn"), + .indent_mods = c(n = 5), na_str = "nA" + ) %>% + build_table(dta_test) %>% + print() +``` +What if I want something special for the format? + +```{r} +# changing n count format and label and indentation +fix_layout %>% + summarize_change("CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "mean"), # reducing the number of stats for visual appreciation + .formats = c(n = function(x, ...) as.character(x * 100)) + ) %>% # Note you need ...!!! + build_table(dta_test) %>% + print() +``` + +Adding a custom statistic (and custom format): +```{r} +# changing n count format and label and indentation +fix_layout %>% + summarize_change( + "CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "my_stat" = function(df, ...) { + a <- mean(df$AVAL, na.rm = TRUE) + b <- list(...)$.N_row # It has access at all `?rtables::additional_fun_params` + a / b + }), + .formats = c("my_stat" = function(x, ...) sprintf("%.2f", x)) + ) %>% + build_table(dta_test) +``` + + +## For Developers + +In all of these layers there are specific parameters that need to be available, and, while `rtables` has multiple way to handle formatting and `NA` values, we had to decide how to correctly handle these and additional extra arguments. We follow the following scheme: + +Level 1: `summarize_change()`: all parameters without a starting dot `.*` are used or added to `extra_args`. Specifically, here we solve `NA` values by using `inclNAs` option in `rtables::analyze()`. This will add to `...` `na.rm = inclNAs`. Also `na_str` is here set. We may want to be statistic dependent in the future, but we still need to think how to accomplish that. We add the `rtables::additional_fun_params` to the analysis function so to make them available as `...` in the next level. + +Level 2: `a_change_from_baseline()`: all parameters starting with a dot `.` are used. Mainly `.stats`, `.formats`, `.labels`, and `.indent_mods` are used. We also add `extra_afun_params` to the `...` list for the statistical function. Notice the handling for additional parameters in the `do.call()` function. From f5f706cb306ac65b20e9874492d588af7f4efba0 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 7 Nov 2024 21:50:10 +0000 Subject: [PATCH 6/6] [skip actions] Bump version to 0.9.6.9015 --- DESCRIPTION | 4 ++-- NEWS.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 24f34d2428..8d8b2bbe06 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tern Title: Create Common TLGs Used in Clinical Trials -Version: 0.9.6.9014 -Date: 2024-11-06 +Version: 0.9.6.9015 +Date: 2024-11-07 Authors@R: c( person("Joe", "Zhu", , "joe.zhu@roche.com", role = c("aut", "cre")), person("Daniel", "Sabanés Bové", , "daniel.sabanes_bove@roche.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index fdb80c33ad..7e8b78f29d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# tern 0.9.6.9014 +# tern 0.9.6.9015 ### Enhancements * Added the `denom` parameter to `s_count_cumulative()`, `s_count_missed_doses()`, and `s_count_occurrences_by_grade()`.