Add denom argument to count_* functions

Fix tests Add tests Update docs Update NEWS Fix test Account for NAs Fix example Fix tests g_km snapshots were losing nestcolor theme Fix warning Failing test Comment Clearer examples manual vbump Pkgdown site improvements (#1328) Fixes #1327 resolve conflict
insightsengineering · Oct 22, 2024 · 40335d8 · 40335d8
1 parent eae5ff2
commit 40335d8
Show file tree

Hide file tree

Showing 41 changed files with 474 additions and 388 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: tern
 Title: Create Common TLGs Used in Clinical Trials
-Version: 0.9.6.9005
-Date: 2024-10-09
+Version: 0.9.6.9007
+Date: 2024-10-18
 Authors@R: c(
     person("Joe", "Zhu", , "[email protected]", role = c("aut", "cre")),
     person("Daniel", "Sabanés Bové", , "[email protected]", role = "aut"),

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,8 @@
-# tern 0.9.6.9005
+# tern 0.9.6.9007
+
+### Enhancements
+* Added the `denom` parameter to `s_count_cumulative()`, `s_count_missed_doses()`, and `s_count_occurrences_by_grade()`.
+* Added `"N_row"` as an optional input to `denom` in `s_count_occurrences()`.
 
 ### Bug Fixes
 * Fixed bug in `a_summary()` causing non-unique `row_name` values to occur when multiple statistics are selected for count variables.
@@ -15,7 +19,7 @@
 * Refactored `estimate_incidence_rate` to work as both an analyze function and a summarize function, controlled by the added `summarize` parameter. When `summarize = TRUE`, labels can be fine-tuned via the new `label_fmt` argument to the same function.
 * Added `fraction` statistic to the `analyze_var_count` method group.
 * Improved `summarize_glm_count()` documentation and all its associated functions to better describe the results and the functions' purpose.
-* Added `method` argument to `s_odds_ratio()` and `estimate_odds_ratio()` to control whether exact or approximate conditional likelihood calculations are used. 
+* Added `method` argument to `s_odds_ratio()` and `estimate_odds_ratio()` to control whether exact or approximate conditional likelihood calculations are used.
 
 ### Bug Fixes
 * Added defaults for `d_count_cumulative` parameters as described in the documentation.
@@ -72,7 +76,7 @@
 ### Miscellaneous
 * Added function `expect_snapshot_ggplot` to test setup file to process plot snapshot tests and allow plot dimensions to be set.
 * Adapted to argument renames introduced in `ggplot2` 3.5.0.
-* Renamed `individual_patient_plot.R` to `g_ipp.R`. 
+* Renamed `individual_patient_plot.R` to `g_ipp.R`.
 * Removed all instances of deprecated parameters `time_unit_input`, `time_unit_output`, `na_level` and `indent_mod`.
 * Removed deprecated functions `summarize_vars`, `control_summarize_vars`, `a_compare`, `create_afun_summary`, `create_afun_compare`, and `summary_custom`.
 * Removed `vdiffr` package from Suggests in DESCRIPTION file.

diff --git a/R/analyze_variables.R b/R/analyze_variables.R
@@ -236,11 +236,6 @@ s_summary.numeric <- function(x,
 
 #' @describeIn analyze_variables Method for `factor` class.
 #'
-#' @param denom (`string`)\cr choice of denominator for factor proportions. Options are:
-#'   * `n`: number of values in this row and column intersection.
-#'   * `N_row`: total number of values in this row across columns.
-#'   * `N_col`: total number of values in this column across rows.
-#'
 #' @return
 #'   * If `x` is of class `factor` or converted from `character`, returns a `list` with named `numeric` items:
 #'     * `n`: The [length()] of `x`.
@@ -281,12 +276,11 @@ s_summary.numeric <- function(x,
 #' @export
 s_summary.factor <- function(x,
                              na.rm = TRUE, # nolint
-                             denom = c("n", "N_row", "N_col"),
+                             denom = c("n", "N_col", "N_row"),
                              .N_row, # nolint
                              .N_col, # nolint
                              ...) {
   assert_valid_factor(x)
-  denom <- match.arg(denom)
 
   if (na.rm) {
     x <- x[!is.na(x)] %>% fct_discard("<Missing>")
@@ -299,20 +293,23 @@ s_summary.factor <- function(x,
   y$n <- length(x)
 
   y$count <- as.list(table(x, useNA = "ifany"))
-  dn <- switch(denom,
-    n = length(x),
-    N_row = .N_row,
-    N_col = .N_col
-  )
+
+  denom <- match.arg(denom) %>%
+    switch(
+      n = length(x),
+      N_row = .N_row,
+      N_col = .N_col
+    )
+
   y$count_fraction <- lapply(
     y$count,
     function(x) {
-      c(x, ifelse(dn > 0, x / dn, 0))
+      c(x, ifelse(denom > 0, x / denom, 0))
     }
   )
   y$fraction <- lapply(
     y$count,
-    function(count) c("num" = count, "denom" = dn)
+    function(count) c("num" = count, "denom" = denom)
   )
 
   y$n_blq <- sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", x))
@@ -344,7 +341,7 @@ s_summary.factor <- function(x,
 #' @export
 s_summary.character <- function(x,
                                 na.rm = TRUE, # nolint
-                                denom = c("n", "N_row", "N_col"),
+                                denom = c("n", "N_col", "N_row"),
                                 .N_row, # nolint
                                 .N_col, # nolint
                                 .var,
@@ -368,11 +365,6 @@ s_summary.character <- function(x,
 
 #' @describeIn analyze_variables Method for `logical` class.
 #'
-#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
-#'   * `n`: number of values in this row and column intersection.
-#'   * `N_row`: total number of values in this row across columns.
-#'   * `N_col`: total number of values in this column across rows.
-#'
 #' @return
 #'   * If `x` is of class `logical`, returns a `list` with named `numeric` items:
 #'     * `n`: The [length()] of `x` (possibly after removing `NA`s).
@@ -404,22 +396,22 @@ s_summary.character <- function(x,
 #' @export
 s_summary.logical <- function(x,
                               na.rm = TRUE, # nolint
-                              denom = c("n", "N_row", "N_col"),
+                              denom = c("n", "N_col", "N_row"),
                               .N_row, # nolint
                               .N_col, # nolint
                               ...) {
-  denom <- match.arg(denom)
   if (na.rm) x <- x[!is.na(x)]
   y <- list()
   y$n <- length(x)
   count <- sum(x, na.rm = TRUE)
-  dn <- switch(denom,
-    n = length(x),
-    N_row = .N_row,
-    N_col = .N_col
-  )
+  denom <- match.arg(denom) %>%
+    switch(
+      n = length(x),
+      N_row = .N_row,
+      N_col = .N_col
+    )
   y$count <- count
-  y$count_fraction <- c(count, ifelse(dn > 0, count / dn, 0))
+  y$count_fraction <- c(count, ifelse(denom > 0, count / denom, 0))
   y$n_blq <- 0L
   y
 }

diff --git a/R/argument_convention.R b/R/argument_convention.R
@@ -30,6 +30,10 @@
 #' @param col_by (`factor`)\cr defining column groups.
 #' @param conf_level (`proportion`)\cr confidence level of the interval.
 #' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
+#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
+#'   * `n`: number of values in this row and column intersection.
+#'   * `N_row`: total number of values in this row across columns.
+#'   * `N_col`: total number of values in this column across rows.
 #' @param df (`data.frame`)\cr data set containing all analysis variables.
 #' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a
 #'   list, which specifies the new group levels via the names and the

diff --git a/R/bland_altman.R b/R/bland_altman.R
@@ -2,29 +2,32 @@
 #'
 #' @description `r lifecycle::badge("experimental")`
 #'
-#' Functions that use the Bland-Altman method to assess the agreement between two numerical vectors.
+#' Statistics function that uses the Bland-Altman method to assess the agreement between two numerical vectors
+#' and calculates a variety of statistics.
 #'
 #' @inheritParams argument_convention
 #' @param y (`numeric`)\cr vector of numbers we want to analyze, to be compared with `x`.
 #'
-#' @name bland_altman
-NULL
-
-#' @describeIn bland_altman Statistics function that compares two numeric vectors using the Bland-Altman method
-#'   and calculates a variety of statistics.
-#'
 #' @return
-#' * `s_bland_altman()` returns a named list of the following elements: `df`, `difference_mean`, `ci_mean`,
-#'   `difference_sd`, `difference_se`, `upper_agreement_limit`, `lower_agreement_limit`, `agreement_limit_se`,
-#'   `upper_agreement_limit_ci`, `lower_agreement_limit_ci`, `t_value`, and `n`.
+#' A named list of the following elements:
+#'   * `df`
+#'   * `difference_mean`
+#'   * `ci_mean`
+#'   * `difference_sd`
+#'   * `difference_se`
+#'   * `upper_agreement_limit`
+#'   * `lower_agreement_limit`
+#'   * `agreement_limit_se`
+#'   * `upper_agreement_limit_ci`
+#'   * `lower_agreement_limit_ci`
+#'   * `t_value`
+#'   * `n`
 #'
 #' @examples
 #' x <- seq(1, 60, 5)
 #' y <- seq(5, 50, 4)
-#' conf_level <- 0.9
 #'
-#' # Derive statistics that are needed for Bland-Altman plot
-#' s_bland_altman(x, y, conf_level = conf_level)
+#' s_bland_altman(x, y, conf_level = 0.9)
 #'
 #' @export
 s_bland_altman <- function(x, y, conf_level = 0.95) {
@@ -75,16 +78,24 @@ s_bland_altman <- function(x, y, conf_level = 0.95) {
   )
 }
 
-#' @describeIn bland_altman Graphing function that produces a Bland-Altman plot.
+#' Bland-Altman plot
 #'
-#' @return
-#' * `g_bland_altman()` returns a `ggplot` Bland-Altman plot.
+#' @description `r lifecycle::badge("experimental")`
+#'
+#' Graphing function that produces a Bland-Altman plot.
+#'
+#' @inheritParams s_bland_altman
+#'
+#' @return A `ggplot` Bland-Altman plot.
 #'
 #' @examples
-#' # Create a Bland-Altman plot
-#' g_bland_altman(x = x, y = y, conf_level = conf_level)
+#' x <- seq(1, 60, 5)
+#' y <- seq(5, 50, 4)
+#'
+#' g_bland_altman(x = x, y = y, conf_level = 0.9)
 #'
 #' @export
+#' @aliases bland_altman
 g_bland_altman <- function(x, y, conf_level = 0.95) {
   result_tem <- s_bland_altman(x, y, conf_level = conf_level)
   xpos <- max(result_tem$df$average) * 0.9 + min(result_tem$df$average) * 0.1

diff --git a/R/count_cumulative.R b/R/count_cumulative.R
@@ -77,7 +77,10 @@ h_count_cumulative <- function(x,
     length(x[is_keep & x > threshold])
   }
 
-  result <- c(count = count, fraction = count / .N_col)
+  result <- c(
+    count = count,
+    fraction = if (count == 0 && .N_col == 0) 0 else count / .N_col
+  )
   result
 }
 
@@ -111,11 +114,20 @@ s_count_cumulative <- function(x,
                                lower_tail = TRUE,
                                include_eq = TRUE,
                                .N_col, # nolint
+                               .N_row, # nolint
+                               denom = c("N_col", "n", "N_row"),
                                ...) {
   checkmate::assert_numeric(thresholds, min.len = 1, any.missing = FALSE)
 
+  denom <- match.arg(denom) %>%
+    switch(
+      n = length(x),
+      N_row = .N_row,
+      N_col = .N_col
+    )
+
   count_fraction_list <- Map(function(thres) {
-    result <- h_count_cumulative(x, thres, lower_tail, include_eq, .N_col = .N_col, ...)
+    result <- h_count_cumulative(x, thres, lower_tail, include_eq, .N_col = denom, ...)
     label <- d_count_cumulative(thres, lower_tail, include_eq)
     formatters::with_label(result, label)
   }, thresholds)

diff --git a/R/count_missed_doses.R b/R/count_missed_doses.R
@@ -57,13 +57,17 @@ d_count_missed_doses <- function(thresholds) {
 #' @keywords internal
 s_count_missed_doses <- function(x,
                                  thresholds,
-                                 .N_col) { # nolint
+                                 .N_col, # nolint
+                                 .N_row, # nolint
+                                 denom = c("N_col", "n", "N_row")) {
   stat <- s_count_cumulative(
     x = x,
     thresholds = thresholds,
     lower_tail = FALSE,
     include_eq = TRUE,
-    .N_col = .N_col
+    .N_col = .N_col,
+    .N_row = .N_row,
+    denom = denom
   )
   labels <- d_count_missed_doses(thresholds)
   for (i in seq_along(stat$count_fraction)) {

diff --git a/R/count_occurrences.R b/R/count_occurrences.R
@@ -50,9 +50,10 @@ NULL
 #' @describeIn count_occurrences Statistics function which counts number of patients that report an
 #' occurrence.
 #'
-#' @param denom (`string`)\cr choice of denominator for patient proportions. Can be:
-#'   - `N_col`: total number of patients in this column across rows
-#'   - `n`: number of patients with any occurrences
+#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
+#'   * `N_col`: total number of patients in this column across rows.
+#'   * `n`: number of patients with any occurrences.
+#'   * `N_row`: total number of patients in this row across columns.
 #'
 #' @return
 #' * `s_count_occurrences()` returns a list with:
@@ -65,15 +66,17 @@ NULL
 #' s_count_occurrences(
 #'   df,
 #'   .N_col = 4L,
+#'   .N_row = 4L,
 #'   .df_row = df,
 #'   .var = "MHDECOD",
 #'   id = "USUBJID"
 #' )
 #'
 #' @export
 s_count_occurrences <- function(df,
-                                denom = c("N_col", "n"),
+                                denom = c("N_col", "n", "N_row"),
                                 .N_col, # nolint
+                                .N_row, # nolint
                                 .df_row,
                                 drop = TRUE,
                                 .var = "MHDECOD",
@@ -83,7 +86,6 @@ s_count_occurrences <- function(df,
   checkmate::assert_count(.N_col)
   checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
   checkmate::assert_multi_class(df[[id]], classes = c("factor", "character"))
-  denom <- match.arg(denom)
 
   occurrences <- if (drop) {
     # Note that we don't try to preserve original level order here since a) that would required
@@ -100,10 +102,12 @@ s_count_occurrences <- function(df,
     df[[.var]]
   }
   ids <- factor(df[[id]])
-  dn <- switch(denom,
-    n = nlevels(ids),
-    N_col = .N_col
-  )
+  denom <- match.arg(denom) %>%
+    switch(
+      n = nlevels(ids),
+      N_row = .N_row,
+      N_col = .N_col
+    )
   has_occurrence_per_id <- table(occurrences, ids) > 0
   n_ids_per_occurrence <- as.list(rowSums(has_occurrence_per_id))
   list(
@@ -117,12 +121,12 @@ s_count_occurrences <- function(df,
           c(i, i / denom)
         }
       },
-      denom = dn
+      denom = denom
     ),
     fraction = lapply(
       n_ids_per_occurrence,
       function(i, denom) c("num" = i, "denom" = denom),
-      denom = dn
+      denom = denom
     )
   )
 }
@@ -146,9 +150,10 @@ s_count_occurrences <- function(df,
 a_count_occurrences <- function(df,
                                 labelstr = "",
                                 id = "USUBJID",
-                                denom = c("N_col", "n"),
+                                denom = c("N_col", "n", "N_row"),
                                 drop = TRUE,
                                 .N_col, # nolint
+                                .N_row, # nolint
                                 .var = NULL,
                                 .df_row = NULL,
                                 .stats = NULL,
@@ -158,7 +163,7 @@ a_count_occurrences <- function(df,
                                 na_str = default_na_str()) {
   denom <- match.arg(denom)
   x_stats <- s_count_occurrences(
-    df = df, denom = denom, .N_col = .N_col, .df_row = .df_row, drop = drop, .var = .var, id = id
+    df = df, denom = denom, .N_col = .N_col, .N_row = .N_row, .df_row = .df_row, drop = drop, .var = .var, id = id
   )
   if (is.null(unlist(x_stats))) {
     return(NULL)