From 715f42df6c0b98b52cd828fd3a14e46fec8086f1 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 18 Oct 2024 18:22:56 -0400
Subject: [PATCH] Refactor `a_count_occurrences_by_grade()`

Fix check
---
 NEWS.md                                       |   3 +
 R/count_occurrences_by_grade.R                | 150 +++++++++++++-----
 man/count_occurrences_by_grade.Rd             |  42 +++--
 .../_snaps/count_occurrences_by_grade.md      |  97 +++++++++--
 .../test-count_occurrences_by_grade.R         |  63 ++++++++
 5 files changed, 292 insertions(+), 63 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index e8b56db2bc..483ec81671 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,8 @@
 # tern 0.9.6.9007
 
+### Enhancements
+* Refactored `a_count_occurrences_by_grade()` to no longer use `make_afun()`.
+
 ### Bug Fixes
 * Fixed bug in `a_summary()` causing non-unique `row_name` values to occur when multiple statistics are selected for count variables.
 
diff --git a/R/count_occurrences_by_grade.R b/R/count_occurrences_by_grade.R
index d77a049a7a..d23001605d 100644
--- a/R/count_occurrences_by_grade.R
+++ b/R/count_occurrences_by_grade.R
@@ -16,6 +16,7 @@
 #' row/column context and operates on the level of the latest row split or the root of the table if no row splits have
 #' occurred.
 #'
+#' @inheritParams count_occurrences
 #' @inheritParams argument_convention
 #' @param grade_groups (named `list` of `character`)\cr list containing groupings of grades.
 #' @param remove_single (`flag`)\cr `TRUE` to not include the elements of one-element grade groups
@@ -149,15 +150,24 @@ h_append_grade_groups <- function(grade_groups, refs, remove_single = TRUE, only
 #' @export
 s_count_occurrences_by_grade <- function(df,
                                          .var,
+                                         .N_row, # nolint
                                          .N_col, # nolint
                                          id = "USUBJID",
                                          grade_groups = list(),
                                          remove_single = TRUE,
                                          only_grade_groups = FALSE,
+                                         denom = c("N_col", "n", "N_row"),
                                          labelstr = "") {
   assert_valid_factor(df[[.var]])
   assert_df_with_variables(df, list(grade = .var, id = id))
 
+  denom <- match.arg(denom) %>%
+    switch(
+      n = nlevels(factor(df[[id]])),
+      N_row = .N_row,
+      N_col = .N_col
+    )
+
   if (nrow(df) < 1) {
     grade_levels <- levels(df[[.var]])
     l_count <- as.list(rep(0, length(grade_levels)))
@@ -201,7 +211,17 @@ s_count_occurrences_by_grade <- function(df,
     l_count <- h_append_grade_groups(grade_groups, l_count, remove_single, only_grade_groups)
   }
 
-  l_count_fraction <- lapply(l_count, function(i, denom) c(i, i / denom), denom = .N_col)
+  l_count_fraction <- lapply(
+    l_count,
+    function(i, denom) {
+      if (i == 0 && denom == 0) {
+        c(0, 0)
+      } else {
+        c(i, i / denom)
+      }
+    },
+    denom = denom
+  )
 
   list(
     count_fraction = l_count_fraction
@@ -215,22 +235,72 @@ s_count_occurrences_by_grade <- function(df,
 #' * `a_count_occurrences_by_grade()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
 #' @examples
-#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
-#' # function `format_count_fraction()` can be applied correctly.
-#' afun <- make_afun(a_count_occurrences_by_grade, .ungroup_stats = "count_fraction")
-#' afun(
+#' a_count_occurrences_by_grade(
 #'   df,
 #'   .N_col = 10L,
+#'   .N_row = 10L,
 #'   .var = "AETOXGR",
 #'   id = "USUBJID",
 #'   grade_groups = list("ANY" = levels(df$AETOXGR))
 #' )
 #'
 #' @export
-a_count_occurrences_by_grade <- make_afun(
-  s_count_occurrences_by_grade,
-  .formats = c("count_fraction" = format_count_fraction_fixed_dp)
-)
+a_count_occurrences_by_grade <- function(df,
+                                         labelstr = "",
+                                         id = "USUBJID",
+                                         grade_groups = list(),
+                                         remove_single = TRUE,
+                                         only_grade_groups = FALSE,
+                                         denom = c("N_col", "n", "N_row"),
+                                         .N_col, # nolint
+                                         .N_row, # nolint
+                                         .df_row,
+                                         .var = NULL,
+                                         .stats = NULL,
+                                         .formats = NULL,
+                                         .labels = NULL,
+                                         .indent_mods = NULL,
+                                         na_str = default_na_str()) {
+  x_stats <- s_count_occurrences_by_grade(
+    df = df, .var = .var, .N_row = .N_row, .N_col = .N_col, id = id,
+    grade_groups = grade_groups, remove_single = remove_single, only_grade_groups = only_grade_groups,
+    denom = denom, labelstr = labelstr
+  )
+
+  if (is.null(unlist(x_stats))) {
+    return(NULL)
+  }
+  x_lvls <- names(x_stats[[1]])
+
+  # Fill in with formatting defaults if needed
+  .stats <- get_stats("count_occurrences_by_grade", stats_in = .stats)
+  if (length(.formats) == 1 && is.null(names(.formats))) {
+    .formats <- rep(.formats, length(.stats)) %>% setNames(.stats)
+  }
+  .formats <- get_formats_from_stats(.stats, .formats)
+  .labels <- get_labels_from_stats(.stats, .labels, row_nms = x_lvls)
+  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, row_nms = x_lvls)
+
+  if ("count_fraction_fixed_dp" %in% .stats) x_stats[["count_fraction_fixed_dp"]] <- x_stats[["count_fraction"]]
+  x_stats <- x_stats[.stats]
+
+  # Ungroup statistics with values for each level of x
+  x_ungrp <- ungroup_stats(x_stats, .formats, .labels, list())
+  x_stats <- x_ungrp[["x"]]
+  .formats <- x_ungrp[[".formats"]]
+
+  # Auto format handling
+  .formats <- apply_auto_formatting(.formats, x_stats, .df_row, .var)
+
+  in_rows(
+    .list = x_stats,
+    .formats = .formats,
+    .names = unlist(.labels),
+    .labels = unlist(.labels),
+    .indent_mods = .indent_mods,
+    .format_na_strs = na_str
+  )
+}
 
 #' @describeIn count_occurrences_by_grade Layout-creating function which can take statistics function
 #'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
@@ -300,40 +370,36 @@ count_occurrences_by_grade <- function(lyt,
                                        nested = TRUE,
                                        ...,
                                        table_names = var,
-                                       .stats = NULL,
-                                       .formats = NULL,
+                                       .stats = "count_fraction",
+                                       .formats = list(count_fraction = format_count_fraction_fixed_dp),
                                        .indent_mods = NULL,
                                        .labels = NULL) {
   checkmate::assert_flag(riskdiff)
+  extra_args <- list(
+    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str
+  )
   s_args <- list(
     id = id, grade_groups = grade_groups, remove_single = remove_single, only_grade_groups = only_grade_groups, ...
   )
 
-  afun <- make_afun(
-    a_count_occurrences_by_grade,
-    .stats = .stats,
-    .formats = .formats,
-    .indent_mods = .indent_mods,
-    .ungroup_stats = "count_fraction"
-  )
-
-  extra_args <- if (isFALSE(riskdiff)) {
-    s_args
+  if (isFALSE(riskdiff)) {
+    extra_args <- c(extra_args, s_args)
   } else {
-    list(
-      afun = list("s_count_occurrences_by_grade" = afun),
-      .stats = .stats,
-      .indent_mods = .indent_mods,
-      s_args = s_args
+    extra_args <- c(
+      extra_args,
+      list(
+        afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade),
+        s_args = s_args
+      )
     )
   }
 
   analyze(
     lyt = lyt,
     vars = var,
+    afun = ifelse(isFALSE(riskdiff), a_count_occurrences_by_grade, afun_riskdiff),
     var_labels = var_labels,
     show_labels = show_labels,
-    afun = ifelse(isFALSE(riskdiff), afun, afun_riskdiff),
     table_names = table_names,
     na_str = na_str,
     nested = nested,
@@ -377,29 +443,37 @@ summarize_occurrences_by_grade <- function(lyt,
                                            grade_groups = list(),
                                            remove_single = TRUE,
                                            only_grade_groups = FALSE,
+                                           riskdiff = FALSE,
                                            na_str = default_na_str(),
                                            ...,
-                                           .stats = NULL,
-                                           .formats = NULL,
+                                           .stats = "count_fraction",
+                                           .formats = list(count_fraction = format_count_fraction_fixed_dp),
                                            .indent_mods = NULL,
                                            .labels = NULL) {
+  checkmate::assert_flag(riskdiff)
   extra_args <- list(
+    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str
+  )
+  s_args <- list(
     id = id, grade_groups = grade_groups, remove_single = remove_single, only_grade_groups = only_grade_groups, ...
   )
 
-  cfun <- make_afun(
-    a_count_occurrences_by_grade,
-    .stats = .stats,
-    .formats = .formats,
-    .labels = .labels,
-    .indent_mods = .indent_mods,
-    .ungroup_stats = "count_fraction"
-  )
+  if (isFALSE(riskdiff)) {
+    extra_args <- c(extra_args, s_args)
+  } else {
+    extra_args <- c(
+      extra_args,
+      list(
+        afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade),
+        s_args = s_args
+      )
+    )
+  }
 
   summarize_row_groups(
     lyt = lyt,
     var = var,
-    cfun = cfun,
+    cfun = ifelse(isFALSE(riskdiff), a_count_occurrences_by_grade, afun_riskdiff),
     na_str = na_str,
     extra_args = extra_args
   )
diff --git a/man/count_occurrences_by_grade.Rd b/man/count_occurrences_by_grade.Rd
index 547fa84d69..b4090394a3 100644
--- a/man/count_occurrences_by_grade.Rd
+++ b/man/count_occurrences_by_grade.Rd
@@ -21,8 +21,8 @@ count_occurrences_by_grade(
   nested = TRUE,
   ...,
   table_names = var,
-  .stats = NULL,
-  .formats = NULL,
+  .stats = "count_fraction",
+  .formats = list(count_fraction = format_count_fraction_fixed_dp),
   .indent_mods = NULL,
   .labels = NULL
 )
@@ -34,10 +34,11 @@ summarize_occurrences_by_grade(
   grade_groups = list(),
   remove_single = TRUE,
   only_grade_groups = FALSE,
+  riskdiff = FALSE,
   na_str = default_na_str(),
   ...,
-  .stats = NULL,
-  .formats = NULL,
+  .stats = "count_fraction",
+  .formats = list(count_fraction = format_count_fraction_fixed_dp),
   .indent_mods = NULL,
   .labels = NULL
 )
@@ -45,23 +46,33 @@ summarize_occurrences_by_grade(
 s_count_occurrences_by_grade(
   df,
   .var,
+  .N_row,
   .N_col,
   id = "USUBJID",
   grade_groups = list(),
   remove_single = TRUE,
   only_grade_groups = FALSE,
+  denom = c("N_col", "n", "N_row"),
   labelstr = ""
 )
 
 a_count_occurrences_by_grade(
   df,
-  .var,
-  .N_col,
+  labelstr = "",
   id = "USUBJID",
   grade_groups = list(),
   remove_single = TRUE,
   only_grade_groups = FALSE,
-  labelstr = ""
+  denom = c("N_col", "n", "N_row"),
+  .N_col,
+  .N_row,
+  .df_row,
+  .var = NULL,
+  .stats = NULL,
+  .formats = NULL,
+  .labels = NULL,
+  .indent_mods = NULL,
+  na_str = default_na_str()
 )
 }
 \arguments{
@@ -115,12 +126,23 @@ unmodified default behavior. Can be negative.}
 \item{.var, var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested
 by a statistics function.}
 
+\item{.N_row}{(\code{integer(1)})\cr row-wise N (row group count) for the group of observations being analyzed
+(i.e. with no column-based subsetting) that is typically passed by \code{rtables}.}
+
 \item{.N_col}{(\code{integer(1)})\cr column-wise N (column count) for the full column being analyzed that is typically
 passed by \code{rtables}.}
 
+\item{denom}{(\code{string})\cr choice of denominator for patient proportions. Can be:
+\itemize{
+\item \code{N_col}: total number of patients in this column across rows
+\item \code{n}: number of patients with any occurrences
+}}
+
 \item{labelstr}{(\code{string})\cr label of the level of the parent split currently being summarized
 (must be present as second argument in Content Row Functions). See \code{\link[rtables:summarize_row_groups]{rtables::summarize_row_groups()}}
 for more information.}
+
+\item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.}
 }
 \value{
 \itemize{
@@ -248,12 +270,10 @@ s_count_occurrences_by_grade(
   grade_groups = list("ANY" = levels(df$AETOXGR))
 )
 
-#  We need to ungroup `count_fraction` first so that the `rtables` formatting
-# function `format_count_fraction()` can be applied correctly.
-afun <- make_afun(a_count_occurrences_by_grade, .ungroup_stats = "count_fraction")
-afun(
+a_count_occurrences_by_grade(
   df,
   .N_col = 10L,
+  .N_row = 10L,
   .var = "AETOXGR",
   id = "USUBJID",
   grade_groups = list("ANY" = levels(df$AETOXGR))
diff --git a/tests/testthat/_snaps/count_occurrences_by_grade.md b/tests/testthat/_snaps/count_occurrences_by_grade.md
index 50c754c762..9ea6ef570b 100644
--- a/tests/testthat/_snaps/count_occurrences_by_grade.md
+++ b/tests/testthat/_snaps/count_occurrences_by_grade.md
@@ -228,6 +228,39 @@
       
       
 
+# a_count_occurrences_by_grade works with healthy input.
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+         row_name formatted_cell indent_mod row_label
+      1         1        2 (20%)          0         1
+      2         2        2 (20%)          0         2
+      3         3        2 (20%)          0         3
+      4         4              0          0         4
+      5         5              0          0         5
+      6         1      2 (20.0%)          0         1
+      7         2      2 (20.0%)          0         2
+      8         3      2 (20.0%)          0         3
+      9         4              0          0         4
+      10        5              0          0         5
+
+# a_count_occurrences_by_grade works with custom input.
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+          row_name formatted_cell indent_mod  row_label
+      1   Level: 1        2 (20%)          1   Level: 1
+      2      LVL 2        2 (20%)          2      LVL 2
+      3 Count of 3        2 (20%)          0 Count of 3
+      4  Missing 4         0 (0%)          3  Missing 4
+      5          5         0 (0%)          0          5
+
 # count_occurrences_by_grade works with default arguments for intensity
 
     Code
@@ -248,9 +281,9 @@
                      A           B         D  
                    (N=3)       (N=3)     (N=0)
       ————————————————————————————————————————
-      MILD           0       2 (66.7%)    NA  
-      MODERATE   1 (33.3%)   1 (33.3%)    NA  
-      SEVERE     2 (66.7%)       0        NA  
+      MILD           0       2 (66.7%)     0  
+      MODERATE   1 (33.3%)   1 (33.3%)     0  
+      SEVERE     2 (66.7%)       0         0  
 
 # count_occurrences_by_grade label works when more than one variables are analyzed
 
@@ -322,17 +355,17 @@
     Code
       res
     Output
-                         A               B             D     
-                      (N=10)          (N=10)         (N=0)   
-      ———————————————————————————————————————————————————————
-      LOW                                                    
-        MILD       0.00 (0.00%)    1.00 (10.00%)   0.00 (NA%)
-        MODERATE   0.00 (0.00%)    0.00 (0.00%)    0.00 (NA%)
-        SEVERE     2.00 (20.00%)   0.00 (0.00%)    0.00 (NA%)
-      HIGH                                                   
-        MILD       0.00 (0.00%)    1.00 (10.00%)   0.00 (NA%)
-        MODERATE   1.00 (10.00%)   1.00 (10.00%)   0.00 (NA%)
-        SEVERE     0.00 (0.00%)    0.00 (0.00%)    0.00 (NA%)
+                         A               B              D      
+                      (N=10)          (N=10)          (N=0)    
+      —————————————————————————————————————————————————————————
+      LOW                                                      
+        MILD       0.00 (0.00%)    1.00 (10.00%)   0.00 (0.00%)
+        MODERATE   0.00 (0.00%)    0.00 (0.00%)    0.00 (0.00%)
+        SEVERE     2.00 (20.00%)   0.00 (0.00%)    0.00 (0.00%)
+      HIGH                                                     
+        MILD       0.00 (0.00%)    1.00 (10.00%)   0.00 (0.00%)
+        MODERATE   1.00 (10.00%)   1.00 (10.00%)   0.00 (0.00%)
+        SEVERE     0.00 (0.00%)    0.00 (0.00%)    0.00 (0.00%)
 
 # summarize_occurrences_by_grade works with custom arguments for grade
 
@@ -400,3 +433,39 @@
         MODERATE    7 (3.5%)     9 (5.1%)       6 (3.7%)           -1.6 (-5.7 - 2.5)      
         SEVERE     17 (8.4%)    23 (13.0%)     22 (13.6%)          -4.6 (-10.8 - 1.7)     
 
+# summarize_occurrences works as expected with risk difference column
+
+    Code
+      res
+    Output
+                   A: Drug X    B: Placebo   C: Combination   Risk Difference (%) (95% CI)
+                    (N=202)      (N=177)        (N=162)                 (N=379)           
+      ————————————————————————————————————————————————————————————————————————————————————
+      F                                                                                   
+        MILD        3 (1.5%)     4 (2.3%)       1 (0.6%)           -0.8 (-3.5 - 2.0)      
+        MODERATE    8 (4.0%)     6 (3.4%)       6 (3.7%)            0.6 (-3.2 - 4.4)      
+        SEVERE     22 (10.9%)   21 (11.9%)     20 (12.3%)          -1.0 (-7.4 - 5.4)      
+      M                                                                                   
+        MILD        3 (1.5%)        0           1 (0.6%)            1.5 (-0.2 - 3.2)      
+        MODERATE   11 (5.4%)     9 (5.1%)       8 (4.9%)            0.4 (-4.1 - 4.9)      
+        SEVERE     12 (5.9%)    17 (9.6%)      12 (7.4%)           -3.7 (-9.1 - 1.8)      
+
+---
+
+    Code
+      res
+    Output
+                     A: Drug X   B: Placebo   C: Combination   Risk Difference (%) (95% CI)
+                      (N=202)     (N=177)        (N=162)                 (N=379)           
+      —————————————————————————————————————————————————————————————————————————————————————
+      F                                                                                    
+          -Any-      20 (9.9%)   19 (10.7%)     19 (11.7%)          -0.8 (-7.0 - 5.3)      
+          MILD       2 (1.0%)     2 (1.1%)          0               -0.1 (-2.2 - 1.9)      
+          MODERATE   4 (2.0%)     3 (1.7%)       3 (1.9%)            0.3 (-2.4 - 3.0)      
+          SEVERE     14 (6.9%)   14 (7.9%)      16 (9.9%)           -1.0 (-6.3 - 4.3)      
+      M                                                                                    
+          -Any-      14 (6.9%)   21 (11.9%)     17 (10.5%)          -4.9 (-10.8 - 1.0)     
+          MILD       1 (0.5%)        0           1 (0.6%)            0.5 (-0.5 - 1.5)      
+          MODERATE   4 (2.0%)     7 (4.0%)       5 (3.1%)           -2.0 (-5.4 - 1.5)      
+          SEVERE     9 (4.5%)    14 (7.9%)      11 (6.8%)           -3.5 (-8.3 - 1.4)      
+
diff --git a/tests/testthat/test-count_occurrences_by_grade.R b/tests/testthat/test-count_occurrences_by_grade.R
index 105c906585..3664e7f246 100644
--- a/tests/testthat/test-count_occurrences_by_grade.R
+++ b/tests/testthat/test-count_occurrences_by_grade.R
@@ -135,6 +135,34 @@ testthat::test_that("s_count_occurrences_by_grade works with valid input for int
   testthat::expect_snapshot(res)
 })
 
+testthat::test_that("a_count_occurrences_by_grade works with healthy input.", {
+  options("width" = 100)
+
+  result <- a_count_occurrences_by_grade(
+    df = raw_data, .N_col = 10, .N_row = 10, .df_row = raw_data,
+    .stats = get_stats("count_occurrences_by_grade"),
+    .var = "AETOXGR", id = "USUBJID"
+  )
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
+testthat::test_that("a_count_occurrences_by_grade works with custom input.", {
+  options("width" = 100)
+
+  result <- a_count_occurrences_by_grade(
+    df = raw_data, .N_col = 10, .N_row = 10, .df_row = raw_data,
+    .stats = "count_fraction",
+    .formats = c(count_fraction = "xx (xx%)"),
+    .labels = list("1" = "Level: 1", "2" = "LVL 2", "count_fraction.3" = "Count of 3", "4" = "Missing 4"),
+    .indent_mods = list("1" = 1L, "2" = 2L, "count_fraction.4" = 3L),
+    .var = "AETOXGR", id = "USUBJID"
+  )
+
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
 testthat::test_that("count_occurrences_by_grade works with default arguments for intensity", {
   df <- raw_data
   df_adsl <- unique(df[c("ARM", "ARM_EMPTY", "USUBJID")])
@@ -414,3 +442,38 @@ testthat::test_that("count_occurrences_by_grade works as expected with risk diff
   res <- testthat::expect_silent(result)
   testthat::expect_snapshot(res)
 })
+
+testthat::test_that("summarize_occurrences works as expected with risk difference column", {
+  tern_ex_adae$AESEV <- factor(tern_ex_adae$AESEV)
+
+  # Default parameters
+  result <- basic_table(show_colcounts = TRUE) %>%
+    split_cols_by("ARM", split_fun = add_riskdiff("A: Drug X", "B: Placebo")) %>%
+    split_rows_by("SEX", child_labels = "visible") %>%
+    summarize_occurrences_by_grade(
+      var = "AESEV",
+      riskdiff = TRUE
+    ) %>%
+    build_table(tern_ex_adae)
+
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  # Grade groups, custom id var
+  grade_groups <- list("-Any-" = levels(tern_ex_adae$AESEV))
+
+  result <- basic_table(show_colcounts = TRUE) %>%
+    split_cols_by("ARM", split_fun = add_riskdiff("A: Drug X", "B: Placebo")) %>%
+    split_rows_by("SEX", child_labels = "visible") %>%
+    summarize_occurrences_by_grade(
+      var = "AESEV",
+      riskdiff = TRUE,
+      .indent_mods = 1L,
+      grade_groups = grade_groups,
+      id = "SITEID"
+    ) %>%
+    build_table(tern_ex_adae)
+
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})