Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1324 feature request varying decimal precision in a summary #1356

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,4 @@ Collate:
'utils_grid.R'
'utils_rtables.R'
'utils_split_funs.R'
'xutils_custom_stats_formats_varying_dp.R'
134 changes: 131 additions & 3 deletions R/analyze_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,53 @@
#'
#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
#' a_summary(
#' rnorm(10, 5, 1),
#' .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE,
#' .stats = "mean",
#' fmts_df_var = "variant1",
#' d = 2
#' )
#'
#' x1 <- rnorm(10, 5, 1)
#' xref <- rnorm(20, -5, 1)
#'
#' a_summary(
#' x1,
#' .ref_group = xref, .var = "bla", compare = TRUE,
#' .stats = c("mean", "sd"),
#' .formats = c("mean" = format_xx("xx.xxx"), "sd" = format_xx("xx.x"))
#' )
#' a_summary(
#' x1,
#' .ref_group = xref, .var = "bla", compare = TRUE,
#' .stats = "mean_sd",
#' fmt_specs = list(
#' fmts_df_var = "variant2",
#' d = 1,
#' formatting_function = "format_xx"
#' )
#' )
#' a_summary(
#' x1,
#' .ref_group = xref, .var = "bla", compare = TRUE,
#' .stats = c("mean", "mean_sd", "mean_pval")
#' )
#'
#' our_fmt_specs_variant <- list(
#' fmts_df = tern_formats_custom_df(),
#' fmts_df_var = "default",
#' formatting_function = "format_xx_fixed_dp",
#' d = 0
#' )

#' a_summary(
#' x1, .ref_group = xref, .var = "bla", compare = TRUE,
#' .stats = c("mean", "mean_sd", "mean_pval"),
#' .formats = c("mean_sd" = "xx.d (xx.dxxxx)"),
#' fmt_specs = our_fmt_specs_variant
#' )

#' @export
a_summary <- function(x,
.N_col, # nolint
Expand All @@ -479,6 +525,7 @@
.indent_mods = NULL,
na.rm = TRUE, # nolint
na_str = default_na_str(),
fmt_specs = default_fmt_specs,
...) {
extra_args <- list(...)
if (is.numeric(x)) {
Expand Down Expand Up @@ -507,7 +554,28 @@
# Fill in with formatting defaults if needed
met_grp <- paste0(c("analyze_vars", type), collapse = "_")
.stats <- get_stats(met_grp, stats_in = .stats, add_pval = compare)
.formats <- get_formats_from_stats(.stats, .formats)

if (is.null(fmt_specs$fmts_df)) {
.formats <- get_formats_from_stats(.stats, .formats)
} else {
d_actual <- derive_d_from_fmt_specs(fmt_specs, .df_row)

# update the spec with the actual derived d
fmt_specs$d <- d_actual

# core function that does the conversion of the xx.d based formats to the actual format
# note that is it most safe to apply formatting functions, as many of the final formats will not belong to
# list_valid_format_labels()

Check warning on line 568 in R/analyze_variables.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / Lint R code 🧶

file=R/analyze_variables.R,line=568,col=7,[commented_code_linter] Commented code should be removed.
.formats_all <- get_formats_from_stats_custom(
.stats,
formats_in = .formats,
### variant specific arguments
fmts_specs = fmt_specs
)
.formats <- .formats_all$fmt
.formats_char <- .formats_all$fmt_char
}

.indent_mods <- get_indents_from_stats(.stats, .indent_mods)

lbls <- get_labels_from_stats(.stats, .labels)
Expand Down Expand Up @@ -615,6 +683,59 @@
#' ) %>%
#' build_table(dt)
#'
#' # custom format
#' our_fmt_specs_variant <- list(
#' fmts_df = tern_formats_custom_df(),
#' fmts_df_var = "variant2",
#' formatting_function = "format_xx_fixed_dp",
#' d = 0
#' )
#'
#' dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4))
#' basic_table() %>%
#' analyze_vars(
#' vars = "VAR",
#' .stats = c("n", "mean", "mean_sd", "range"),
#' .formats = c("mean" = "xx.dxx"),
#' fmt_specs = our_fmt_specs_variant,
#' ) %>%
#' build_table(dt)
#'
#' # custom format
#' our_fmt_specs_variant2 <- list(
#' fmts_df = tern_formats_custom_df(),
#' fmts_df_var = "variant2",
#' formatting_function = "format_xx_fixed_dp",
#' d = "decimal"
#' )
#' dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4), decimal = 2)
#' basic_table() %>%
#' analyze_vars(
#' vars = "VAR",
#' .stats = c("n", "mean", "mean_sd", "range"),
#' .formats = c("mean" = "xx.dxxxxxx"),
#' fmt_specs = our_fmt_specs_variant2,
#' ) %>%
#' build_table(dt)
#'
#' # custom format
#' dt2 <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4, 0.002, 0.004, 0.006), decimal = c(rep(2, 4), rep(1, 4)), by = c(rep("by1", 4), rep("by2", 4)))

Check warning on line 722 in R/analyze_variables.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / Lint R code 🧶

file=R/analyze_variables.R,line=722,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 154 characters.
#' our_fmt_specs_variant2 <- list(
#' fmts_df = tern_formats_custom_df(),
#' fmts_df_var = "variant2",
#' formatting_function = "format_xx_fixed_dp",
#' d = "decimal",
#' d_cap = 3
#' )
#' basic_table() %>%
#' split_rows_by("by") %>%
#' analyze_vars(
#' vars = "VAR",
#' .stats = c("n", "mean", "mean_sd", "range"),
#' fmt_specs = our_fmt_specs_variant2
#' ) %>%
#' build_table(dt2)
#'
#' @export
#' @order 2
analyze_vars <- function(lyt,
Expand All @@ -630,8 +751,15 @@
.stats = c("n", "mean_sd", "median", "range", "count_fraction"),
.formats = NULL,
.labels = NULL,
.indent_mods = NULL) {
extra_args <- list(.stats = .stats, na.rm = na.rm, na_str = na_str, ...)
.indent_mods = NULL,
# varying precision arguments
fmt_specs = default_fmt_specs) {
Copy link
Contributor

@Melkiades Melkiades Nov 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Everything should pass by .formats. Having this is in addition is confusing. We can do already everything from .formats at the moment. Hence, I do not think we need this. It could be helpful to take a look again at the automatic formatting that we provide here in {tern}

extra_args <- list(
.stats = .stats, na.rm = na.rm, na_str = na_str,
fmt_specs = fmt_specs,
...
)

if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods
Expand Down
84 changes: 84 additions & 0 deletions R/formatting_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,90 @@ format_xx <- function(str) {
return(rtable_format)
}

#' Alternative format XX function :a formatting function with fixed decimal precision
#'
#' Translate a string where x and dots are interpreted as number place
#' holders, and others as formatting elements.
#'
#' @param str (`string`)\cr template.
#'
#' @return An `rtables` formatting function.
#'
#' @examples
#' test <- list(c(1.658, 0.5761), c(1e1, 785.6))
#'
#' z <- format_xx_fixed_dp("xx (xx.x)")
#' sapply(test, z)
#'
#' z <- format_xx_fixed_dp("xx.x - xx.x")
#' sapply(test, z)
#'
#' z <- format_xx_fixed_dp("xx.x, incl. xx.x% NE")
#' sapply(test, z)
#' @seealso [format_xx]
#' @export
format_xx_fixed_dp <- function(str, na_str) {
# Find position in the string.
if (grepl(pattern = "xxx.", x = str, fixed = TRUE)) {
stop("Error: format_xx_fixed_dp do not use xxx. in input str, replace by xx. instead")
}
if (!grepl(pattern = "xx", x = str, fixed = TRUE)) {
stop("Error: format_xx_fixed_dp: input str should contain xx")
}
positions <- gregexpr(
pattern = "xx\\.?x*", text = str,
perl = TRUE
)
x_positions <- regmatches(x = str, m = positions)[[1]]
### str is splitted into pieces as xx. xx xx.xxx
### xx is no rounding
### xx. rounding to integer (is treated same as rounding to 0 decimal)
### xx.x rounding to 1 decimal, etc

no_round <- function(x) {
if (is.na(x)) {
return(na_str)
} else {
return(x)
}
}
roundfunc <- round

# Roundings depends on the number of x behind [.].
roundings <- lapply(
X = x_positions,
function(x) {
if (x == "xx") {
rounding <- no_round
} else {
y <- strsplit(split = "\\.", x = x)[[1]]
digits <- ifelse(length(y) > 1, nchar(y[2]), 0)

rounding <- function(x) {
if (is.na(x)) {
return(na_str)
} else {
format(roundfunc(x, digits = digits),
nsmall = digits
)
}
}
}

return(rounding)
}
)

rtable_format <- function(x, output) {
values <- Map(y = x, fun = roundings, function(y, fun) fun(y))
regmatches(x = str, m = positions)[[1]] <- values
return(str)
}

return(rtable_format)
}


#' Format numeric values by significant figures
#'
#' Format numeric values to print with a specified number of significant figures.
Expand Down
Loading
Loading