From 57b193d56d46ae98de2d21007a1c5b1cfd457adc Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 12 Aug 2023 15:46:07 +0200 Subject: [PATCH] Implement `means_by_group()` (#446) * draft grouped mean * add methods * fix * fix * docs * fix printing issues * fix check issues * fix * news, desc * fix issues * add p-value * emmeans to suggests * add ci * fix, tests * snapshots * docs * add tests * docs * round weighted N * fix for weights * skip test if emmeans not installed * typo in news * fix typos in docs --------- Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- DESCRIPTION | 3 +- NAMESPACE | 7 + NEWS.md | 3 + R/means_by_group.R | 294 ++++++++++++++++++ _pkgdown.yaml | 1 + man/means_by_group.Rd | 125 ++++++++ tests/testthat/_snaps/means_by_group.md | 172 ++++++++++ .../testthat/_snaps/windows/means_by_group.md | 34 ++ tests/testthat/test-labelled_data.R | 36 ++- tests/testthat/test-means_by_group.R | 21 ++ 10 files changed, 678 insertions(+), 18 deletions(-) create mode 100644 R/means_by_group.R create mode 100644 man/means_by_group.Rd create mode 100644 tests/testthat/_snaps/means_by_group.md create mode 100644 tests/testthat/_snaps/windows/means_by_group.md create mode 100644 tests/testthat/test-means_by_group.R diff --git a/DESCRIPTION b/DESCRIPTION index 3c71a6343..d86f68812 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.8.0.5 +Version: 0.8.0.6 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), @@ -43,6 +43,7 @@ Suggests: data.table, dplyr (>= 1.0), effectsize, + emmeans, gamm4, ggplot2, gt, diff --git a/NAMESPACE b/NAMESPACE index bb2d43766..fac5fbaf6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -67,6 +67,7 @@ S3method(describe_distribution,numeric) S3method(format,data_codebook) S3method(format,dw_data_peek) S3method(format,dw_data_tabulate) +S3method(format,dw_groupmeans) S3method(format,parameters_distribution) S3method(kurtosis,data.frame) S3method(kurtosis,default) @@ -76,6 +77,9 @@ S3method(labels_to_levels,data.frame) S3method(labels_to_levels,default) S3method(labels_to_levels,factor) S3method(makepredictcall,dw_transformer) +S3method(means_by_group,data.frame) +S3method(means_by_group,default) +S3method(means_by_group,numeric) S3method(normalize,data.frame) S3method(normalize,factor) S3method(normalize,grouped_df) @@ -86,6 +90,8 @@ S3method(print,data_codebook) S3method(print,dw_data_peek) S3method(print,dw_data_tabulate) S3method(print,dw_data_tabulates) +S3method(print,dw_groupmeans) +S3method(print,dw_groupmeans_list) S3method(print,dw_transformer) S3method(print,parameters_distribution) S3method(print,parameters_kurtosis) @@ -252,6 +258,7 @@ export(get_columns) export(kurtosis) export(labels_to_levels) export(mean_sd) +export(means_by_group) export(median_mad) export(normalize) export(print_html) diff --git a/NEWS.md b/NEWS.md index 529ec1398..858b95f93 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,9 @@ NEW FUNCTIONS * `rowmean_n()`, to compute row means if row contains at least `n` non-missing values. +* `means_by_group()`, to compute mean values of variables, grouped by levels + of specified factors. + CHANGES * `recode_into()` gains an `overwrite` argument to skip overwriting already diff --git a/R/means_by_group.R b/R/means_by_group.R new file mode 100644 index 000000000..1d3f6fd52 --- /dev/null +++ b/R/means_by_group.R @@ -0,0 +1,294 @@ +#' @title Summary of mean values by group +#' @name means_by_group +#' +#' @description Computes summary table of means by groups. +#' +#' @param x A vector or a data frame. +#' @param group If `x` is a numeric vector, `group` should be a factor that +#' indicates the group-classifying categories. If `x` is a data frame, `group` +#' should be a character string, naming the variable in `x` that is used for +#' grouping. Numeric vectors are coerced to factors. Not that `group` should +#' only refer to a single variable. +#' @param ci Level of confidence interval for mean estimates. Default is `0.95`. +#' Use `ci = NA` to suppress confidence intervals. +#' @param weights If `x` is a numeric vector, `weights` should be a vector of +#' weights that will be applied to weight all observations. If `x` is a data +#' frame, `weights` can also be a character string indicating the name of the +#' variable in `x` that should be used for weighting. Default is `NULL`, so no +#' weights are used. +#' @param digits Optional scalar, indicating the amount of digits after decimal +#' point when rounding estimates and values. +#' @param ... Currently not used +#' @inheritParams find_columns +#' +#' @return A data frame with information on mean and further summary statistics +#' for each sub-group. +#' +#' @details This function is comparable to `aggregate(x, group, mean)`, but provides +#' some further information, including summary statistics from a One-Way-ANOVA +#' using `x` as dependent and `group` as independent variable. [`emmeans::contrast()`] +#' is used to get p-values for each sub-group. P-values indicate whether each +#' group-mean is significantly different from the total mean. +#' +#' @examples +#' data(efc) +#' means_by_group(efc, "c12hour", "e42dep") +#' +#' data(iris) +#' means_by_group(iris, "Sepal.Width", "Species") +#' +#' # weighting +#' efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5)) +#' means_by_group(efc, "c12hour", "e42dep", weights = "weight") +#' @export +means_by_group <- function(x, ...) { + UseMethod("means_by_group") +} + + +#' @export +means_by_group.default <- function(x, ...) { + insight::format_error("`means_by_group()` does not work for objects of class `", class(x)[1], "`.") +} + + +#' @rdname means_by_group +#' @export +means_by_group.numeric <- function(x, + group = NULL, + ci = 0.95, + weights = NULL, + digits = NULL, + ...) { + # sanity check for arguments + + # "group" must be provided + if (is.null(group)) { + insight::format_error("Argument `group` is missing.") + } + + # group must be of same length as x + if (length(group) != length(x)) { + insight::format_error("Argument `group` must be of same length as `x`.") + } + + # if weights are provided, must be of same length as x + if (!is.null(weights) && length(weights) != length(x)) { + insight::format_error("Argument `weights` must be of same length as `x`.") + } + + # if weights are NULL, set weights to 1 + if (is.null(weights)) weights <- rep(1, length(x)) + + # retrieve labels + var_mean_label <- attr(x, "label", exact = TRUE) + var_grp_label <- attr(group, "label", exact = TRUE) + + # if no labels present, use variable names directly + if (is.null(var_mean_label)) { + var_mean_label <- deparse(substitute(x)) + } + if (is.null(var_grp_label)) { + var_grp_label <- deparse(substitute(group)) + } + + # coerce group to factor if numeric, or convert labels to levels, if factor + if (is.factor(group)) { + group <- tryCatch(labels_to_levels(group, verbose = FALSE), error = function(e) group) + } else { + group <- to_factor(group) + } + + data <- stats::na.omit(data.frame( + x = x, + group = group, + weights = weights, + stringsAsFactors = FALSE + )) + + # get grouped means table + out <- .means_by_group(data, ci = ci) + + # attributes + attr(out, "var_mean_label") <- var_mean_label + attr(out, "var_grp_label") <- var_grp_label + attr(out, "digits") <- digits + + class(out) <- c("dw_groupmeans", "data.frame") + out +} + + +#' @rdname means_by_group +#' @export +means_by_group.data.frame <- function(x, + select = NULL, + group = NULL, + ci = 0.95, + weights = NULL, + digits = NULL, + exclude = NULL, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ...) { + # evaluate select/exclude, may be select-helpers + select <- .select_nse(select, + x, + exclude, + ignore_case, + regex = regex, + verbose = verbose + ) + + if (is.null(weights)) { + w <- NULL + } else if (is.character(weights)) { + w <- x[[weights]] + } else { + w <- weights + } + + out <- lapply(select, function(i) { + # if no labels present, use variable names directy + if (is.null(attr(x[[i]], "label", exact = TRUE))) { + attr(x[[i]], "label") <- i + } + if (is.null(attr(x[[group]], "label", exact = TRUE))) { + attr(x[[group]], "label") <- group + } + # compute means table + means_by_group(x[[i]], group = x[[group]], ci = ci, weights = w, digits = digits, ...) + }) + + class(out) <- c("dw_groupmeans_list", "list") + out +} + + +#' @keywords internal +.means_by_group <- function(data, ci = 0.95) { + # compute anova statistics for mean table + if (is.null(data$weights) || all(data$weights == 1)) { + fit <- stats::lm(x ~ group, data = data) + } else { + fit <- stats::lm(x ~ group, weights = data$weights, data = data) + } + + # summary table data + groups <- split(data$x, data$group) + group_weights <- split(data$weights, data$group) + out <- do.call(rbind, Map(function(x, w) { + data.frame( + Mean = weighted_mean(x, weights = w), + SD = weighted_sd(x, weights = w), + N = round(sum(w)), + stringsAsFactors = FALSE + ) + }, groups, group_weights)) + + # add group names + out$Category <- levels(data$group) + out$p <- out$CI_high <- out$CI_low <- NA + + # p-values of contrast-means + if (insight::check_if_installed("emmeans", quietly = TRUE)) { + # create summary table of contrasts, for p-values and confidence intervals + predicted <- emmeans::emmeans(fit, specs = "group", level = ci) + contrasts <- emmeans::contrast(predicted, method = "eff") + # add p-values and confidence intervals to "out" + if (!is.null(ci) && !is.na(ci)) { + summary_table <- as.data.frame(predicted) + out$CI_low <- summary_table$lower.CL + out$CI_high <- summary_table$upper.CL + } + summary_table <- as.data.frame(contrasts) + out$p <- summary_table$p.value + } + + # reorder columns + out <- out[c("Category", "Mean", "N", "SD", "CI_low", "CI_high", "p")] + + # finally, add total-row + out <- rbind( + out, + data.frame( + Category = "Total", + Mean = weighted_mean(data$x, weights = data$weights), + N = nrow(data), + SD = weighted_sd(data$x, weights = data$weights), + CI_low = NA, + CI_high = NA, + p = NA, + stringsAsFactors = FALSE + ) + ) + + # get anova statistics for mean table + sum.fit <- summary(fit) + + # r-squared values + r2 <- sum.fit$r.squared + r2.adj <- sum.fit$adj.r.squared + + # F-statistics + fstat <- sum.fit$fstatistic + pval <- stats::pf(fstat[1], fstat[2], fstat[3], lower.tail = FALSE) + + # copy as attributes + attr(out, "r2") <- r2 + attr(out, "ci") <- ci + attr(out, "adj.r2") <- r2.adj + attr(out, "fstat") <- fstat[1] + attr(out, "p.value") <- pval + + out +} + + +# methods ----------------- + +#' @export +format.dw_groupmeans <- function(x, digits = NULL, ...) { + if (is.null(digits)) { + digits <- attr(x, "digits", exact = TRUE) + } + if (is.null(digits)) { + digits <- 2 + } + x$N <- insight::format_value(x$N, digits = 0) + insight::format_table(remove_empty_columns(x), digits = digits, ...) +} + +#' @export +print.dw_groupmeans <- function(x, digits = NULL, ...) { + out <- format(x, digits = digits, ...) + + # caption + l1 <- attributes(x)$var_mean_label + l2 <- attributes(x)$var_grp_label + if (!is.null(l1) && !is.null(l2)) { + caption <- c(paste0("# Mean of ", l1, " by ", l2), "blue") + } else { + caption <- NULL + } + + # footer + footer <- paste0( + "\nAnova: R2=", insight::format_value(attributes(x)$r2, digits = 3), + "; adj.R2=", insight::format_value(attributes(x)$adj.r2, digits = 3), + "; F=", insight::format_value(attributes(x)$fstat, digits = 3), + "; ", insight::format_p(attributes(x)$p.value, whitespace = FALSE), + "\n" + ) + + cat(insight::export_table(out, caption = caption, footer = footer, ...)) +} + +#' @export +print.dw_groupmeans_list <- function(x, digits = NULL, ...) { + for (i in seq_along(x)) { + if (i > 1) cat("\n") + print(x[[i]], digits = digits, ...) + } +} diff --git a/_pkgdown.yaml b/_pkgdown.yaml index 9d321aa78..1da6b0661 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -59,6 +59,7 @@ reference: - data_codebook - data_tabulate - data_peek + - means_by_group - contains("distribution") - kurtosis - smoothness diff --git a/man/means_by_group.Rd b/man/means_by_group.Rd new file mode 100644 index 000000000..9434452ad --- /dev/null +++ b/man/means_by_group.Rd @@ -0,0 +1,125 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/means_by_group.R +\name{means_by_group} +\alias{means_by_group} +\alias{means_by_group.numeric} +\alias{means_by_group.data.frame} +\title{Summary of mean values by group} +\usage{ +means_by_group(x, ...) + +\method{means_by_group}{numeric}(x, group = NULL, ci = 0.95, weights = NULL, digits = NULL, ...) + +\method{means_by_group}{data.frame}( + x, + select = NULL, + group = NULL, + ci = 0.95, + weights = NULL, + digits = NULL, + exclude = NULL, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ... +) +} +\arguments{ +\item{x}{A vector or a data frame.} + +\item{...}{Currently not used} + +\item{group}{If \code{x} is a numeric vector, \code{group} should be a factor that +indicates the group-classifying categories. If \code{x} is a data frame, \code{group} +should be a character string, naming the variable in \code{x} that is used for +grouping. Numeric vectors are coerced to factors. Not that \code{group} should +only refer to a single variable.} + +\item{ci}{Level of confidence interval for mean estimates. Default is \code{0.95}. +Use \code{ci = NA} to suppress confidence intervals.} + +\item{weights}{If \code{x} is a numeric vector, \code{weights} should be a vector of +weights that will be applied to weight all observations. If \code{x} is a data +frame, \code{weights} can also be a character string indicating the name of the +variable in \code{x} that should be used for weighting. Default is \code{NULL}, so no +weights are used.} + +\item{digits}{Optional scalar, indicating the amount of digits after decimal +point when rounding estimates and values.} + +\item{select}{Variables that will be included when performing the required +tasks. Can be either +\itemize{ +\item a variable specified as a literal variable name (e.g., \code{column_name}), +\item a string with the variable name (e.g., \code{"column_name"}), or a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a formula with variable names (e.g., \code{~column_1 + column_2}), +\item a vector of positive integers, giving the positions counting from the left +(e.g. \code{1} or \code{c(1, 3, 5)}), +\item a vector of negative integers, giving the positions counting from the +right (e.g., \code{-1} or \code{-1:-3}), +\item one of the following select-helpers: \code{starts_with()}, \code{ends_with()}, +\code{contains()}, a range using \code{:} or \code{regex("")}. \code{starts_with()}, +\code{ends_with()}, and \code{contains()} accept several patterns, e.g +\code{starts_with("Sep", "Petal")}. +\item or a function testing for logical conditions, e.g. \code{is.numeric()} (or +\code{is.numeric}), or any user-defined function that selects the variables +for which the function returns \code{TRUE} (like: \code{foo <- function(x) mean(x) > 3}), +\item ranges specified via literal variable names, select-helpers (except +\code{regex()}) and (user-defined) functions can be negated, i.e. return +non-matching elements, when prefixed with a \code{-}, e.g. \code{-ends_with("")}, +\code{-is.numeric} or \code{-(Sepal.Width:Petal.Length)}. \strong{Note:} Negation means +that matches are \emph{excluded}, and thus, the \code{exclude} argument can be +used alternatively. For instance, \code{select=-ends_with("Length")} (with +\code{-}) is equivalent to \code{exclude=ends_with("Length")} (no \code{-}). In case +negation should not work as expected, use the \code{exclude} argument instead. +} + +If \code{NULL}, selects all columns. Patterns that found no matches are silently +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} + +\item{exclude}{See \code{select}, however, column names matched by the pattern +from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), +excludes no columns.} + +\item{ignore_case}{Logical, if \code{TRUE} and when one of the select-helpers or +a regular expression is used in \code{select}, ignores lower/upper case in the +search pattern when matching against variable names.} + +\item{regex}{Logical, if \code{TRUE}, the search pattern from \code{select} will be +treated as regular expression. When \code{regex = TRUE}, select \emph{must} be a +character string (or a variable containing a character string) and is not +allowed to be one of the supported select-helpers or a character vector +of length > 1. \code{regex = TRUE} is comparable to using one of the two +select-helpers, \code{select = contains("")} or \code{select = regex("")}, however, +since the select-helpers may not work when called from inside other +functions (see 'Details'), this argument may be used as workaround.} + +\item{verbose}{Toggle warnings.} +} +\value{ +A data frame with information on mean and further summary statistics +for each sub-group. +} +\description{ +Computes summary table of means by groups. +} +\details{ +This function is comparable to \code{aggregate(x, group, mean)}, but provides +some further information, including summary statistics from a One-Way-ANOVA +using \code{x} as dependent and \code{group} as independent variable. \code{\link[emmeans:contrast]{emmeans::contrast()}} +is used to get p-values for each sub-group. P-values indicate whether each +group-mean is significantly different from the total mean. +} +\examples{ +data(efc) +means_by_group(efc, "c12hour", "e42dep") + +data(iris) +means_by_group(iris, "Sepal.Width", "Species") + +# weighting +efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5)) +means_by_group(efc, "c12hour", "e42dep", weights = "weight") +} diff --git a/tests/testthat/_snaps/means_by_group.md b/tests/testthat/_snaps/means_by_group.md new file mode 100644 index 000000000..78a43d8b4 --- /dev/null +++ b/tests/testthat/_snaps/means_by_group.md @@ -0,0 +1,172 @@ +# meany_by_group + + Code + means_by_group(efc, "c12hour", "e42dep") + Output + # Mean of average number of hours of care per week by elder's dependency + + Category | Mean | N | SD | 95% CI | p + ---------------------------------------------------------------------- + independent | 17.00 | 2 | 11.31 | [-68.46, 102.46] | 0.573 + slightly dependent | 34.25 | 4 | 29.97 | [-26.18, 94.68] | 0.626 + moderately dependent | 52.75 | 28 | 51.83 | [ 29.91, 75.59] | > .999 + severely dependent | 106.97 | 63 | 65.88 | [ 91.74, 122.19] | 0.001 + Total | 86.46 | 97 | 66.40 | | + + Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001 + +--- + + Code + means_by_group(efc, "c12hour", "e42dep", ci = 0.99) + Output + # Mean of average number of hours of care per week by elder's dependency + + Category | Mean | N | SD | 99% CI | p + ---------------------------------------------------------------------- + independent | 17.00 | 2 | 11.31 | [-96.17, 130.17] | 0.573 + slightly dependent | 34.25 | 4 | 29.97 | [-45.77, 114.27] | 0.626 + moderately dependent | 52.75 | 28 | 51.83 | [ 22.50, 83.00] | > .999 + severely dependent | 106.97 | 63 | 65.88 | [ 86.80, 127.13] | 0.001 + Total | 86.46 | 97 | 66.40 | | + + Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001 + +--- + + Code + means_by_group(efc, "c12hour", "e42dep", ci = NA) + Output + # Mean of average number of hours of care per week by elder's dependency + + Category | Mean | N | SD | p + --------------------------------------------------- + independent | 17.00 | 2 | 11.31 | 0.573 + slightly dependent | 34.25 | 4 | 29.97 | 0.626 + moderately dependent | 52.75 | 28 | 51.83 | > .999 + severely dependent | 106.97 | 63 | 65.88 | 0.001 + Total | 86.46 | 97 | 66.40 | + + Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001 + +--- + + Code + means_by_group(efc, c("neg_c_7", "c12hour"), "e42dep") + Output + # Mean of Negative impact with 7 items by elder's dependency + + Category | Mean | N | SD | 95% CI | p + ----------------------------------------------------------------- + independent | 11.00 | 2 | 0.00 | [ 5.00, 17.00] | 0.567 + slightly dependent | 10.00 | 4 | 3.16 | [ 5.76, 14.24] | 0.296 + moderately dependent | 13.71 | 28 | 3.14 | [12.11, 15.32] | 0.296 + severely dependent | 14.67 | 60 | 4.78 | [13.57, 15.76] | 0.108 + Total | 14.11 | 94 | 4.34 | | + + Anova: R2=0.063; adj.R2=0.032; F=2.009; p=0.118 + + # Mean of average number of hours of care per week by elder's dependency + + Category | Mean | N | SD | 95% CI | p + ---------------------------------------------------------------------- + independent | 17.00 | 2 | 11.31 | [-68.46, 102.46] | 0.573 + slightly dependent | 34.25 | 4 | 29.97 | [-26.18, 94.68] | 0.626 + moderately dependent | 52.75 | 28 | 51.83 | [ 29.91, 75.59] | > .999 + severely dependent | 106.97 | 63 | 65.88 | [ 91.74, 122.19] | 0.001 + Total | 86.46 | 97 | 66.40 | | + + Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001 + +--- + + Code + means_by_group(efc, c("neg_c_7", "c12hour"), "e42dep", ci = NA) + Output + # Mean of Negative impact with 7 items by elder's dependency + + Category | Mean | N | SD | p + ------------------------------------------------ + independent | 11.00 | 2 | 0.00 | 0.567 + slightly dependent | 10.00 | 4 | 3.16 | 0.296 + moderately dependent | 13.71 | 28 | 3.14 | 0.296 + severely dependent | 14.67 | 60 | 4.78 | 0.108 + Total | 14.11 | 94 | 4.34 | + + Anova: R2=0.063; adj.R2=0.032; F=2.009; p=0.118 + + # Mean of average number of hours of care per week by elder's dependency + + Category | Mean | N | SD | p + --------------------------------------------------- + independent | 17.00 | 2 | 11.31 | 0.573 + slightly dependent | 34.25 | 4 | 29.97 | 0.626 + moderately dependent | 52.75 | 28 | 51.83 | > .999 + severely dependent | 106.97 | 63 | 65.88 | 0.001 + Total | 86.46 | 97 | 66.40 | + + Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001 + +--- + + Code + means_by_group(efc, c("neg_c_7", "c12hour"), "e42dep", ci = 0.99) + Output + # Mean of Negative impact with 7 items by elder's dependency + + Category | Mean | N | SD | 99% CI | p + ----------------------------------------------------------------- + independent | 11.00 | 2 | 0.00 | [ 3.05, 18.95] | 0.567 + slightly dependent | 10.00 | 4 | 3.16 | [ 4.38, 15.62] | 0.296 + moderately dependent | 13.71 | 28 | 3.14 | [11.59, 15.84] | 0.296 + severely dependent | 14.67 | 60 | 4.78 | [13.22, 16.12] | 0.108 + Total | 14.11 | 94 | 4.34 | | + + Anova: R2=0.063; adj.R2=0.032; F=2.009; p=0.118 + + # Mean of average number of hours of care per week by elder's dependency + + Category | Mean | N | SD | 99% CI | p + ---------------------------------------------------------------------- + independent | 17.00 | 2 | 11.31 | [-96.17, 130.17] | 0.573 + slightly dependent | 34.25 | 4 | 29.97 | [-45.77, 114.27] | 0.626 + moderately dependent | 52.75 | 28 | 51.83 | [ 22.50, 83.00] | > .999 + severely dependent | 106.97 | 63 | 65.88 | [ 86.80, 127.13] | 0.001 + Total | 86.46 | 97 | 66.40 | | + + Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001 + +--- + + Code + means_by_group(efc$c12hour, efc$e42dep) + Output + # Mean of average number of hours of care per week by elder's dependency + + Category | Mean | N | SD | 95% CI | p + ---------------------------------------------------------------------- + independent | 17.00 | 2 | 11.31 | [-68.46, 102.46] | 0.573 + slightly dependent | 34.25 | 4 | 29.97 | [-26.18, 94.68] | 0.626 + moderately dependent | 52.75 | 28 | 51.83 | [ 29.91, 75.59] | > .999 + severely dependent | 106.97 | 63 | 65.88 | [ 91.74, 122.19] | 0.001 + Total | 86.46 | 97 | 66.40 | | + + Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001 + +--- + + Code + means_by_group(efc$c12hour, efc$e42dep, ci = NA) + Output + # Mean of average number of hours of care per week by elder's dependency + + Category | Mean | N | SD | p + --------------------------------------------------- + independent | 17.00 | 2 | 11.31 | 0.573 + slightly dependent | 34.25 | 4 | 29.97 | 0.626 + moderately dependent | 52.75 | 28 | 51.83 | > .999 + severely dependent | 106.97 | 63 | 65.88 | 0.001 + Total | 86.46 | 97 | 66.40 | + + Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001 + diff --git a/tests/testthat/_snaps/windows/means_by_group.md b/tests/testthat/_snaps/windows/means_by_group.md new file mode 100644 index 000000000..198069da9 --- /dev/null +++ b/tests/testthat/_snaps/windows/means_by_group.md @@ -0,0 +1,34 @@ +# meany_by_group, weighted + + Code + means_by_group(efc, "c12hour", "e42dep", weights = "weight") + Output + # Mean of average number of hours of care per week by elder's dependency + + Category | Mean | N | SD | 95% CI | p + ---------------------------------------------------------------------- + independent | 16.92 | 3 | 11.31 | [-60.82, 94.66] | 0.486 + slightly dependent | 33.56 | 4 | 29.75 | [-26.93, 94.05] | 0.593 + moderately dependent | 52.74 | 26 | 54.44 | [ 28.71, 76.76] | 0.996 + severely dependent | 108.08 | 67 | 65.40 | [ 93.01, 123.16] | < .001 + Total | 88.11 | 97 | 67.01 | | + + Anova: R2=0.191; adj.R2=0.165; F=7.329; p<.001 + +--- + + Code + means_by_group(efc, "c12hour", "e42dep", weights = "weight", ci = NA) + Output + # Mean of average number of hours of care per week by elder's dependency + + Category | Mean | N | SD | p + --------------------------------------------------- + independent | 16.92 | 3 | 11.31 | 0.486 + slightly dependent | 33.56 | 4 | 29.75 | 0.593 + moderately dependent | 52.74 | 26 | 54.44 | 0.996 + severely dependent | 108.08 | 67 | 65.40 | < .001 + Total | 88.11 | 97 | 67.01 | + + Anova: R2=0.191; adj.R2=0.165; F=7.329; p<.001 + diff --git a/tests/testthat/test-labelled_data.R b/tests/testthat/test-labelled_data.R index b0f92c730..0b7e37a4d 100644 --- a/tests/testthat/test-labelled_data.R +++ b/tests/testthat/test-labelled_data.R @@ -4,13 +4,13 @@ data(efc, package = "datawizard") test_that("reverse, labels preserved", { # factor, label - expect_equal( + expect_identical( attr(reverse(efc$e42dep), "label", exact = TRUE), "elder's dependency" ) # factor, labels - expect_equal( - names(attr(reverse(efc$e42dep), "labels", exact = TRUE)), + expect_named( + attr(reverse(efc$e42dep), "labels", exact = TRUE), names(attr(efc$e42dep, "labels", exact = TRUE)) ) expect_equal( @@ -19,13 +19,13 @@ test_that("reverse, labels preserved", { ignore_attr = TRUE ) # numeric - expect_equal( - names(attr(reverse(efc$c12hour), "labels", exact = TRUE)), + expect_named( + attr(reverse(efc$c12hour), "labels", exact = TRUE), names(attr(efc$c12hour, "labels", exact = TRUE)) ) # data frame - labels <- sapply(reverse(efc), function(i) attr(i, "label", exact = TRUE)) - expect_equal( + labels <- sapply(reverse(efc), attr, which = "label", exact = TRUE) + expect_identical( labels, c( c12hour = "average number of hours of care per week", @@ -42,8 +42,8 @@ test_that("reverse, labels preserved", { # data_merge ----------------------------------- test_that("data_merge, labels preserved", { - labels <- sapply(data_merge(efc[c(1:2)], efc[c(3:4)], verbose = FALSE), function(i) attr(i, "label", exact = TRUE)) - expect_equal( + labels <- sapply(data_merge(efc[1:2], efc[3:4], verbose = FALSE), attr, which = "label", exact = TRUE) + expect_identical( labels, c( c12hour = "average number of hours of care per week", @@ -72,8 +72,8 @@ test_that("data_extract, labels preserved", { ignore_attr = TRUE ) # data frame - labels <- sapply(data_extract(efc, select = c("e42dep", "c172code")), function(i) attr(i, "label", exact = TRUE)) - expect_equal( + labels <- sapply(data_extract(efc, select = c("e42dep", "c172code")), attr, which = "label", exact = TRUE) + expect_identical( labels, c(e42dep = "elder's dependency", c172code = "carer's level of education") ) @@ -142,8 +142,8 @@ test_that("data_rename, labels preserved", { ignore_attr = TRUE ) # data frame - labels <- sapply(data_remove(efc, starts_with("c1")), function(i) attr(i, "label", exact = TRUE)) - expect_equal( + labels <- sapply(data_remove(efc, starts_with("c1")), attr, which = "label", exact = TRUE) + expect_identical( labels, c(e16sex = "elder's gender", e42dep = "elder's dependency", neg_c_7 = "Negative impact with 7 items") ) @@ -255,12 +255,12 @@ test_that("data_match, labels preserved", { test_that("data_filter, labels preserved", { x <- data_filter(efc, c172code == 1 & c12hour > 40) # factor - expect_equal( + expect_identical( attr(x$e42dep, "label", exact = TRUE), attr(efc$e42dep, "label", exact = TRUE) ) # numeric - expect_equal( + expect_identical( attr(x$c12hour, "label", exact = TRUE), attr(efc$c12hour, "label", exact = TRUE) ) @@ -271,7 +271,9 @@ test_that("data_filter, labels preserved", { # convert_to_na ----------------------------------- test_that("convert_to_na, labels preserved", { - expect_message(x <- convert_to_na(efc, na = c(2, "2"), select = starts_with("e"))) + expect_message({ + x <- convert_to_na(efc, na = c(2, "2"), select = starts_with("e")) + }) # factor expect_equal( attr(x$e42dep, "label", exact = TRUE), @@ -301,7 +303,7 @@ test_that("convert_to_na, labels preserved", { ) # drop unused value labels x <- convert_to_na(efc$c172code, na = 2) - expect_equal( + expect_identical( attr(x, "labels", exact = TRUE), c(`low level of education` = 1, `high level of education` = 3) ) diff --git a/tests/testthat/test-means_by_group.R b/tests/testthat/test-means_by_group.R new file mode 100644 index 000000000..49226fa34 --- /dev/null +++ b/tests/testthat/test-means_by_group.R @@ -0,0 +1,21 @@ +test_that("mean_by_group", { + skip_if_not_installed("emmeans") + data(efc) + expect_snapshot(means_by_group(efc, "c12hour", "e42dep")) + expect_snapshot(means_by_group(efc, "c12hour", "e42dep", ci = 0.99)) + expect_snapshot(means_by_group(efc, "c12hour", "e42dep", ci = NA)) + expect_snapshot(means_by_group(efc, c("neg_c_7", "c12hour"), "e42dep")) + expect_snapshot(means_by_group(efc, c("neg_c_7", "c12hour"), "e42dep", ci = NA)) + expect_snapshot(means_by_group(efc, c("neg_c_7", "c12hour"), "e42dep", ci = 0.99)) + expect_snapshot(means_by_group(efc$c12hour, efc$e42dep)) + expect_snapshot(means_by_group(efc$c12hour, efc$e42dep, ci = NA)) +}) + +test_that("mean_by_group, weighted", { + skip_if_not_installed("emmeans") + data(efc) + set.seed(123) + efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5)) + expect_snapshot(means_by_group(efc, "c12hour", "e42dep", weights = "weight"), variant = "windows") + expect_snapshot(means_by_group(efc, "c12hour", "e42dep", weights = "weight", ci = NA), variant = "windows") +})