Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow n() in data_modify() #535

Merged
merged 16 commits into from
Nov 21, 2024
Merged
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ CHANGES
* `data_read()` no longer shows warning about forthcoming breaking changes
in upstream packages when reading `.RData` files.

* `data_modify()` now recognizes `n()`, for example to create an index for data groups
with `1:n()` (#535).

BUG FIXES

* `describe_distribution()` no longer errors if the sample was too sparse to compute
Expand Down
21 changes: 19 additions & 2 deletions R/data_modify.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
#' character vector is provided, you may not add further elements to `...`.
#' - Using `NULL` as right-hand side removes a variable from the data frame.
#' Example: `Petal.Width = NULL`.
#' - For data frames (including grouped ones), the function `n()` can be used to count the
#' number of observations and thereby, for instance, create index values by
#' using `id = 1:n()` or `id = 3:(n()+2)` and similar.
#'
#' Note that newly created variables can be used in subsequent expressions,
#' including `.at` or `.if`. See also 'Examples'.
Expand Down Expand Up @@ -92,7 +95,8 @@
#' grouped_efc,
#' c12hour_c = center(c12hour),
#' c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE),
#' c12hour_z2 = standardize(c12hour)
#' c12hour_z2 = standardize(c12hour),
#' id = 1:n()
#' )
#' head(new_efc)
#'
Expand Down Expand Up @@ -145,6 +149,11 @@ data_modify.default <- function(data, ...) {
data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify = NULL) {
dots <- eval(substitute(alist(...)))

# error for data frames with no rows...
if (nrow(data) == 0) {
insight::format_error("`data` is an empty data frame. `data_modify()` only works for data frames with at least one row.") # nolint
}
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved

# check if we have dots, or only at/modify ----

if (length(dots)) {
Expand Down Expand Up @@ -201,6 +210,10 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify =
# the data.frame method later...
dots <- match.call(expand.dots = FALSE)[["..."]]

# error for data frames with no rows...
if (nrow(data) == 0) {
insight::format_error("`data` is an empty data frame. `data_modify()` only works for data frames with at least one row.") # nolint
}

grps <- attr(data, "groups", exact = TRUE)
grps <- grps[[".rows"]]
Expand Down Expand Up @@ -352,8 +365,12 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify =
# finally, we can evaluate expression and get values for new variables
symbol_string <- insight::safe_deparse(symbol)
if (!is.null(symbol_string) && all(symbol_string == "n()")) {
# "special" functions
# "special" functions - using "n()" just returns number of rows
new_variable <- nrow(data)
} else if (!is.null(symbol_string) && length(symbol_string) == 1 && grepl("\\bn\\(\\)", symbol_string)) {
# "special" functions, like "1:n()" or similar - but not "1:fun()"
symbol_string <- str2lang(gsub("n()", "nrow(data)", symbol_string, fixed = TRUE))
new_variable <- try(with(data, eval(symbol_string)), silent = TRUE)
} else {
# default evaluation of expression
new_variable <- try(with(data, eval(symbol)), silent = TRUE)
Expand Down
6 changes: 5 additions & 1 deletion man/data_modify.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions tests/testthat/test-data_modify.R
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,16 @@ test_that("data_modify errors for non df", {
})


test_that("data_modify errors for empty data frames", {
data(mtcars)
x <- mtcars[1, ]
expect_error(
data_modify(x[-1, ], new_var = 5),
regex = "empty data frame"
)
})


test_that("data_modify errors for non df", {
data(efc)
a <- "center(c22hour)" # <---------------- error in variable name
Expand Down Expand Up @@ -492,6 +502,20 @@ test_that("data_modify works with functions that return character vectors", {
})


test_that("data_modify 1:n() and similar works in (grouped) data frames", {
data(mtcars)
out <- data_modify(mtcars, Trials = 1:n()) # nolint
expect_identical(out$Trials, 1:32)
x <- data_group(mtcars, "gear")
out <- data_modify(x, Trials = 1:n()) # nolint
expect_identical(out$Trials[out$gear == 3], 1:15)
expect_identical(out$Trials[out$gear == 4], 1:12)
out <- data_modify(x, Trials = 3:(n() + 2))
expect_identical(out$Trials[out$gear == 3], 3:17)
expect_identical(out$Trials[out$gear == 4], 3:14)
})


test_that("data_modify .if/.at arguments", {
data(iris)
d <- iris[1:5, ]
Expand Down Expand Up @@ -550,3 +574,31 @@ test_that("data_modify .if/.at arguments", {
out <- data_modify(d, new_length = Petal.Length * 2, .if = is.numeric, .modify = round)
expect_equal(out$new_length, c(3, 3, 3, 3, 3), ignore_attr = TRUE)
})


skip_if_not_installed("withr")

withr::with_environment(
new.env(),
test_that("data_modify 1:n() and similar works in (grouped) data frames inside function calls", {
data(mtcars)
x <- data_group(mtcars, "gear")

foo <- function(d) {
out <- data_modify(d, Trials = 1:n()) # nolint
out$Trials
}
expect_identical(
foo(x),
c(
1L, 2L, 3L, 1L, 2L, 3L, 4L, 4L, 5L, 6L, 7L, 5L, 6L, 7L, 8L,
9L, 10L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 11L, 1L, 2L, 3L,
4L, 5L, 12L
)
)
})
)

test_that("data_modify errors on non-defined function", {
expect_error(data_modify(iris, Species = foo()))
})
Loading