From ea3ab5962e6922b46d92457ad3fb4e51d1a4055d Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 27 Aug 2024 17:20:58 +0200 Subject: [PATCH 01/10] Allow `n()` in `data_modify()` --- DESCRIPTION | 2 +- NEWS.md | 3 +++ R/data_modify.R | 12 ++++++++++-- man/data_modify.Rd | 6 +++++- tests/testthat/test-data_modify.R | 12 ++++++++++++ 5 files changed, 31 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6661803d3..fa190448b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.12.2.1 +Version: 0.12.2.2 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), diff --git a/NEWS.md b/NEWS.md index 752227f4d..fc29b896f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,9 @@ CHANGES * `demean()` (and `degroup()`) now also work for nested designs, if argument `nested = TRUE` and `by` specifies more than one variable. +* `data_modify()` now also accepts the short-cut `1:n()` and similar, for example + to create an index for data groups (in grouped data frames). + # datawizard 0.12.2 * Remove `htmltools` from `Suggests` in an attempt of fixing an error in CRAN diff --git a/R/data_modify.R b/R/data_modify.R index e7744c1f5..488a1d936 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -22,6 +22,9 @@ #' character vector is provided, you may not add further elements to `...`. #' - Using `NULL` as right-hand side removes a variable from the data frame. #' Example: `Petal.Width = NULL`. +#' - For (grouped) data frames, the function `n()` can be used to count the +#' number of observations and thereby, for instance, create index values by +#' using `id = 1:n()` or `id = 3:(n()+2)` and similar. #' #' Note that newly created variables can be used in subsequent expressions, #' including `.at` or `.if`. See also 'Examples'. @@ -92,7 +95,8 @@ #' grouped_efc, #' c12hour_c = center(c12hour), #' c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE), -#' c12hour_z2 = standardize(c12hour) +#' c12hour_z2 = standardize(c12hour), +#' id = 1:n() #' ) #' head(new_efc) #' @@ -352,8 +356,12 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = # finally, we can evaluate expression and get values for new variables symbol_string <- insight::safe_deparse(symbol) if (!is.null(symbol_string) && all(symbol_string == "n()")) { - # "special" functions + # "special" functions - using "n()" just returns number of rows new_variable <- nrow(data) + } else if (!is.null(symbol_string) && length(symbol_string) == 1 && grepl("n()", symbol_string, fixed = TRUE)) { + # "special" functions, like "1:n()" or similar + symbol_string <- str2lang(gsub("n()", "nrow(data)", symbol_string, fixed = TRUE)) + new_variable <- try(with(data, eval(symbol_string)), silent = TRUE) } else { # default evaluation of expression new_variable <- try(with(data, eval(symbol)), silent = TRUE) diff --git a/man/data_modify.Rd b/man/data_modify.Rd index 042962e03..d577f440e 100644 --- a/man/data_modify.Rd +++ b/man/data_modify.Rd @@ -30,6 +30,9 @@ type of expression cannot be mixed with other expressions, i.e. if a character vector is provided, you may not add further elements to \code{...}. \item Using \code{NULL} as right-hand side removes a variable from the data frame. Example: \code{Petal.Width = NULL}. +\item For (grouped) data frames, the function \code{n()} can be used to count the +number of observations and thereby, for instance, create index values by +using \code{id = 1:n()} or \code{id = 3:(n()+2)} and similar. } Note that newly created variables can be used in subsequent expressions, @@ -109,7 +112,8 @@ new_efc <- data_modify( grouped_efc, c12hour_c = center(c12hour), c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE), - c12hour_z2 = standardize(c12hour) + c12hour_z2 = standardize(c12hour), + id = 1:n() ) head(new_efc) diff --git a/tests/testthat/test-data_modify.R b/tests/testthat/test-data_modify.R index 9bb0a92d6..75e1a30f9 100644 --- a/tests/testthat/test-data_modify.R +++ b/tests/testthat/test-data_modify.R @@ -492,6 +492,18 @@ test_that("data_modify works with functions that return character vectors", { }) +test_that("data_modify 1:n() and similar works in grouped data frames", { + data(mtcars) + x <- data_group(mtcars, "gear") + out <- data_modify(x, Trials = 1:n()) + expect_identical(out$Trials[out$gear == 3], 1:15) + expect_identical(out$Trials[out$gear == 4], 1:12) + out <- data_modify(x, Trials = 3:(n()+2)) + expect_identical(out$Trials[out$gear == 3], 3:17) + expect_identical(out$Trials[out$gear == 4], 3:14) +}) + + test_that("data_modify .if/.at arguments", { data(iris) d <- iris[1:5, ] From 6698ebac624cc431b0d94d94389cbac67cc577bf Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 27 Aug 2024 17:27:18 +0200 Subject: [PATCH 02/10] lintr, styler --- tests/testthat/test-data_modify.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-data_modify.R b/tests/testthat/test-data_modify.R index 75e1a30f9..14bca59ca 100644 --- a/tests/testthat/test-data_modify.R +++ b/tests/testthat/test-data_modify.R @@ -495,10 +495,10 @@ test_that("data_modify works with functions that return character vectors", { test_that("data_modify 1:n() and similar works in grouped data frames", { data(mtcars) x <- data_group(mtcars, "gear") - out <- data_modify(x, Trials = 1:n()) + out <- data_modify(x, Trials = 1:n()) # nolint expect_identical(out$Trials[out$gear == 3], 1:15) expect_identical(out$Trials[out$gear == 4], 1:12) - out <- data_modify(x, Trials = 3:(n()+2)) + out <- data_modify(x, Trials = 3:(n() + 2)) expect_identical(out$Trials[out$gear == 3], 3:17) expect_identical(out$Trials[out$gear == 4], 3:14) }) From bae3ea5c1b0ff30cb48597ad7d034958fa5de1f1 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 27 Aug 2024 20:22:37 +0200 Subject: [PATCH 03/10] Update NEWS.md Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index fc29b896f..4a712dad1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,8 +5,8 @@ CHANGES * `demean()` (and `degroup()`) now also work for nested designs, if argument `nested = TRUE` and `by` specifies more than one variable. -* `data_modify()` now also accepts the short-cut `1:n()` and similar, for example - to create an index for data groups (in grouped data frames). +* `data_modify()` now recognizes `n()`, for example to create an index for data groups + with `1:n()` (#535). # datawizard 0.12.2 From d91a375400ffea6b0353e0deb64519f04c35b12e Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 27 Aug 2024 20:22:49 +0200 Subject: [PATCH 04/10] Update R/data_modify.R Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- R/data_modify.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data_modify.R b/R/data_modify.R index 488a1d936..0a12bb034 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -22,7 +22,7 @@ #' character vector is provided, you may not add further elements to `...`. #' - Using `NULL` as right-hand side removes a variable from the data frame. #' Example: `Petal.Width = NULL`. -#' - For (grouped) data frames, the function `n()` can be used to count the +#' - For data frames (including grouped ones), the function `n()` can be used to count the #' number of observations and thereby, for instance, create index values by #' using `id = 1:n()` or `id = 3:(n()+2)` and similar. #' From db57cb61bed02810b475cea2b9d03b0af74ab918 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 27 Aug 2024 20:51:01 +0200 Subject: [PATCH 05/10] comments --- R/data_modify.R | 13 ++++++++++-- tests/testthat/test-data_modify.R | 35 ++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/R/data_modify.R b/R/data_modify.R index 0a12bb034..c6ac081e9 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -149,6 +149,11 @@ data_modify.default <- function(data, ...) { data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify = NULL) { dots <- eval(substitute(alist(...))) + # error for data frames with no rows... + if (nrow(data) == 0) { + insight::format_error("`data_modify()` only works for data frames with at least one row.") + } + # check if we have dots, or only at/modify ---- if (length(dots)) { @@ -205,6 +210,10 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = # the data.frame method later... dots <- match.call(expand.dots = FALSE)[["..."]] + # error for data frames with no rows... + if (nrow(data) == 0) { + insight::format_error("`data_modify()` only works for data frames with at least one row.") + } grps <- attr(data, "groups", exact = TRUE) grps <- grps[[".rows"]] @@ -358,8 +367,8 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = if (!is.null(symbol_string) && all(symbol_string == "n()")) { # "special" functions - using "n()" just returns number of rows new_variable <- nrow(data) - } else if (!is.null(symbol_string) && length(symbol_string) == 1 && grepl("n()", symbol_string, fixed = TRUE)) { - # "special" functions, like "1:n()" or similar + } else if (!is.null(symbol_string) && length(symbol_string) == 1 && grepl("\\bn\\(\\)", symbol_string)) { + # "special" functions, like "1:n()" or similar - but not "1:fun()" symbol_string <- str2lang(gsub("n()", "nrow(data)", symbol_string, fixed = TRUE)) new_variable <- try(with(data, eval(symbol_string)), silent = TRUE) } else { diff --git a/tests/testthat/test-data_modify.R b/tests/testthat/test-data_modify.R index 14bca59ca..4a6c5a491 100644 --- a/tests/testthat/test-data_modify.R +++ b/tests/testthat/test-data_modify.R @@ -353,6 +353,13 @@ test_that("data_modify errors for non df", { }) +test_that("data_modify errors for empty data frames", { + data(mtcars) + x <- mtcars[1, ] + expect_error(data_modify(x[-1, ], new_var = 5), regex = "`data_modify()` only works") +}) + + test_that("data_modify errors for non df", { data(efc) a <- "center(c22hour)" # <---------------- error in variable name @@ -492,8 +499,10 @@ test_that("data_modify works with functions that return character vectors", { }) -test_that("data_modify 1:n() and similar works in grouped data frames", { +test_that("data_modify 1:n() and similar works in (grouped) data frames", { data(mtcars) + out <- data_modify(mtcars, Trials = 1:n()) # nolint + expect_identical(out$Trials, 1:32) x <- data_group(mtcars, "gear") out <- data_modify(x, Trials = 1:n()) # nolint expect_identical(out$Trials[out$gear == 3], 1:15) @@ -562,3 +571,27 @@ test_that("data_modify .if/.at arguments", { out <- data_modify(d, new_length = Petal.Length * 2, .if = is.numeric, .modify = round) expect_equal(out$new_length, c(3, 3, 3, 3, 3), ignore_attr = TRUE) }) + + +skip_if_not_installed("withr") + +withr::with_environment( + new.env(), + test_that("data_modify 1:n() and similar works in (grouped) data frames inside function calls", { + data(mtcars) + x <- data_group(mtcars, "gear") + + foo <- function(d) { + out <- data_modify(d, Trials = 1:n()) + out$Trials + } + expect_identical( + foo(x), + c( + 1L, 2L, 3L, 1L, 2L, 3L, 4L, 4L, 5L, 6L, 7L, 5L, 6L, 7L, 8L, + 9L, 10L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 11L, 1L, 2L, 3L, + 4L, 5L, 12L + ) + ) + }) +) From c700a3b0d7e7be2d530af1f086438642f18ac2b2 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 27 Aug 2024 21:22:11 +0200 Subject: [PATCH 06/10] fix test --- tests/testthat/test-data_modify.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-data_modify.R b/tests/testthat/test-data_modify.R index 4a6c5a491..177e61b2a 100644 --- a/tests/testthat/test-data_modify.R +++ b/tests/testthat/test-data_modify.R @@ -356,7 +356,10 @@ test_that("data_modify errors for non df", { test_that("data_modify errors for empty data frames", { data(mtcars) x <- mtcars[1, ] - expect_error(data_modify(x[-1, ], new_var = 5), regex = "`data_modify()` only works") + expect_error( + data_modify(x[-1, ], new_var = 5), + regex = "only works" + ) }) @@ -582,7 +585,7 @@ withr::with_environment( x <- data_group(mtcars, "gear") foo <- function(d) { - out <- data_modify(d, Trials = 1:n()) + out <- data_modify(d, Trials = 1:n()) # nolint out$Trials } expect_identical( From 6a851aa68e23eb63a2a0dd9f6a03ce4eef50c437 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 27 Aug 2024 21:23:50 +0200 Subject: [PATCH 07/10] update rd --- man/data_modify.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/data_modify.Rd b/man/data_modify.Rd index d577f440e..28533ecea 100644 --- a/man/data_modify.Rd +++ b/man/data_modify.Rd @@ -30,7 +30,7 @@ type of expression cannot be mixed with other expressions, i.e. if a character vector is provided, you may not add further elements to \code{...}. \item Using \code{NULL} as right-hand side removes a variable from the data frame. Example: \code{Petal.Width = NULL}. -\item For (grouped) data frames, the function \code{n()} can be used to count the +\item For data frames (including grouped ones), the function \code{n()} can be used to count the number of observations and thereby, for instance, create index values by using \code{id = 1:n()} or \code{id = 3:(n()+2)} and similar. } From df613b8f56ba823a80fb4269baa016d996de80ec Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 28 Aug 2024 09:58:12 +0200 Subject: [PATCH 08/10] modify error msg --- R/data_modify.R | 4 ++-- tests/testthat/test-data_modify.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/data_modify.R b/R/data_modify.R index c6ac081e9..3e30b8f68 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -151,7 +151,7 @@ data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify = # error for data frames with no rows... if (nrow(data) == 0) { - insight::format_error("`data_modify()` only works for data frames with at least one row.") + insight::format_error("`data` is an empty data frame. `data_modify()` only works for data frames with at least one row.") # nolint } # check if we have dots, or only at/modify ---- @@ -212,7 +212,7 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = # error for data frames with no rows... if (nrow(data) == 0) { - insight::format_error("`data_modify()` only works for data frames with at least one row.") + insight::format_error("`data` is an empty data frame. `data_modify()` only works for data frames with at least one row.") # nolint } grps <- attr(data, "groups", exact = TRUE) diff --git a/tests/testthat/test-data_modify.R b/tests/testthat/test-data_modify.R index 177e61b2a..4dd449455 100644 --- a/tests/testthat/test-data_modify.R +++ b/tests/testthat/test-data_modify.R @@ -358,7 +358,7 @@ test_that("data_modify errors for empty data frames", { x <- mtcars[1, ] expect_error( data_modify(x[-1, ], new_var = 5), - regex = "only works" + regex = "empty data frame" ) }) From 40cae97989ed09cca56177f4d0c627c6d995a1ff Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 29 Aug 2024 22:01:38 +0200 Subject: [PATCH 09/10] error on invalid function --- tests/testthat/test-data_modify.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/testthat/test-data_modify.R b/tests/testthat/test-data_modify.R index 4dd449455..a7a153c43 100644 --- a/tests/testthat/test-data_modify.R +++ b/tests/testthat/test-data_modify.R @@ -598,3 +598,7 @@ withr::with_environment( ) }) ) + +test_that("data_modify errors on non-defined function", { + expect_error(data_modify(iris, Species = foo())) +}) From 458568780fc32020ab7b39bcc1dd78ee352c13ac Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 21 Nov 2024 10:16:07 +0100 Subject: [PATCH 10/10] move news item --- NEWS.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4420174f9..663efa310 100644 --- a/NEWS.md +++ b/NEWS.md @@ -19,6 +19,9 @@ CHANGES * `data_read()` no longer shows warning about forthcoming breaking changes in upstream packages when reading `.RData` files. +* `data_modify()` now recognizes `n()`, for example to create an index for data groups + with `1:n()` (#535). + BUG FIXES * `describe_distribution()` no longer errors if the sample was too sparse to compute @@ -70,9 +73,6 @@ CHANGES are CRAN errors occurring when building vignettes on macOS and we couldn't determine the cause after multiple patch releases (#534). -* `data_modify()` now recognizes `n()`, for example to create an index for data groups - with `1:n()` (#535). - # datawizard 0.12.2 * Remove `htmltools` from `Suggests` in an attempt of fixing an error in CRAN