Skip to content

Commit

Permalink
Draft row_count()
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Oct 10, 2024
1 parent 9dff2ae commit 1dcec60
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 1 deletion.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.13.0.2
Version: 0.13.0.4
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ export(reshape_longer)
export(reshape_wider)
export(reverse)
export(reverse_scale)
export(row_count)
export(row_means)
export(row_to_colnames)
export(rowid_as_column)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ CHANGES
variables, can now also be a character vector with quoted variable names,
including a colon to indicate a range of several variables (e.g. `"cyl:gear"`).

* New function `row_count()`, to count row-wise how often a specific value occurs.

BUG FIXES

* `describe_distribution()` no longer errors if the sample was too sparse to compute
Expand Down
68 changes: 68 additions & 0 deletions R/row_count.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#' @title Count specific values row-wise
#' @name row_count
#' @description `row_count()` mimics base R's `rowSums()`, with sums for a
#' specific value indicated by `count`. Hence, it is equivalent to
#' `rowSums(x == count, na.rm = TRUE)`.
#'
#' @param data A data frame with at least two columns, where number of specific
#' values are counted row-wise.
#' @param count The value for which the row sum should be computed. Must be a
#' single value: a numeric value, a character string (for factors or character
#' vectors), `NA` or `Inf`.
#' @inheritParams extract_column_names
#' @inheritParams row_means
#'
#' @return A vector with row-wise counts of values specified in `count`.
#'
#' @examples
#' dat <- data.frame(
#'   c1 = c(1, 2, NA, 4),
#'   c2 = c(NA, 2, NA, 5),
#'   c3 = c(NA, 4, NA, NA),
#'   c4 = c(2, 3, 7, 8)
#' )
#'
#' # count all 2s per row
#' row_count(dat, count = 2)
#' # count all missing values per row
#' row_count(dat, count = NA)
#'
#' @export
row_count <- function(data,
                      select = NULL,
                      exclude = NULL,
                      count = NULL,
                      ignore_case = FALSE,
                      regex = FALSE,
                      verbose = TRUE) {
  # evaluate arguments. This is deliberately kept before the `count` checks,
  # so the original order of evaluation (selection first) is preserved.
  select <- .select_nse(select,
    data,
    exclude,
    ignore_case = ignore_case,
    regex = regex,
    verbose = verbose
  )

  if (is.null(count)) {
    insight::format_error("`count` must be a valid value (including `NA` or `Inf`), but not `NULL`.")
  }

  # `count` is used as the condition of an `if ()` below, which requires a
  # single value - fail early with an informative message instead
  if (length(count) != 1L) {
    insight::format_error("`count` must be a single value (including `NA` or `Inf`).")
  }

  if (is.null(select) || length(select) == 0) {
    insight::format_error("No columns selected.")
  }

  data <- .coerce_to_dataframe(data[select])

  # check if we have a data frame with at least two columns
  if (ncol(data) < 2) {
    insight::format_error("`data` must be a data frame with at least two columns.")
  }

  # special case: count missing. `data == NA` would only yield `NA`, so
  # missing values need to be detected with `is.na()`
  if (is.na(count)) {
    rowSums(is.na(data))
  } else {
    # `na.rm = TRUE` makes missing cells count as "no match"
    rowSums(data == count, na.rm = TRUE)
  }
}
99 changes: 99 additions & 0 deletions man/row_count.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkgdown/_pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ reference:
- kurtosis
- smoothness
- skewness
- row_count
- row_means
- weighted_mean
- mean_sd
Expand Down
25 changes: 25 additions & 0 deletions tests/testthat/test-row_count.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
test_that("row_count", {
  # numeric columns only: count a specific number, and count missing values
  d_mn <- data.frame(
    c1 = c(1, 2, NA, 4),
    c2 = c(NA, 2, NA, 5),
    c3 = c(NA, 4, NA, NA),
    c4 = c(2, 3, 7, 8)
  )
  expect_identical(row_count(d_mn, count = 2), c(1, 2, 0, 0))
  expect_identical(row_count(d_mn, count = NA), c(2, 0, 3, 1))

  # mixed character and numeric columns: count a string, and count `Inf`.
  # `stringsAsFactors` is set explicitly, so the columns have the same type
  # before and after R 4.0.
  d_mn <- data.frame(
    c1 = c("a", "b", NA, "c"),
    c2 = c(NA, "b", NA, "d"),
    c3 = c(NA, 4, NA, NA),
    c4 = c(2, 3, 7, Inf),
    stringsAsFactors = FALSE
  )
  expect_identical(row_count(d_mn, count = "b"), c(0, 2, 0, 0))
  expect_identical(row_count(d_mn, count = Inf), c(0, 0, 0, 1))
})

test_that("row_count, errors or messages", {
  data(iris)
  # `count` is missing (defaults to `NULL`)
  expect_error(expect_warning(row_count(iris, select = "abc")), regexp = "must be a valid")
  # the pattern is matched case-sensitively, so it must be spelled exactly
  # like the error message ("No columns selected.")
  expect_error(expect_warning(row_count(iris, select = "abc", count = 3)), regexp = "No columns")
  # fewer than two columns
  expect_error(row_count(iris[1], count = 3), regexp = "with at least")
})

0 comments on commit 1dcec60

Please sign in to comment.