nathaneastwood · etiennebacher · Jan 6, 2023 · Jan 6, 2023 · Jan 6, 2023 · Jan 6, 2023
diff --git a/R/group_utils.R b/R/group_utils.R
@@ -63,11 +63,18 @@ calculate_groups <- function(data, groups, drop = group_by_drop_default(data)) {
   unique_groups <- unique(data[, groups, drop = FALSE])
   is_factor <- do.call(c, lapply(unique_groups, function(x) is.factor(x)))
   n_comb <- nrow(unique_groups)
+
+  temp_id <- paste(sample(letters), collapse = "")
+
+  # Concatenate group variables
+  pasted_groups <- do.call(paste, c(unique_groups, sep = "."))
+  data[[temp_id]] <- do.call(paste, c(data[, groups, drop = FALSE], sep = "."))
+
   rows <- rep(list(NA), n_comb)
-  data_groups <- interaction(data[, groups, drop = TRUE])
   for (i in seq_len(n_comb)) {
-    rows[[i]] <- which(data_groups %in% interaction(unique_groups[i, groups]))
+    rows[[i]] <- which(data[[temp_id]] %in% pasted_groups[i])
   }
+  data[[temp_id]] <- NULL
 
   if (!isTRUE(drop) && any(is_factor)) {
     na_lvls <- do.call(

diff --git a/inst/tinytest/test_group_by.R b/inst/tinytest/test_group_by.R
@@ -154,8 +154,54 @@ df <- data.frame(x = 1:2, y = 1:2) %>%
   structure(class = c("grouped_df", "data.frame"))
 expect_true(group_by_drop_default(df), info = "group_by_drop_default() is forgiving about corrupt grouped df")
 
+
+# with NA in groups ---------------------------------------------------
+
+# One group
 res <- data.frame(x = c("apple", NA, "banana"), y = 1:3, stringsAsFactors = FALSE) %>%
   group_by(x) %>%
   group_data()
-expect_identical(res$x, c("apple", "banana", NA_character_), info = "group_by() puts NA groups last in STRSXP")
-expect_identical(res$.rows, list(1L, 3L, 2L), info = "group_by() puts NA groups last in STRSXP")
+
+expect_identical(
+  res$x, 
+  c("apple", "banana", NA_character_), 
+  info = "group_by() puts NA groups last in STRSXP"
+)
+expect_identical(
+  res$.rows, 
+  list(1L, 3L, 2L), 
+  info = "group_by() puts NA groups last in STRSXP"
+)
+
+# Several groups
+d <- data.frame(
+  orig = rep(c("France", "UK"), each = 4),
+  dest = rep(c("Spain", "Germany"), times = 4),
+  year = rep(rep(c(2010, 2011), each = 2), 2),
+  value = 1:8
+)
+d[2, 1] <- NA
+d[7, 2] <- NA
+
+res <- d %>%
+  group_by(orig, dest) %>%
+  group_data()
+
+expect_identical(nrow(res), 6L)
+expect_identical(
+  res[5:6, 1:3],
+  structure(
+    list2DF(
+      list(
+        orig = c("UK", NA), 
+        dest = c(NA, "Germany"), 
+        .rows = list(7L, 2L)
+      )
+    ),
+    row.names = 5:6
+  )
+)
+expect_identical(
+  vapply(res$.rows, length, FUN.VALUE = numeric(1L)),
+  c(1, 2, 2, 1, 1, 1)
+)