From 23bbb402adbc2562de1b4c6d47ea5eaf2c0d1d1a Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Fri, 6 Jan 2023 10:34:55 +0100
Subject: [PATCH 1/4] replace `interaction()`

---
 R/group_utils.R | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/R/group_utils.R b/R/group_utils.R
index 4859841..e41d828 100644
--- a/R/group_utils.R
+++ b/R/group_utils.R
@@ -65,8 +65,13 @@ calculate_groups <- function(data, groups, drop = group_by_drop_default(data)) {
   n_comb <- nrow(unique_groups)
   rows <- rep(list(NA), n_comb)
   data_groups <- interaction(data[, groups, drop = TRUE])
+  
+  # Concatenate group variables
+  pasted_groups <- do.call(paste, c(unique_groups[, groups, drop = FALSE], sep = "."))
+  pasted_groups[is.na(unique_groups)] <- NA
+  
   for (i in seq_len(n_comb)) {
-    rows[[i]] <- which(data_groups %in% interaction(unique_groups[i, groups]))
+    rows[[i]] <- which(data_groups %in% pasted_groups[i])
   }
 
   if (!isTRUE(drop) && any(is_factor)) {

From 59bf4d9a815ab90ee355554c297ea9c4f6fa0982 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Fri, 6 Jan 2023 13:45:13 +0100
Subject: [PATCH 2/4] add some tests when several groups contain NA

---
 inst/tinytest/test_group_by.R | 44 +++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/inst/tinytest/test_group_by.R b/inst/tinytest/test_group_by.R
index eeb56b2..a59ade1 100644
--- a/inst/tinytest/test_group_by.R
+++ b/inst/tinytest/test_group_by.R
@@ -154,8 +154,48 @@ df <- data.frame(x = 1:2, y = 1:2) %>%
   structure(class = c("grouped_df", "data.frame"))
 expect_true(group_by_drop_default(df), info = "group_by_drop_default() is forgiving about corrupt grouped df")
 
+
+# with NA in groups ---------------------------------------------------
+
+# One group
 res <- data.frame(x = c("apple", NA, "banana"), y = 1:3, stringsAsFactors = FALSE) %>%
   group_by(x) %>%
   group_data()
-expect_identical(res$x, c("apple", "banana", NA_character_), info = "group_by() puts NA groups last in STRSXP")
-expect_identical(res$.rows, list(1L, 3L, 2L), info = "group_by() puts NA groups last in STRSXP")
+
+expect_identical(
+  res$x, 
+  c("apple", "banana", NA_character_), 
+  info = "group_by() puts NA groups last in STRSXP"
+)
+expect_identical(
+  res$.rows, 
+  list(1L, 3L, 2L), 
+  info = "group_by() puts NA groups last in STRSXP"
+)
+
+# Several groups
+d <- data.frame(
+  orig = rep(c("France", "UK"), each = 4),
+  dest = rep(c("Spain", "Germany"), times = 4),
+  year = rep(c(2010, 2011), each = 4),
+  value = 1:8
+)
+d[2, 1] <- NA
+d[7, 2] <- NA
+
+res <- d %>%
+  group_by(orig, dest) %>%
+  group_data()
+
+expect_identical(nrow(res), 6L)
+expect_identical(
+  res[5:6, 1:2],
+  structure(
+    data.frame(orig = c("UK", NA), dest = c(NA, "Germany")),
+    row.names = 5:6
+  )
+)
+expect_identical(
+  vapply(res$.rows, length, FUN.VALUE = numeric(1L)),
+  c(1, 2, 2, 1, 0, 2)
+)

From 3584621515274dc4e5bd2bd2f490030e6dd28e28 Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Fri, 6 Jan 2023 13:47:43 +0100
Subject: [PATCH 3/4] fix previous test

---
 inst/tinytest/test_group_by.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tinytest/test_group_by.R b/inst/tinytest/test_group_by.R
index a59ade1..8d1aeb1 100644
--- a/inst/tinytest/test_group_by.R
+++ b/inst/tinytest/test_group_by.R
@@ -177,7 +177,7 @@ expect_identical(
 d <- data.frame(
   orig = rep(c("France", "UK"), each = 4),
   dest = rep(c("Spain", "Germany"), times = 4),
-  year = rep(c(2010, 2011), each = 4),
+  year = rep(rep(c(2010, 2011), each = 2), 2),
   value = 1:8
 )
 d[2, 1] <- NA

From e6e28e81082abf4a21cdd42e012f69b402300b3d Mon Sep 17 00:00:00 2001
From: etiennebacher <etienne.bacher@protonmail.com>
Date: Fri, 6 Jan 2023 14:42:54 +0100
Subject: [PATCH 4/4] fix case where there are NA in several groups

---
 R/group_utils.R               | 14 ++++++++------
 inst/tinytest/test_group_by.R | 12 +++++++++---
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/R/group_utils.R b/R/group_utils.R
index e41d828..8209dff 100644
--- a/R/group_utils.R
+++ b/R/group_utils.R
@@ -63,16 +63,18 @@ calculate_groups <- function(data, groups, drop = group_by_drop_default(data)) {
   unique_groups <- unique(data[, groups, drop = FALSE])
   is_factor <- do.call(c, lapply(unique_groups, function(x) is.factor(x)))
   n_comb <- nrow(unique_groups)
-  rows <- rep(list(NA), n_comb)
-  data_groups <- interaction(data[, groups, drop = TRUE])
   
+  temp_id <- paste(sample(letters), collapse = "")
+
   # Concatenate group variables
-  pasted_groups <- do.call(paste, c(unique_groups[, groups, drop = FALSE], sep = "."))
-  pasted_groups[is.na(unique_groups)] <- NA
-  
+  pasted_groups <- do.call(paste, c(unique_groups, sep = "."))
+  data[[temp_id]] <- do.call(paste, c(data[, groups, drop = FALSE], sep = "."))
+
+  rows <- rep(list(NA), n_comb)
   for (i in seq_len(n_comb)) {
-    rows[[i]] <- which(data_groups %in% pasted_groups[i])
+    rows[[i]] <- which(data[[temp_id]] %in% pasted_groups[i])
   }
+  data[[temp_id]] <- NULL
 
   if (!isTRUE(drop) && any(is_factor)) {
     na_lvls <- do.call(
diff --git a/inst/tinytest/test_group_by.R b/inst/tinytest/test_group_by.R
index 8d1aeb1..888ac18 100644
--- a/inst/tinytest/test_group_by.R
+++ b/inst/tinytest/test_group_by.R
@@ -189,13 +189,19 @@ res <- d %>%
 
 expect_identical(nrow(res), 6L)
 expect_identical(
-  res[5:6, 1:2],
+  res[5:6, 1:3],
   structure(
-    data.frame(orig = c("UK", NA), dest = c(NA, "Germany")),
+    list2DF(
+      list(
+        orig = c("UK", NA), 
+        dest = c(NA, "Germany"), 
+        .rows = list(7L, 2L)
+      )
+    ),
     row.names = 5:6
   )
 )
 expect_identical(
   vapply(res$.rows, length, FUN.VALUE = numeric(1L)),
-  c(1, 2, 2, 1, 0, 2)
+  c(1, 2, 2, 1, 1, 1)
 )