From ea1c28cd7040616bdcac3894759e05cb44098957 Mon Sep 17 00:00:00 2001
From: Michael Mayer <mayermichael79@gmail.com>
Date: Tue, 6 Aug 2024 21:05:42 +0200
Subject: [PATCH 1/6] More compact tests

---
 R/test-agnostic.R | 173 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)
 create mode 100644 R/test-agnostic.R

diff --git a/R/test-agnostic.R b/R/test-agnostic.R
new file mode 100644
index 0000000..beb4f17
--- /dev/null
+++ b/R/test-agnostic.R
@@ -0,0 +1,173 @@
+# Model with non-linearities and interactions
+fit <- lm(
+  Sepal.Length ~ poly(Petal.Width, degree = 2L) * Species + Petal.Length, data = iris
+)
+x <- c("Petal.Width", "Species", "Petal.Length")
+preds <- unname(predict(fit, iris))
+J <- c(1L, 51L, 101L)
+
+shap <- list(
+  kernelshap(fit, iris[x], bg_X = iris, verbose = FALSE),
+  permshap(fit, iris[x], bg_X = iris, verbose = FALSE)
+)
+
+test_that("baseline equals average prediction on background data", {
+  for (s in shap)
+    expect_equal(s$baseline, mean(iris$Sepal.Length))
+})
+
+test_that("SHAP + baseline = prediction for exact mode", {
+  for (s in shap)
+    expect_equal(rowSums(s$S) + s$baseline, preds)
+})
+
+test_that("auto-selection of background data works", {
+  # Here, the background data equals the full X
+  shap2 <- list(
+    kernelshap(fit, iris[x], verbose = FALSE),
+    permshap(fit, iris[x], verbose = FALSE)
+  )
+  
+  for (i in 1:2) {
+    expect_equal(shap$S, shap2$S)
+  }
+})
+
+test_that("missing bg_X gives error if X is very small", {
+  for (algo in c(kernelshap, permshap))
+    expect_error(algo(fit, iris[1:10, x], verbose = FALSE))  
+  
+})
+
+test_that("missing bg_X gives warning if X is quite small", {
+  for (algo in c(kernelshap, permshap))
+    expect_warning(algo(fit, iris[1:30, x], verbose = FALSE))
+})
+
+test_that("selection of bg_X can be controlled via bg_n", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[1:30, x], verbose = FALSE, bg_n = 20L)
+    expect_equal(nrow(s$bg_X), 20L)
+  }
+})
+
+test_that("verbose is chatty", {
+  capture_output(
+    expect_message(
+      kernelshap(fit, iris[c(1L, 51L, 101L), x], bg_X = iris, verbose = TRUE)
+    )
+  )
+})
+
+test_that("large background data cause warning", {
+  large_bg <- iris[rep(1:150, 230), ]
+  expect_warning(
+    kernelshap(fit, iris[1L, x], bg_X = large_bg, verbose = FALSE)
+  )
+})
+
+test_that("using foreach (non-parallel) gives the same as normal mode", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[J, x], bg_X = iris, verbose = FALSE)
+    s2 <- suppressWarnings(
+      algo(fit, iris[J, x], bg_X = iris, verbose = FALSE, parallel = TRUE)
+    )
+    expect_equal(s, s2)
+  }
+})
+
+test_that("verbose is chatty", {
+  for (algo in c(kernelshap, permshap)) {
+    capture_output(expect_message(algo(fit, iris[J, x], bg_X = iris, verbose = TRUE)))
+  }
+})
+
+test_that("large background data cause warning", {
+  # Takes a bit of time, thus only for one algo
+  large_bg <- iris[rep(1:150, 230), ]
+  expect_warning(
+    kernelshap(fit, iris[1L, x], bg_X = large_bg, verbose = FALSE)
+  )
+})
+
+test_that("Decomposing a single row works", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[1L, x], bg_X = iris, verbose = FALSE)
+    expect_equal(s$baseline, mean(iris$Sepal.Length))
+    expect_equal(rowSums(s$S) + s$baseline, preds[1])
+  }
+})
+
+test_that("Background data can contain additional columns", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[1L, x], bg_X = cbind(d = 1, iris), verbose = FALSE)
+    expect_true(is.kernelshap(s))
+  }
+})
+
+test_that("Background data can contain only one single row", {
+  for (algo in c(kernelshap, permshap))
+    expect_no_error(algo(fit, iris[1L, x], bg_X = iris[150L, ], verbose = FALSE))
+})
+
+test_that("feature_names can drop columns from SHAP calculations", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[J, ], bg_X = iris, feature_names = x, verbose = FALSE)
+    expect_equal(colnames(s$S), x)
+  }
+})
+
+test_that("feature_names can rearrange column names in result", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[J, ], bg_X = iris, feature_names = rev(x), verbose = FALSE)
+    expect_equal(colnames(s$S), rev(x))
+  }
+})
+
+test_that("feature_names must be in colnames(X) and colnames(bg_X)", {
+  for (algo in c(kernelshap, permshap)) {
+    expect_error(algo(fit, iris, bg_X = cbind(iris, a = 1), feature_names = "a"))
+    expect_error(algo(fit, cbind(iris, a = 1), bg_X = iris, feature_names = "a"))
+  }
+})
+
+test_that("Matrix input is fine", {
+  X <- data.matrix(iris)
+  pred_fun <- function(m, X) {
+    data <- as.data.frame(X) |> 
+      transform(Species = factor(Species, labels = levels(iris$Species)))
+    predict(m, data)
+  }
+  
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, X[J, x], pred_fun = pred_fun, bg_X = X, verbose = FALSE)
+    
+    expect_equal(s$baseline, mean(iris$Sepal.Length))  # baseline is mean of bg
+    expect_equal(rowSums(s$S) + s$baseline, preds[J])  # sum shap = centered preds
+    expect_no_error(                                   # additional cols in bg are ok
+      algo(fit, X[J, x], pred_fun = pred_fun, bg_X = cbind(d = 1, X), verbose = FALSE)
+    )
+    expect_error(                                      # feature_names are less flexible
+      algo(fit, X[J, ], pred_fun = pred_fun, bg_X = X, 
+           verbose = FALSE, feature_names = "Sepal.Width")
+    )
+  }
+})
+
+test_that("Special case p = 1 works only for kernelshap()", {
+  capture_output(
+    expect_message(
+      s <- kernelshap(fit, X = iris[J, ], bg_X = iris, feature_names = "Petal.Width")
+    )
+  )
+  expect_equal(s$baseline, mean(iris$Sepal.Length))
+  expect_equal(unname(rowSums(s$S)) + s$baseline, preds[J])
+  expect_equal(s$SE[1L], 0)
+  
+  expect_error(  # Not implemented
+    permshap(
+      fit, iris[J, ], bg_X = iris, verbose = FALSE, feature_names = "Petal.Width"
+    )
+  )
+})
+

From ac070908db95eebafa420401dcadf01ab924c15b Mon Sep 17 00:00:00 2001
From: Michael Mayer <mayermichael79@gmail.com>
Date: Tue, 6 Aug 2024 22:00:13 +0200
Subject: [PATCH 2/6] Move new test script

---
 {R => tests/testthat}/test-agnostic.R | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {R => tests/testthat}/test-agnostic.R (100%)

diff --git a/R/test-agnostic.R b/tests/testthat/test-agnostic.R
similarity index 100%
rename from R/test-agnostic.R
rename to tests/testthat/test-agnostic.R

From 3e02c418d4448d91d7b250f05ec1c0acc2cdb5e1 Mon Sep 17 00:00:00 2001
From: Michael Mayer <mayermichael79@gmail.com>
Date: Wed, 7 Aug 2024 08:01:35 +0200
Subject: [PATCH 3/6] Revise test-additive_shap

---
 tests/testthat/test-additive_shap.R | 89 ++++++++++-------------------
 1 file changed, 30 insertions(+), 59 deletions(-)

diff --git a/tests/testthat/test-additive_shap.R b/tests/testthat/test-additive_shap.R
index d80d447..979dfc5 100644
--- a/tests/testthat/test-additive_shap.R
+++ b/tests/testthat/test-additive_shap.R
@@ -1,60 +1,30 @@
-test_that("simple additive formula gives same as permshap() if full training data is used as bg data", {
-  form <- Sepal.Length ~ .
-  fit_lm <- lm(form, data = iris)
-  fit_glm <- glm(form, data = iris, family = quasipoisson)
-  
-  s_add_lm <- additive_shap(fit_lm, head(iris), verbose = FALSE)
-  s_add_glm <- additive_shap(fit_glm, head(iris), verbose = FALSE)
-  
-  X <- head(iris[-1L])
-  s_perm_lm <- permshap(fit_lm, X = X, bg_X = iris, verbose = FALSE)
-  s_perm_glm <- permshap(
-    fit_glm, X = X, bg_X = iris, verbose = FALSE
+test_that("Additive formulas give same as permshap() with full training data as bg data", {
+  formulas <- list(
+    Sepal.Length ~ .,
+    Sepal.Length ~ log(Sepal.Width) + poly(Sepal.Width, 2) + Petal.Length,
+    form <- Sepal.Length ~ log(Sepal.Width) + Species + poly(Petal.Length, 2)
   )
-  expect_equal(s_add_lm$S, s_perm_lm$S)
-  expect_equal(s_add_glm$S, s_perm_glm$S)
-  expect_equal(s_add_lm$predictions, unname(predict(fit_lm, newdata = X)))
-  expect_equal(s_add_glm$predictions, unname(predict(fit_glm, newdata = X)))
-})
-
-test_that("formula where feature appears in two terms gives same as permshap() if full training data is used as bg data", {
-  form <- Sepal.Length ~ log(Sepal.Width) + poly(Sepal.Width, 2) + Petal.Length
-  fit_lm <- lm(form, data = iris)
-  fit_glm <- glm(form, data = iris, family = quasipoisson)
-  
-  s_add_lm <- additive_shap(fit_lm, head(iris), verbose = FALSE)
-  s_add_glm <- additive_shap(fit_glm, head(iris), verbose = FALSE)
-  
-  X <- head(iris[2:3])
-  s_perm_lm <- permshap(fit_lm, X = X, bg_X = iris, verbose = FALSE)
-  s_perm_glm <- permshap(
-    fit_glm, X = X, bg_X = iris, verbose = FALSE
+  xvars <- list(
+    setdiff(colnames(iris), "Sepal.Length"),
+    c("Sepal.Width", "Petal.Length"),
+    xvars <- c("Sepal.Width", "Petal.Length", "Species")
   )
-  expect_equal(s_add_lm$S, s_perm_lm$S)
-  expect_equal(s_add_glm$S, s_perm_glm$S)
-  expect_equal(s_add_lm$predictions, unname(predict(fit_lm, newdata = X)))
-  expect_equal(s_add_glm$predictions, unname(predict(fit_glm, newdata = X)))
-})
-
-test_that("formula with complicated terms gives same as permshap() if full training data is used as bg data", {
-  form <- Sepal.Length ~ 
-    log(Sepal.Width) + Species + poly(Petal.Length, 2)
-  
-  fit_lm <- lm(form, data = iris)
-  fit_glm <- glm(form, data = iris, family = quasipoisson)
   
-  s_add_lm <- additive_shap(fit_lm, head(iris), verbose = FALSE)
-  s_add_glm <- additive_shap(fit_glm, head(iris), verbose = FALSE)
-  
-  X <- head(iris[c(2, 3, 5)])
-  s_perm_lm <- permshap(fit_lm, X = X, bg_X = iris, verbose = FALSE)
-  s_perm_glm <- permshap(
-    fit_glm, X = X, bg_X = iris, verbose = FALSE
-  )
-  expect_equal(s_add_lm$S, s_perm_lm$S)
-  expect_equal(s_add_glm$S, s_perm_glm$S)
-  expect_equal(s_add_lm$predictions, unname(predict(fit_lm, newdata = X)))
-  expect_equal(s_add_glm$predictions, unname(predict(fit_glm, newdata = X)))
+  for (j in seq_along(formulas)) {
+    fit <- list(
+      lm = lm(formulas[[j]], data = iris),
+      glm = glm(formulas[[j]], data = iris, family = quasipoisson)
+    )
+    
+    shap1 <- lapply(fit, additive_shap, head(iris), verbose = FALSE)
+    shap2 <- lapply(
+      fit, permshap, head(iris), bg_X = iris, verbose = FALSE, feature_names = xvars[[j]]
+    )
+    
+    for (i in seq_along(fit)) {
+      expect_equal(shap1[[i]]$S, shap2[[i]]$S)
+    }
+  }
 })
 
 test_that("formulas with more than one covariate per term fail", {
@@ -65,10 +35,11 @@ test_that("formulas with more than one covariate per term fail", {
   )
   
   for (formula in formulas_bad) {
-    fit <- lm(formula, data = iris)
-    expect_error(s <- additive_shap(fit, head(iris), verbose = FALSE))
-    
-    fit <- glm(formula, data = iris, family = quasipoisson)
-    expect_error(s <- additive_shap(fit, head(iris), verbose = FALSE))
+    fit <- list(
+      lm = lm(formula, data = iris),
+      glm = glm(formula, data = iris, family = quasipoisson)
+    )
+    for (f in fit)
+      expect_error(additive_shap(f, head(iris), verbose = FALSE))
   }  
 })

From 1ff73265bef3383f450ef8ad406b69e6a3ed667f Mon Sep 17 00:00:00 2001
From: Michael Mayer <mayermichael79@gmail.com>
Date: Wed, 7 Aug 2024 19:41:00 +0200
Subject: [PATCH 4/6] reorganize tests

---
 tests/test-weights.R                          |  92 ++++++
 tests/testthat/test-additive_shap.R           |   7 +-
 .../{test-agnostic.R => test-basic.R}         |  49 ++--
 tests/testthat/test-kernelshap-utils.R        |   3 +-
 tests/testthat/test-kernelshap.R              | 267 ------------------
 tests/testthat/test-methods.R                 |  39 ++-
 tests/testthat/test-permshap-utils.R          |   1 +
 tests/testthat/test-permshap.R                | 197 -------------
 tests/testthat/test-utils.R                   |   1 +
 9 files changed, 152 insertions(+), 504 deletions(-)
 create mode 100644 tests/test-weights.R
 rename tests/testthat/{test-agnostic.R => test-basic.R} (81%)
 delete mode 100644 tests/testthat/test-kernelshap.R
 delete mode 100644 tests/testthat/test-permshap.R

diff --git a/tests/test-weights.R b/tests/test-weights.R
new file mode 100644
index 0000000..faad768
--- /dev/null
+++ b/tests/test-weights.R
@@ -0,0 +1,92 @@
+# Model with non-linearities and interactions
+fit <- lm(
+  Sepal.Length ~ poly(Petal.Width, degree = 2L) * Species,
+  data = iris,
+  weights = Petal.Length
+)
+x <- c("Petal.Width", "Species")
+preds <- unname(predict(fit, iris))
+J <- c(1L, 51L, 101L)
+w <- iris$Petal.Length
+
+shap <- list(
+  kernelshap(fit, iris[x], bg_X = iris, bg_w = w, verbose = FALSE),
+  permshap(fit, iris[x], bg_X = iris, bg_w = w, verbose = FALSE)
+)
+
+test_that("constant weights gives same as no weights", {
+  shap_unweighted <- list(
+    kernelshap(fit, iris[x], bg_X = iris, verbose = FALSE),
+    permshap(fit, iris[x], bg_X = iris, verbose = FALSE)
+  )
+  
+  w2 <- rep(3, nrow(iris))
+  shap2 <- list(
+    kernelshap(fit, iris[x], bg_X = iris, bg_w = w2, verbose = FALSE),
+    permshap(fit, iris[x], bg_X = iris, bg_w = w2, verbose = FALSE)
+  )
+  
+  for (i in seq_along(shap))
+    expect_equal(shap2[[i]]$S, shap_unweighted[[i]]$S)
+})
+
+test_that("baseline equals average prediction on background data", {
+  for (s in shap)
+    expect_equal(s$baseline, weighted.mean(iris$Sepal.Length, w))
+})
+
+test_that("SHAP + baseline = prediction for exact mode", {
+  for (s in shap)
+    expect_equal(rowSums(s$S) + s$baseline, preds)
+})
+
+test_that("Decomposing a single row works", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[1L, x], bg_X = iris, bg_w = w, verbose = FALSE)
+    expect_equal(s$baseline, weighted.mean(iris$Sepal.Length, w))
+    expect_equal(rowSums(s$S) + s$baseline, preds[1])
+  }
+})
+
+test_that("auto-selection of background data works", {
+  # Here, the background data equals the full X
+  shap2 <- list(
+    kernelshap(fit, iris[x], bg_w = w, verbose = FALSE),
+    permshap(fit, iris[x], bg_w = w, verbose = FALSE)
+  )
+  
+  for (i in 1:2) {
+    expect_equal(shap$S, shap2$S)
+  }
+})
+
+test_that("selection of bg_X can be controlled via bg_n", {
+  n <- 20L
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris, bg_w = w, verbose = FALSE, bg_n = n)
+    expect_equal(nrow(s$bg_X), n)
+  }
+})
+
+test_that("weights must have correct length", {
+  for (algo in c(kernelshap, permshap)) {
+    expect_error(algo(fit, iris[J, ], bg_X = iris, bg_w = 1:3, verbose = FALSE))
+  }
+})
+
+test_that("weights can't be all 0", {
+  for (algo in c(kernelshap, permshap)) {
+    expect_error(
+      algo(fit, iris[J, ], bg_X = iris, bg_w = rep(0, nrow(iris)), verbose = FALSE)
+    )
+  }
+})
+
+test_that("weights can't be negative", {
+  for (algo in c(kernelshap, permshap)) {
+    expect_error(
+      algo(fit, iris[J, ], bg_X = iris, bg_w = rep(-1, nrow(iris)), verbose = FALSE)
+    )
+  }
+})
+
diff --git a/tests/testthat/test-additive_shap.R b/tests/testthat/test-additive_shap.R
index 979dfc5..0d1da78 100644
--- a/tests/testthat/test-additive_shap.R
+++ b/tests/testthat/test-additive_shap.R
@@ -1,4 +1,4 @@
-test_that("Additive formulas give same as permshap() with full training data as bg data", {
+test_that("Additive formulas give same as agnostic SHAP with full training data as bg data", {
   formulas <- list(
     Sepal.Length ~ .,
     Sepal.Length ~ log(Sepal.Width) + poly(Sepal.Width, 2) + Petal.Length,
@@ -20,9 +20,13 @@ test_that("Additive formulas give same as permshap() with full training data as
     shap2 <- lapply(
       fit, permshap, head(iris), bg_X = iris, verbose = FALSE, feature_names = xvars[[j]]
     )
+    shap3 <- lapply(
+      fit, kernelshap, head(iris), bg_X = iris, verbose = FALSE, feature_names = xvars[[j]]
+    )
     
     for (i in seq_along(fit)) {
       expect_equal(shap1[[i]]$S, shap2[[i]]$S)
+      expect_equal(shap1[[i]]$S, shap3[[i]]$S)
     }
   }
 })
@@ -43,3 +47,4 @@ test_that("formulas with more than one covariate per term fail", {
       expect_error(additive_shap(f, head(iris), verbose = FALSE))
   }  
 })
+
diff --git a/tests/testthat/test-agnostic.R b/tests/testthat/test-basic.R
similarity index 81%
rename from tests/testthat/test-agnostic.R
rename to tests/testthat/test-basic.R
index beb4f17..0a9225a 100644
--- a/tests/testthat/test-agnostic.R
+++ b/tests/testthat/test-basic.R
@@ -46,26 +46,11 @@ test_that("missing bg_X gives warning if X is quite small", {
 
 test_that("selection of bg_X can be controlled via bg_n", {
   for (algo in c(kernelshap, permshap)) {
-    s <- algo(fit, iris[1:30, x], verbose = FALSE, bg_n = 20L)
+    s <- algo(fit, iris[x], verbose = FALSE, bg_n = 20L)
     expect_equal(nrow(s$bg_X), 20L)
   }
 })
 
-test_that("verbose is chatty", {
-  capture_output(
-    expect_message(
-      kernelshap(fit, iris[c(1L, 51L, 101L), x], bg_X = iris, verbose = TRUE)
-    )
-  )
-})
-
-test_that("large background data cause warning", {
-  large_bg <- iris[rep(1:150, 230), ]
-  expect_warning(
-    kernelshap(fit, iris[1L, x], bg_X = large_bg, verbose = FALSE)
-  )
-})
-
 test_that("using foreach (non-parallel) gives the same as normal mode", {
   for (algo in c(kernelshap, permshap)) {
     s <- algo(fit, iris[J, x], bg_X = iris, verbose = FALSE)
@@ -171,3 +156,35 @@ test_that("Special case p = 1 works only for kernelshap()", {
   )
 })
 
+test_that("exact hybrid kernelshap() is similar to exact (non-hybrid)", {
+  s1 <- kernelshap(
+    fit, iris[J, x], bg_X = iris, exact = FALSE, hybrid_degree = 1L, verbose = FALSE
+  )
+  expect_equal(s1$S, shap[[1L]]$S[J, ])
+})
+
+test_that("baseline equals average prediction on background data in sampling mode", {
+  s2 <- s_sampling <- kernelshap(
+    fit, iris[J, x], bg_X = iris, hybrid_degree = 0L, verbose = FALSE, exact = FALSE
+  )
+  expect_equal(s2$baseline, mean(iris$Sepal.Length))
+})
+
+test_that("SHAP + baseline = prediction for sampling mode", {
+  s2 <- s_sampling <- kernelshap(
+    fit, iris[J, x], bg_X = iris, hybrid_degree = 0L, verbose = FALSE, exact = FALSE
+  )
+  expect_equal(rowSums(s2$S) + s2$baseline, preds[J])
+})
+
+test_that("kernelshap works for large p (hybrid case)", {
+  set.seed(9L)
+  X <- data.frame(matrix(rnorm(20000L), ncol = 100L))
+  y <- X[, 1L] * X[, 2L] * X[, 3L]
+  fit <- lm(y ~ X1:X2:X3 + ., data = cbind(y = y, X))
+  s <- kernelshap(fit, X[1L, ], bg_X = X, verbose = FALSE)
+  
+  expect_equal(s$baseline, mean(y))
+  expect_equal(rowSums(s$S) + s$baseline, unname(predict(fit, X[1L, ])))
+})
+
diff --git a/tests/testthat/test-kernelshap-utils.R b/tests/testthat/test-kernelshap-utils.R
index ac4ffdf..098a21f 100644
--- a/tests/testthat/test-kernelshap-utils.R
+++ b/tests/testthat/test-kernelshap-utils.R
@@ -1,4 +1,4 @@
-test_that("Sum of kernel weights is 1", {
+test_that("sum of kernel weights is 1", {
   for (p in 2:10) {
     expect_equal(sum(kernel_weights(p)), 1.0)
   }
@@ -121,3 +121,4 @@ test_that("input_partly_exact(p, deg) fails for bad p or deg", {
   expect_error(input_partly_exact(2L, deg = 0L, feature_names = LETTERS[1:p]))
   expect_error(input_partly_exact(5L, deg = 3L, feature_names = LETTERS[1:p]))
 })
+
diff --git a/tests/testthat/test-kernelshap.R b/tests/testthat/test-kernelshap.R
deleted file mode 100644
index 340bd93..0000000
--- a/tests/testthat/test-kernelshap.R
+++ /dev/null
@@ -1,267 +0,0 @@
-# Model with non-linearities and interactions
-fit <- lm(
-  Sepal.Length ~ poly(Petal.Width, degree = 2L) * Species + Petal.Length, data = iris
-)
-x <- c("Petal.Width", "Species", "Petal.Length")
-preds <- unname(predict(fit, iris))
-s <- kernelshap(fit, iris[c(1L, 51L, 101L), x], bg_X = iris, verbose = FALSE)
-
-test_that("Baseline equals average prediction on background data in exact mode", {
-  expect_equal(s$baseline, mean(iris$Sepal.Length))
-})
-
-test_that("SHAP + baseline = prediction for exact mode", {
-  expect_equal(rowSums(s$S) + s$baseline, preds[c(1L, 51L, 101L)])
-})
-
-test_that("background data is automatically selected", {
-  # Here, the background data equals the full X
-  s2 <- kernelshap(fit, iris[, x], verbose = FALSE)
-  expect_equal(s$S, s2$S[c(1L, 51L, 101L), ])
-})
-
-test_that("missing bg_X gives error if X is very small", {
-  expect_error(kernelshap(fit, iris[1:10, x], verbose = FALSE))
-})
-
-test_that("missing bg_X gives error if X is very small", {
-  expect_warning(kernelshap(fit, iris[1:30, x], verbose = FALSE))
-})
-
-test_that("selection of bg_X can be controlled via bg_n", {
-  s2 <- kernelshap(fit, iris[1:30, x], verbose = FALSE, bg_n = 20L)
-  expect_equal(nrow(s2$bg_X), 20L)
-})
-
-test_that("Exact hybrid calculation is similar to exact (non-hybrid)", {
-  s1 <- kernelshap(
-    fit, 
-    iris[c(1L, 51L, 101L), x], 
-    bg_X = iris,
-    exact = FALSE, 
-    hybrid_degree = 1L, 
-    verbose = FALSE
-  )
-  expect_equal(s$S, s1$S)
-})
-
-s_sampling <- kernelshap(
-  fit, 
-  iris[c(1L, 51L, 101L), x], 
-  bg_X = iris, 
-  hybrid_degree = 0L,
-  verbose = FALSE,
-  exact = FALSE
-)
-
-test_that("Baseline equals average prediction on background data in sampling mode", {
-  expect_equal(s_sampling$baseline, mean(iris$Sepal.Length))
-})
-
-test_that("SHAP + baseline = prediction for sampling mode", {
-  expect_equal(rowSums(s_sampling$S) + s_sampling$baseline, preds[c(1L, 51L, 101L)])
-})
-
-test_that("verbose is chatty", {
-  capture_output(
-    expect_message(
-      kernelshap(fit, iris[c(1L, 51L, 101L), x], bg_X = iris, verbose = TRUE)
-    )
-  )
-})
-
-test_that("large background data cause warning", {
-  large_bg <- iris[rep(1:150, 230), ]
-  expect_warning(
-    kernelshap(fit, iris[1L, x], bg_X = large_bg, verbose = FALSE)
-  )
-})
-
-test_that("using foreach (non-parallel) gives the same as normal mode", {
-  s_foreach <- suppressWarnings(
-    kernelshap(
-      fit, iris[c(1L, 51L, 101L), x], bg_X = iris, verbose = FALSE, parallel = TRUE
-    )
-  )
-  expect_equal(s, s_foreach)
-})
-
-test_that("Decomposing a single row works", {
-  s <- kernelshap(fit, iris[1L, x], bg_X = iris, verbose = FALSE)
-  
-  expect_equal(s$baseline, mean(iris$Sepal.Length))
-  expect_equal(rowSums(s$S) + s$baseline, preds[1])
-})
-
-test_that("Background data can contain additional columns", {
-  ks4 <- kernelshap(fit, iris[1L, x], bg_X = cbind(d = 1, iris), verbose = FALSE)
-  expect_true(is.kernelshap(ks4))
-})
-
-test_that("Background data can contain only one single row", {
-  expect_true(
-    is.kernelshap(kernelshap(fit, iris[1L, x], bg_X = iris[150L, ], verbose = FALSE))
-  )
-  expect_true(
-    is.kernelshap(kernelshap(fit, iris[1:10, x], bg_X = iris[150L, ], verbose = FALSE))
-  )
-})
-
-test_that("feature_names can drop columns from SHAP calculations", {
-  s_f <- kernelshap(
-    fit, iris[c(1L, 51L, 101L), ], bg_X = iris, feature_names = x, verbose = FALSE
-  )
-  expect_equal(within(unclass(s), rm(X)), within(unclass(s_f), rm(X)))
-})
-
-test_that("feature_names can rearrange column names in result", {
-  s_f2 <- kernelshap(
-    fit, iris[c(1L, 51L, 101L), ], bg_X = iris, feature_names = rev(x), verbose = FALSE
-  )
-  expect_equal(s$S, s_f2$S[, x])
-})
-
-test_that("feature_names must be in colnames(X) and colnames(bg_X)", {
-  expect_error(kernelshap(fit, iris, bg_X = cbind(iris, a = 1), feature_names = "a"))
-  expect_error(kernelshap(fit, cbind(iris, a = 1), bg_X = iris, feature_names = "a"))
-})
-
-fit <- lm(Sepal.Length ~ poly(Petal.Width, degree = 2L), data = iris)
-x <- "Petal.Width"
-preds <- unname(predict(fit, iris))
-
-test_that("Special case p = 1 works", {
-  s <- kernelshap(fit, iris[1:5, x, drop = FALSE], bg_X = iris, verbose = FALSE)
-  expect_equal(s$baseline, mean(iris$Sepal.Length))
-  expect_equal(unname(rowSums(s$S)) + s$baseline, preds[1:5])
-  expect_equal(s$SE[1L], 0)
-})
-
-test_that("Special case p = 1 is chatty with verbose = TRUE", {
-  capture_output(
-    expect_message(
-      kernelshap(fit, iris[1:5, x, drop = FALSE], bg_X = iris, verbose = TRUE)
-    )
-  )
-})
-
-fit <- lm(Sepal.Length ~ ., data = iris[1:4])
-X <- data.matrix(iris[2:4])
-pred_fun <- function(m, X) predict(m, as.data.frame(X))
-preds <- unname(pred_fun(fit, X))
-s <- kernelshap(fit, X[1:3, ], pred_fun = pred_fun, bg_X = X, verbose = FALSE)
-
-test_that("Matrix input is fine", {
-  expect_true(is.kernelshap(s))
-  expect_equal(s$baseline, mean(iris$Sepal.Length))
-  expect_equal(rowSums(s$S) + s$baseline, preds[1:3])
-})
-
-test_that("Matrix input works if bg data containts extra columns", {
-  ks5 <- kernelshap(
-    fit, X[1:3, ], pred_fun = pred_fun, bg_X = cbind(d = 1, X), verbose = FALSE
-  )
-  expect_true(is.kernelshap(ks5))
-})
-
-test_that("Matrix input gives error with inconsistent feature_names", {
-  expect_error(
-    kernelshap(
-      fit, 
-      X[1:3, ], 
-      pred_fun = pred_fun, 
-      bg_X = X, 
-      verbose = FALSE, 
-      feature_names = "Sepal.Width"
-    )
-  )
-})
-
-
-## Now with case weights
-fit <- lm(
-  Sepal.Length ~ poly(Petal.Width, degree = 2L) * Species, data = iris, 
-  weights = Petal.Length
-)
-x <- c("Petal.Width", "Species")
-preds <- unname(predict(fit, iris))
-s <- kernelshap(
-  fit, iris[1:5, x], bg_X = iris, bg_w = iris$Petal.Length, verbose = FALSE
-)
-
-test_that("Baseline equals weighted average prediction on background data", {
-  expect_equal(s$baseline, weighted.mean(iris$Sepal.Length, iris$Petal.Length))
-})
-
-test_that("SHAP + baseline = prediction works with case weights", {
-  expect_equal(rowSums(s$S) + s$baseline, preds[1:5])
-})
-
-test_that("selection of bg_X and bg_w can be controlled via bg_n", {
-  s2 <- kernelshap(
-    fit, iris[1:30, x], verbose = FALSE, bg_w = iris$Petal.Length[1:30], bg_n = 20L
-  )
-  expect_equal(nrow(s2$bg_X), 20L)
-  expect_equal(length(s2$bg_w), 20L)
-})
-
-test_that("Decomposing a single row works with case weights", {
-  s <- kernelshap(
-    fit, iris[1L, x], bg_X = iris, bg_w = iris$Petal.Length, verbose = FALSE
-  )
-  expect_equal(s$baseline, weighted.mean(iris$Sepal.Length, iris$Petal.Length))
-  expect_equal(rowSums(s$S) + s$baseline, preds[1L])
-})
-
-fit <- lm(
-  Sepal.Length ~ poly(Petal.Width, degree = 2L), 
-  data = iris, 
-  weights = Petal.Length
-)
-x <- "Petal.Width"
-preds <- unname(predict(fit, iris))
-
-test_that("Special case p = 1 works with case weights", {
-  s <- kernelshap(
-    fit, 
-    iris[1:5, x, drop = FALSE], 
-    bg_X = iris, 
-    bg_w = iris$Petal.Length, 
-    verbose = FALSE
-  )
-  
-  expect_equal(s$baseline, weighted.mean(iris$Sepal.Length, iris$Petal.Length))
-  expect_equal(unname(rowSums(s$S)) + s$baseline, preds[1:5])
-})
-
-fit <- lm(
-  Sepal.Length ~ . , data = iris[c(1L, 3L, 4L)], weights = iris$Sepal.Width
-)
-X <- data.matrix(iris[3:4])
-preds <- unname(pred_fun(fit, X))
-
-test_that("Matrix input is fine with case weights", {
-  s <- kernelshap(
-    fit, X[1:3, ], 
-    pred_fun = pred_fun, 
-    bg_X = X, 
-    bg_w = iris$Sepal.Width, 
-    verbose = FALSE
-  )
-  
-  expect_true(is.kernelshap(s))
-  expect_equal(s$baseline, weighted.mean(iris$Sepal.Length, iris$Sepal.Width))
-  expect_equal(rowSums(s$S) + s$baseline, preds[1:3])
-})
-
-set.seed(9L)
-X <- data.frame(matrix(rnorm(20000L), ncol = 100L))
-y <- X[, 1L] * X[, 2L] * X[, 3L]
-fit <- lm(y ~ X1:X2:X3 + ., data = cbind(y = y, X))
-s <- kernelshap(fit, X[1L, ], bg_X = X, verbose = FALSE)
-
-test_that("kernelshap works for large p (hybrid case)", {
-  expect_equal(s$baseline, mean(y))
-  expect_equal(rowSums(s$S) + s$baseline, unname(predict(fit, X[1L, ])))
-})
-
diff --git a/tests/testthat/test-methods.R b/tests/testthat/test-methods.R
index 0e365b6..51acd71 100644
--- a/tests/testthat/test-methods.R
+++ b/tests/testthat/test-methods.R
@@ -1,32 +1,27 @@
 fit <- lm(Sepal.Length ~ ., data = iris)
+
 set.seed(1)
-s <- kernelshap(
-  fit, iris[1:2, -1L], bg_X = iris[-1L], verbose = FALSE, exact = FALSE, hybrid_degree = 1
+
+shap <- list(
+  kernelshap(
+    fit, iris[1:2, -1L], bg_X = iris, verbose = FALSE, exact = FALSE, hybrid_degree = 1
+  ),
+  permshap(fit, iris[1:2, -1L], bg_X = iris, verbose = FALSE),
+  additive_shap(fit, iris, verbose = FALSE)
 )
 
 test_that("is.kernelshap() works", {
-  expect_true(is.kernelshap(s))
-  expect_false(is.kernelshap(1))
+  for (s in shap) {
+    expect_true(is.kernelshap(s))
+    expect_false(is.kernelshap(1))
+  }
 })
 
 test_that("print() and summary() do not give an error", {
-  capture_output(expect_no_error(print(s)))
-  capture_output(expect_no_error(summary(s)))
-  capture_output(expect_no_error(summary(s, compact = TRUE)))
+  for (s in shap) {
+    capture_output(expect_no_error(print(s)))
+    capture_output(expect_no_error(summary(s)))
+    capture_output(expect_no_error(summary(s, compact = TRUE)))
+  }
 })
 
-test_that("is.kernelshap(), print() and summary() also works for permshap", {
-  s <- permshap(fit, iris[1:2, -1L], bg_X = iris[-1L], verbose = FALSE)
-  
-  expect_true(is.kernelshap(s))
-  capture_output(expect_no_error(print(s)))
-  capture_output(expect_no_error(summary(s)))
-})
-
-test_that("is.kernelshap(), print() and summary() also works for additive_shap", {
-  s <- additive_shap(fit, iris, verbose = FALSE)
-  
-  expect_true(is.kernelshap(s))
-  capture_output(expect_no_error(print(s)))
-  capture_output(expect_no_error(summary(s)))
-})
diff --git a/tests/testthat/test-permshap-utils.R b/tests/testthat/test-permshap-utils.R
index f1ccff3..cebb3cb 100644
--- a/tests/testthat/test-permshap-utils.R
+++ b/tests/testthat/test-permshap-utils.R
@@ -6,3 +6,4 @@ test_that("rowpaste() does what it should", {
 test_that("shapley_weights() does what it should", {
   expect_equal(shapley_weights(5, 2), factorial(2) * factorial(5 - 2 - 1) / factorial(5))
 })
+
diff --git a/tests/testthat/test-permshap.R b/tests/testthat/test-permshap.R
deleted file mode 100644
index cf51c90..0000000
--- a/tests/testthat/test-permshap.R
+++ /dev/null
@@ -1,197 +0,0 @@
-# Model with non-linearities and interactions
-fit <- lm(
-  Sepal.Length ~ poly(Petal.Width, degree = 2L) * Species + Petal.Length,
-  data = iris
-)
-x <- c("Petal.Width", "Species", "Petal.Length")
-preds <- unname(predict(fit, iris))
-s <- permshap(fit, iris[c(1L, 51L, 101L), x], bg_X = iris, verbose = FALSE)
-
-test_that("Baseline equals average prediction on background data", {
-  expect_equal(s$baseline, mean(iris$Sepal.Length))
-})
-
-test_that("SHAP + baseline = prediction", {
-  expect_equal(rowSums(s$S) + s$baseline, preds[c(1L, 51L, 101L)])
-})
-
-test_that("background data is automatically selected", {
-  # Here, the background data equals the full X
-  s2 <- permshap(fit, iris[, x], verbose = FALSE)
-  expect_equal(s$S, s2$S[c(1L, 51L, 101L), ])
-})
-
-test_that("missing bg_X gives error if X is very small", {
-  expect_error(permshap(fit, iris[1:10, x], verbose = FALSE))
-})
-
-test_that("missing bg_X gives error if X is very small", {
-  expect_warning(permshap(fit, iris[1:30, x], verbose = FALSE))
-})
-
-test_that("selection of bg_X can be controlled via bg_n", {
-  s2 <- permshap(fit, iris[1:30, x], verbose = FALSE, bg_n = 20L)
-  expect_equal(nrow(s2$bg_X), 20L)
-})
-
-test_that("verbose is chatty", {
-  capture_output(
-    expect_message(
-      permshap(fit, iris[c(1L, 51L, 101L), x], bg_X = iris, verbose = TRUE)
-    )
-  )
-})
-
-test_that("large background data cause warning", {
-  large_bg <- iris[rep(1:150, 230), ]
-  expect_warning(
-    permshap(fit, iris[1L, x], bg_X = large_bg, verbose = FALSE)
-  )
-})
-
-test_that("using foreach (non-parallel) gives the same as normal mode", {
-  s_foreach <- suppressWarnings(
-    permshap(
-      fit, iris[c(1L, 51L, 101L), x], bg_X = iris, verbose = FALSE, parallel = TRUE
-    )
-  )
-  expect_equal(s, s_foreach)
-})
-
-test_that("Decomposing a single row works", {
-  s <- permshap(fit, iris[1L, x], bg_X = iris, verbose = FALSE)
-  
-  expect_equal(s$baseline, mean(iris$Sepal.Length))
-  expect_equal(rowSums(s$S) + s$baseline, preds[1])
-})
-
-test_that("Background data can contain additional columns", {
-  ks4 <- permshap(fit, iris[1L, x], bg_X = cbind(d = 1, iris), verbose = FALSE)
-  expect_true(is.kernelshap(ks4))
-})
-
-test_that("Background data can contain only one single row", {
-  expect_true(
-    is.kernelshap(permshap(fit, iris[1L, x], bg_X = iris[150L, ], verbose = FALSE))
-  )
-  expect_true(
-    is.kernelshap(permshap(fit, iris[1:10, x], bg_X = iris[150L, ], verbose = FALSE))
-  )
-})
-
-test_that("feature_names can drop columns from SHAP calculations", {
-  s_f <- permshap(
-    fit, iris[c(1L, 51L, 101L), ], bg_X = iris, feature_names = x, verbose = FALSE
-  )
-  expect_equal(within(unclass(s), rm(X)), within(unclass(s_f), rm(X)))
-})
-
-test_that("feature_names can rearrange column names in result", {
-  s_f2 <- permshap(
-    fit, iris[c(1L, 51L, 101L), ], bg_X = iris, feature_names = rev(x), verbose = FALSE
-  )
-  expect_equal(s$S, s_f2$S[, x])
-})
-
-test_that("feature_names must be in colnames(X) and colnames(bg_X)", {
-  expect_error(permshap(fit, iris, bg_X = cbind(iris, a = 1), feature_names = "a"))
-  expect_error(permshap(fit, cbind(iris, a = 1), bg_X = iris, feature_names = "a"))
-})
-
-fit <- lm(Sepal.Length ~ ., data = iris[1:4])
-X <- data.matrix(iris[2:4])
-pred_fun <- function(m, X) predict(m, as.data.frame(X))
-preds <- unname(pred_fun(fit, X))
-s <- permshap(fit, X[1:3, ], pred_fun = pred_fun, bg_X = X, verbose = FALSE)
-
-test_that("Matrix input is fine", {
-  expect_true(is.kernelshap(s))
-  expect_equal(s$baseline, mean(iris$Sepal.Length))
-  expect_equal(rowSums(s$S) + s$baseline, preds[1:3])
-})
-
-test_that("Matrix input works if bg data containts extra columns", {
-  ks5 <- permshap(
-    fit, X[1:3, ], pred_fun = pred_fun, bg_X = cbind(d = 1, X), verbose = FALSE
-  )
-  expect_true(is.kernelshap(ks5))
-})
-
-test_that("Matrix input gives error with inconsistent feature_names", {
-  expect_error(
-    permshap(
-      fit,
-      X[1:3, ],
-      pred_fun = pred_fun,
-      bg_X = X,
-      verbose = FALSE,
-      feature_names = "Sepal.Width"
-    )
-  )
-})
-
-## Now with case weights
-fit <- lm(
-  Sepal.Length ~ poly(Petal.Width, degree = 2L) * Species, data = iris,
-  weights = Petal.Length
-)
-x <- c("Petal.Width", "Species")
-preds <- unname(predict(fit, iris))
-s <- permshap(
-  fit, iris[1:5, x], bg_X = iris, bg_w = iris$Petal.Length, verbose = FALSE
-)
-
-test_that("Baseline equals weighted average prediction on background data", {
-  expect_equal(s$baseline, weighted.mean(iris$Sepal.Length, iris$Petal.Length))
-})
-
-test_that("SHAP + baseline = prediction works with case weights", {
-  expect_equal(rowSums(s$S) + s$baseline, preds[1:5])
-})
-
-test_that("selection of bg_X and bg_w can be controlled via bg_n", {
-  s2 <- permshap(
-    fit, iris[1:30, x], verbose = FALSE, bg_w = iris$Petal.Length[1:30], bg_n = 20L
-  )
-  expect_equal(nrow(s2$bg_X), 20L)
-  expect_equal(length(s2$bg_w), 20L)
-})
-
-test_that("Decomposing a single row works with case weights", {
-  s <- permshap(
-    fit, iris[1L, x], bg_X = iris, bg_w = iris$Petal.Length, verbose = FALSE
-  )
-  expect_equal(s$baseline, weighted.mean(iris$Sepal.Length, iris$Petal.Length))
-  expect_equal(rowSums(s$S) + s$baseline, preds[1L])
-})
-
-fit <- lm(
-  Sepal.Length ~ . , data = iris[c(1L, 3L, 4L)], weights = iris$Sepal.Width
-)
-X <- data.matrix(iris[3:4])
-preds <- unname(pred_fun(fit, X))
-
-test_that("Matrix input is fine with case weights", {
-  s <- permshap(
-    fit,
-    X[1:3, ],
-    pred_fun = pred_fun,
-    bg_X = X,
-    bg_w = iris$Sepal.Width,
-    verbose = FALSE
-  )
-  
-  expect_true(is.kernelshap(s))
-  expect_equal(s$baseline, weighted.mean(iris$Sepal.Length, iris$Sepal.Width))
-  expect_equal(rowSums(s$S) + s$baseline, preds[1:3])
-})
-
-test_that("On additive model, permshap and kernelshap give identical SHAP values", {
-  fit <- lm(
-    Sepal.Length ~ poly(Petal.Width, degree = 2L) + Species + Petal.Length, data = iris
-  )
-  x <- c("Petal.Width", "Species", "Petal.Length")
-  s1 <- permshap(fit, iris[c(1L, 51L, 101L), x], bg_X = iris, verbose = FALSE)
-  s2 <- kernelshap(fit, iris[c(1L, 51L, 101L), x], bg_X = iris, verbose = FALSE)
-  expect_equal(s1$S, s2$S)
-})
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 1134de7..218cf9a 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -87,3 +87,4 @@ test_that("align_pred() works", {
   expect_error(align_pred(factor(c("A", "B"))))
   expect_equal(align_pred(1:4), as.matrix(1:4))
 })
+

From b98b287a067beedab23096cf2b7a918e852f36ca Mon Sep 17 00:00:00 2001
From: Michael Mayer <mayermichael79@gmail.com>
Date: Wed, 7 Aug 2024 19:51:26 +0200
Subject: [PATCH 5/6] compress tests for multivariate responses

---
 tests/testthat/test-kernelshap-multioutput.R | 88 --------------------
 tests/testthat/test-multioutput.R            | 45 ++++++++++
 tests/testthat/test-permshap-multioutput.R   | 77 -----------------
 3 files changed, 45 insertions(+), 165 deletions(-)
 delete mode 100644 tests/testthat/test-kernelshap-multioutput.R
 create mode 100644 tests/testthat/test-multioutput.R
 delete mode 100644 tests/testthat/test-permshap-multioutput.R

diff --git a/tests/testthat/test-kernelshap-multioutput.R b/tests/testthat/test-kernelshap-multioutput.R
deleted file mode 100644
index e316aa3..0000000
--- a/tests/testthat/test-kernelshap-multioutput.R
+++ /dev/null
@@ -1,88 +0,0 @@
-#===========================================================
-# Tests for multi-output model
-#===========================================================
-
-# Model with non-linearities and interactions
-y <- iris$Sepal.Length
-Y <- as.matrix(iris[, c("Sepal.Length", "Sepal.Width")])
-
-fity <- lm(y ~ poly(Petal.Width, degree = 2L) * Species, data = iris)
-fitY <- lm(Y ~ poly(Petal.Width, degree = 2L) * Species, data = iris)
-
-x <- c("Petal.Width", "Species")
-
-predsy <- unname(predict(fity, iris))
-predsY <- unname(predict(fitY, iris))
-
-sy <- kernelshap(fity, iris[1:5, x], bg_X = iris, verbose = FALSE)
-sY <- kernelshap(fitY, iris[1:5, x], bg_X = iris, verbose = FALSE)
-
-test_that("Baseline equals average prediction on background data", {
-  expect_equal(sY$baseline, unname(colMeans(Y)))
-})
-
-test_that("SHAP + baseline = prediction", {
-  expect_equal(rowSums(sY$S[[1L]]) + sY$baseline[1L], predsY[1:5, 1L])
-  expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[1:5, 2L])
-})
-
-test_that("First dimension of multioutput model equals single output (approx)", {
-  expect_equal(sY$baseline[1L], sy$baseline)
-  expect_equal(sY$S[[1L]], sy$S)
-})
-
-test_that("Decomposing a single row works", {
-  sY <- kernelshap(fitY, iris[1L, x], bg_X = iris, verbose = FALSE)
-  
-  expect_equal(sY$baseline, unname(colMeans(Y)))
-  expect_equal(rowSums(sY$S[[1L]]) + sY$baseline[1L], predsY[1L, 1L])
-  expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[1L, 2L])
-})
-
-fitY <- lm(Y ~ poly(Petal.Width, degree = 2L), data = iris)
-x <- "Petal.Width"
-predsY <- unname(predict(fitY, iris))
-
-test_that("Special case p = 1 works", {
-  sY <- kernelshap(fitY, iris[1:5, x, drop = FALSE], bg_X = iris, verbose = FALSE)
-  
-  expect_equal(sY$baseline, unname(colMeans(Y)))
-  expect_equal(unname(rowSums(sY$S[[2L]]) + sY$baseline[2L]), predsY[1:5, 2L])
-  expect_equal(sY$SE[[1L]][1L], 0)
-})
-
-fitY <- lm(Y ~ Petal.Length + Petal.Width, data = iris[1:4])
-X <- data.matrix(iris[2:4])
-pred_fun <- function(fit, X) predict(fit, as.data.frame(X))
-predsY <- unname(pred_fun(fitY, X))
-sY <- kernelshap(fitY, X[1:3, ], pred_fun = pred_fun, bg_X = X, verbose = FALSE)
-
-test_that("Matrix input is fine", {
-  expect_true(is.kernelshap(sY))
-  expect_equal(sY$baseline, unname(colMeans(Y)))
-  expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[1:3, 2L])
-})
-
-## Now with case weights
-fitY <- lm(
-  Y ~ poly(Petal.Width, degree = 2L) * Species, data = iris, weights = Petal.Length
-)
-x <- c("Petal.Width", "Species")
-predsY <- unname(predict(fitY, iris))
-sY <- kernelshap(
-  fitY, 
-  iris[5:10, x], 
-  pred_fun = predict, 
-  bg_X = iris, 
-  bg_w = iris$Petal.Length, 
-  verbose = FALSE
-)
-
-test_that("Baseline equals weighted average prediction on background data", {
-  expect_equal(sY$baseline[1L], weighted.mean(Y[, 1L], iris$Petal.Length))
-  expect_equal(sY$baseline[2L], weighted.mean(Y[, 2L], iris$Petal.Length))
-})
-
-test_that("SHAP + baseline = prediction works with case weights", {
-  expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[5:10, 2L])
-})
diff --git a/tests/testthat/test-multioutput.R b/tests/testthat/test-multioutput.R
new file mode 100644
index 0000000..b969a4b
--- /dev/null
+++ b/tests/testthat/test-multioutput.R
@@ -0,0 +1,45 @@
+# Model with non-linearities and interactions
+y <- iris$Sepal.Length
+Y <- as.matrix(iris[, c("Sepal.Length", "Sepal.Width")])
+
+fit_y <- lm(y ~ poly(Petal.Width, degree = 2L) * Species, data = iris)
+fit_Y <- lm(Y ~ poly(Petal.Width, degree = 2L) * Species, data = iris)
+
+x <- c("Petal.Width", "Species")
+J <- c(1L, 51L, 101L)
+
+preds_y <- unname(predict(fit_y, iris))
+preds_Y <- unname(predict(fit_Y, iris))
+
+shap_y <- list(
+  kernelshap(fit_y, iris[J, x], bg_X = iris, verbose = FALSE),
+  permshap(fit_y, iris[J, x], bg_X = iris, verbose = FALSE)
+)
+
+shap_Y <- list(
+  kernelshap(fit_Y, iris[J, x], bg_X = iris, verbose = FALSE),
+  permshap(fit_Y, iris[J, x], bg_X = iris, verbose = FALSE)
+)
+
+test_that("Baseline equals average prediction on background data", {
+  for (i in 1:2) {
+    expect_equal(shap_Y[[i]]$baseline, unname(colMeans(Y)))  
+  }
+})
+
+test_that("SHAP + baseline = prediction", {
+  for (i in 1:2) {
+    s <- shap_Y[[i]]
+    expect_equal(rowSums(s$S[[1L]]) + s$baseline[1L], preds_Y[J, 1L])
+    expect_equal(rowSums(s$S[[2L]]) + s$baseline[2L], preds_Y[J, 2L])
+  }
+})
+
+test_that("First dimension of multioutput model equals single output", {
+  for (i in 1:2) {
+    expect_equal(shap_Y[[i]]$baseline[1L], shap_y[[i]]$baseline)
+    expect_equal(shap_Y[[i]]$S[[1L]], shap_y[[i]]$S)
+  }
+})
+
+
diff --git a/tests/testthat/test-permshap-multioutput.R b/tests/testthat/test-permshap-multioutput.R
deleted file mode 100644
index 7dd7561..0000000
--- a/tests/testthat/test-permshap-multioutput.R
+++ /dev/null
@@ -1,77 +0,0 @@
-#===========================================================
-# Tests for multi-output model
-#===========================================================
-
-# Model with non-linearities and interactions
-y <- iris$Sepal.Length
-Y <- as.matrix(iris[, c("Sepal.Length", "Sepal.Width")])
-
-fity <- lm(y ~ poly(Petal.Width, degree = 2L) * Species, data = iris)
-fitY <- lm(Y ~ poly(Petal.Width, degree = 2L) * Species, data = iris)
-
-x <- c("Petal.Width", "Species")
-
-predsy <- unname(predict(fity, iris))
-predsY <- unname(predict(fitY, iris))
-
-sy <- permshap(fity, iris[1:5, x], bg_X = iris, verbose = FALSE)
-sY <- permshap(fitY, iris[1:5, x], bg_X = iris, verbose = FALSE)
-
-test_that("Baseline equals average prediction on background data", {
-  expect_equal(sY$baseline, unname(colMeans(Y)))
-})
-
-test_that("SHAP + baseline = prediction", {
-  expect_equal(rowSums(sY$S[[1L]]) + sY$baseline[1L], predsY[1:5, 1L])
-  expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[1:5, 2L])
-})
-
-test_that("First dimension of multioutput model equals single output (approx)", {
-  expect_equal(sY$baseline[1L], sy$baseline)
-  expect_equal(sY$S[[1L]], sy$S)
-})
-
-test_that("Decomposing a single row works", {
-  sY <- permshap(fitY, iris[1L, x], bg_X = iris, verbose = FALSE)
-  
-  expect_equal(sY$baseline, unname(colMeans(Y)))
-  expect_equal(rowSums(sY$S[[1L]]) + sY$baseline[1L], predsY[1L, 1L])
-  expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[1L, 2L])
-})
-
-fitY <- lm(Y ~ Petal.Length + Petal.Width, data = iris[1:4])
-X <- data.matrix(iris[2:4])
-pred_fun <- function(fit, X) predict(fit, as.data.frame(X))
-predsY <- unname(pred_fun(fitY, X))
-sY <- permshap(fitY, X[1:3, ], pred_fun = pred_fun, bg_X = X, verbose = FALSE)
-
-test_that("Matrix input is fine", {
-  expect_true(is.kernelshap(sY))
-  expect_equal(sY$baseline, unname(colMeans(Y)))
-  expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[1:3, 2L])
-})
-
-## Now with case weights
-fitY <- lm(
-  Y ~ poly(Petal.Width, degree = 2L) * Species, data = iris, weights = Petal.Length
-)
-x <- c("Petal.Width", "Species")
-predsY <- unname(predict(fitY, iris))
-sY <- permshap(
-  fitY,
-  iris[5:10, x],
-  pred_fun = predict,
-  bg_X = iris,
-  bg_w = iris$Petal.Length,
-  verbose = FALSE
-)
-
-test_that("Baseline equals weighted average prediction on background data", {
-  expect_equal(sY$baseline[1L], weighted.mean(Y[, 1L], iris$Petal.Length))
-  expect_equal(sY$baseline[2L], weighted.mean(Y[, 2L], iris$Petal.Length))
-})
-
-test_that("SHAP + baseline = prediction works with case weights", {
-  expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[5:10, 2L])
-})
-

From 34942134d48a4d054a6628c6c554cb3863b2a7bb Mon Sep 17 00:00:00 2001
From: Michael Mayer <mayermichael79@gmail.com>
Date: Wed, 7 Aug 2024 19:54:25 +0200
Subject: [PATCH 6/6] move test-weights to right place

---
 tests/{ => testthat}/test-weights.R | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{ => testthat}/test-weights.R (100%)

diff --git a/tests/test-weights.R b/tests/testthat/test-weights.R
similarity index 100%
rename from tests/test-weights.R
rename to tests/testthat/test-weights.R