Merge pull request #143 from ModelOriented/improve-tests

More compact tests
ModelOriented · Aug 7, 2024 · 55c8ea7 · 55c8ea7
2 parents 16f9c3e + 3494213
commit 55c8ea7
Show file tree

Hide file tree

Showing 12 changed files with 383 additions and 711 deletions.
diff --git a/tests/testthat/test-additive_shap.R b/tests/testthat/test-additive_shap.R
@@ -1,60 +1,34 @@
-test_that("simple additive formula gives same as permshap() if full training data is used as bg data", {
-  form <- Sepal.Length ~ .
-  fit_lm <- lm(form, data = iris)
-  fit_glm <- glm(form, data = iris, family = quasipoisson)
-
-  s_add_lm <- additive_shap(fit_lm, head(iris), verbose = FALSE)
-  s_add_glm <- additive_shap(fit_glm, head(iris), verbose = FALSE)
-
-  X <- head(iris[-1L])
-  s_perm_lm <- permshap(fit_lm, X = X, bg_X = iris, verbose = FALSE)
-  s_perm_glm <- permshap(
-    fit_glm, X = X, bg_X = iris, verbose = FALSE
+test_that("Additive formulas give same as agnostic SHAP with full training data as bg data", {
+  formulas <- list(  # additive model formulas; entry j is paired with xvars[[j]]
+    Sepal.Length ~ .,
+    Sepal.Length ~ log(Sepal.Width) + poly(Sepal.Width, 2) + Petal.Length,
+    Sepal.Length ~ log(Sepal.Width) + Species + poly(Petal.Length, 2)
   )
-  expect_equal(s_add_lm$S, s_perm_lm$S)
-  expect_equal(s_add_glm$S, s_perm_glm$S)
-  expect_equal(s_add_lm$predictions, unname(predict(fit_lm, newdata = X)))
-  expect_equal(s_add_glm$predictions, unname(predict(fit_glm, newdata = X)))
-})
-
-test_that("formula where feature appears in two terms gives same as permshap() if full training data is used as bg data", {
-  form <- Sepal.Length ~ log(Sepal.Width) + poly(Sepal.Width, 2) + Petal.Length
-  fit_lm <- lm(form, data = iris)
-  fit_glm <- glm(form, data = iris, family = quasipoisson)
-
-  s_add_lm <- additive_shap(fit_lm, head(iris), verbose = FALSE)
-  s_add_glm <- additive_shap(fit_glm, head(iris), verbose = FALSE)
-
-  X <- head(iris[2:3])
-  s_perm_lm <- permshap(fit_lm, X = X, bg_X = iris, verbose = FALSE)
-  s_perm_glm <- permshap(
-    fit_glm, X = X, bg_X = iris, verbose = FALSE
+  xvars <- list(  # features of formulas[[j]], later passed as feature_names
+    setdiff(colnames(iris), "Sepal.Length"),
+    c("Sepal.Width", "Petal.Length"),
+    c("Sepal.Width", "Petal.Length", "Species")
   )
-  expect_equal(s_add_lm$S, s_perm_lm$S)
-  expect_equal(s_add_glm$S, s_perm_glm$S)
-  expect_equal(s_add_lm$predictions, unname(predict(fit_lm, newdata = X)))
-  expect_equal(s_add_glm$predictions, unname(predict(fit_glm, newdata = X)))
-})
-
-test_that("formula with complicated terms gives same as permshap() if full training data is used as bg data", {
-  form <- Sepal.Length ~ 
-    log(Sepal.Width) + Species + poly(Petal.Length, 2)
 
-  fit_lm <- lm(form, data = iris)
-  fit_glm <- glm(form, data = iris, family = quasipoisson)
-
-  s_add_lm <- additive_shap(fit_lm, head(iris), verbose = FALSE)
-  s_add_glm <- additive_shap(fit_glm, head(iris), verbose = FALSE)
-
-  X <- head(iris[c(2, 3, 5)])
-  s_perm_lm <- permshap(fit_lm, X = X, bg_X = iris, verbose = FALSE)
-  s_perm_glm <- permshap(
-    fit_glm, X = X, bg_X = iris, verbose = FALSE
-  )
-  expect_equal(s_add_lm$S, s_perm_lm$S)
-  expect_equal(s_add_glm$S, s_perm_glm$S)
-  expect_equal(s_add_lm$predictions, unname(predict(fit_lm, newdata = X)))
-  expect_equal(s_add_glm$predictions, unname(predict(fit_glm, newdata = X)))
+  for (j in seq_along(formulas)) {
+    fit <- list(
+      lm = lm(formulas[[j]], data = iris),
+      glm = glm(formulas[[j]], data = iris, family = quasipoisson)
+    )
+
+    shap1 <- lapply(fit, additive_shap, head(iris), verbose = FALSE)
+    shap2 <- lapply(
+      fit, permshap, head(iris), bg_X = iris, verbose = FALSE, feature_names = xvars[[j]]
+    )
+    shap3 <- lapply(
+      fit, kernelshap, head(iris), bg_X = iris, verbose = FALSE, feature_names = xvars[[j]]
+    )
+
+    for (i in seq_along(fit)) {
+      expect_equal(shap1[[i]]$S, shap2[[i]]$S)
+      expect_equal(shap1[[i]]$S, shap3[[i]]$S)
+    }
+  }
 })
 
 test_that("formulas with more than one covariate per term fail", {
@@ -65,10 +39,12 @@ test_that("formulas with more than one covariate per term fail", {
   )
 
   for (formula in formulas_bad) {
-    fit <- lm(formula, data = iris)
-    expect_error(s <- additive_shap(fit, head(iris), verbose = FALSE))
-
-    fit <- glm(formula, data = iris, family = quasipoisson)
-    expect_error(s <- additive_shap(fit, head(iris), verbose = FALSE))
+    fit <- list(
+      lm = lm(formula, data = iris),
+      glm = glm(formula, data = iris, family = quasipoisson)
+    )
+    for (f in fit)
+      expect_error(additive_shap(f, head(iris), verbose = FALSE))
   }  
 })
+
diff --git a/tests/testthat/test-basic.R b/tests/testthat/test-basic.R
@@ -0,0 +1,190 @@
+# Model with non-linearities and interactions
+fit <- lm(
+  Sepal.Length ~ poly(Petal.Width, degree = 2L) * Species + Petal.Length, data = iris
+)
+x <- c("Petal.Width", "Species", "Petal.Length")
+preds <- unname(predict(fit, iris))
+J <- c(1L, 51L, 101L)
+
+shap <- list(
+  kernelshap(fit, iris[x], bg_X = iris, verbose = FALSE),
+  permshap(fit, iris[x], bg_X = iris, verbose = FALSE)
+)
+
+test_that("baseline equals average prediction on background data", {
+  for (s in shap)
+    expect_equal(s$baseline, mean(iris$Sepal.Length))
+})
+
+test_that("SHAP + baseline = prediction for exact mode", {
+  for (s in shap)
+    expect_equal(rowSums(s$S) + s$baseline, preds)
+})
+
+test_that("auto-selection of background data works", {
+  # Here, the background data equals the full X
+  shap2 <- list(
+    kernelshap(fit, iris[x], verbose = FALSE),
+    permshap(fit, iris[x], verbose = FALSE)
+  )
+
+  for (i in seq_along(shap)) {  # both lists are unnamed: index them, `$S` would be NULL
+    expect_equal(shap[[i]]$S, shap2[[i]]$S)
+  }
+})
+
+test_that("missing bg_X gives error if X is very small", {
+  for (explain in c(kernelshap, permshap)) {  # same check for both algorithms
+    expect_error(explain(fit, iris[1:10, x], verbose = FALSE))
+  }
+})
+
+test_that("missing bg_X gives warning if X is quite small", {
+  for (algo in c(kernelshap, permshap))
+    expect_warning(algo(fit, iris[1:30, x], verbose = FALSE))
+})
+
+test_that("selection of bg_X can be controlled via bg_n", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[x], verbose = FALSE, bg_n = 20L)
+    expect_equal(nrow(s$bg_X), 20L)
+  }
+})
+
+test_that("using foreach (non-parallel) gives the same as normal mode", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[J, x], bg_X = iris, verbose = FALSE)
+    s2 <- suppressWarnings(
+      algo(fit, iris[J, x], bg_X = iris, verbose = FALSE, parallel = TRUE)
+    )
+    expect_equal(s, s2)
+  }
+})
+
+test_that("verbose is chatty", {
+  for (algo in c(kernelshap, permshap)) {
+    capture_output(expect_message(algo(fit, iris[J, x], bg_X = iris, verbose = TRUE)))
+  }
+})
+
+test_that("large background data cause warning", {
+  # Takes a bit of time, thus only for one algo
+  large_bg <- iris[rep(1:150, 230), ]
+  expect_warning(
+    kernelshap(fit, iris[1L, x], bg_X = large_bg, verbose = FALSE)
+  )
+})
+
+test_that("Decomposing a single row works", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[1L, x], bg_X = iris, verbose = FALSE)
+    expect_equal(s$baseline, mean(iris$Sepal.Length))
+    expect_equal(rowSums(s$S) + s$baseline, preds[1])
+  }
+})
+
+test_that("Background data can contain additional columns", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[1L, x], bg_X = cbind(d = 1, iris), verbose = FALSE)
+    expect_true(is.kernelshap(s))
+  }
+})
+
+test_that("Background data can contain only one single row", {
+  for (algo in c(kernelshap, permshap))
+    expect_no_error(algo(fit, iris[1L, x], bg_X = iris[150L, ], verbose = FALSE))
+})
+
+test_that("feature_names can drop columns from SHAP calculations", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[J, ], bg_X = iris, feature_names = x, verbose = FALSE)
+    expect_equal(colnames(s$S), x)
+  }
+})
+
+test_that("feature_names can rearrange column names in result", {
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, iris[J, ], bg_X = iris, feature_names = rev(x), verbose = FALSE)
+    expect_equal(colnames(s$S), rev(x))
+  }
+})
+
+test_that("feature_names must be in colnames(X) and colnames(bg_X)", {
+  for (algo in c(kernelshap, permshap)) {
+    expect_error(algo(fit, iris, bg_X = cbind(iris, a = 1), feature_names = "a"))
+    expect_error(algo(fit, cbind(iris, a = 1), bg_X = iris, feature_names = "a"))
+  }
+})
+
+test_that("Matrix input is fine", {
+  X <- data.matrix(iris)
+  pred_fun <- function(m, X) {
+    data <- as.data.frame(X) |> 
+      transform(Species = factor(Species, labels = levels(iris$Species)))
+    predict(m, data)
+  }
+
+  for (algo in c(kernelshap, permshap)) {
+    s <- algo(fit, X[J, x], pred_fun = pred_fun, bg_X = X, verbose = FALSE)
+
+    expect_equal(s$baseline, mean(iris$Sepal.Length))  # baseline is mean of bg
+    expect_equal(rowSums(s$S) + s$baseline, preds[J])  # sum shap = centered preds
+    expect_no_error(                                   # additional cols in bg are ok
+      algo(fit, X[J, x], pred_fun = pred_fun, bg_X = cbind(d = 1, X), verbose = FALSE)
+    )
+    expect_error(                                      # feature_names are less flexible
+      algo(fit, X[J, ], pred_fun = pred_fun, bg_X = X, 
+           verbose = FALSE, feature_names = "Sepal.Width")
+    )
+  }
+})
+
+test_that("Special case p = 1 works only for kernelshap()", {
+  capture_output(
+    expect_message(
+      s <- kernelshap(fit, X = iris[J, ], bg_X = iris, feature_names = "Petal.Width")
+    )
+  )
+  expect_equal(s$baseline, mean(iris$Sepal.Length))
+  expect_equal(unname(rowSums(s$S)) + s$baseline, preds[J])
+  expect_equal(s$SE[1L], 0)
+
+  expect_error(  # Not implemented
+    permshap(
+      fit, iris[J, ], bg_X = iris, verbose = FALSE, feature_names = "Petal.Width"
+    )
+  )
+})
+
+test_that("exact hybrid kernelshap() is similar to exact (non-hybrid)", {
+  s1 <- kernelshap(
+    fit, iris[J, x], bg_X = iris, exact = FALSE, hybrid_degree = 1L, verbose = FALSE
+  )
+  expect_equal(s1$S, shap[[1L]]$S[J, ])
+})
+
+test_that("baseline equals average prediction on background data in sampling mode", {
+  s2 <- kernelshap(  # sampling-mode settings: exact = FALSE, hybrid_degree = 0L
+    fit, iris[J, x], bg_X = iris, hybrid_degree = 0L, verbose = FALSE, exact = FALSE
+  )
+  expect_equal(s2$baseline, mean(iris$Sepal.Length))
+})
+
+test_that("SHAP + baseline = prediction for sampling mode", {
+  s2 <- kernelshap(  # sampling-mode settings: exact = FALSE, hybrid_degree = 0L
+    fit, iris[J, x], bg_X = iris, hybrid_degree = 0L, verbose = FALSE, exact = FALSE
+  )
+  expect_equal(rowSums(s2$S) + s2$baseline, preds[J])
+})
+
+test_that("kernelshap works for large p (hybrid case)", {
+  set.seed(9L)
+  X <- data.frame(matrix(rnorm(20000L), ncol = 100L))
+  y <- X[, 1L] * X[, 2L] * X[, 3L]
+  fit <- lm(y ~ X1:X2:X3 + ., data = cbind(y = y, X))
+  s <- kernelshap(fit, X[1L, ], bg_X = X, verbose = FALSE)
+
+  expect_equal(s$baseline, mean(y))
+  expect_equal(rowSums(s$S) + s$baseline, unname(predict(fit, X[1L, ])))
+})
+
diff --git a/tests/testthat/test-kernelshap-multioutput.R b/tests/testthat/test-kernelshap-multioutput.R
diff --git a/tests/testthat/test-kernelshap-utils.R b/tests/testthat/test-kernelshap-utils.R
@@ -1,4 +1,4 @@
-test_that("Sum of kernel weights is 1", {
+test_that("sum of kernel weights is 1", {
   for (p in 2:10) {
     expect_equal(sum(kernel_weights(p)), 1.0)
   }
@@ -121,3 +121,4 @@ test_that("input_partly_exact(p, deg) fails for bad p or deg", {
   expect_error(input_partly_exact(2L, deg = 0L, feature_names = LETTERS[1:p]))
   expect_error(input_partly_exact(5L, deg = 3L, feature_names = LETTERS[1:p]))
 })
+