Merge pull request #135 from ModelOriented/remove-factor-predictions
Remove factor predictions
mayer79 authored Jul 4, 2024
2 parents 1b7072e + 3260e31 commit 5c3914f
Showing 11 changed files with 33 additions and 170 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -14,3 +14,4 @@
 ^pkgdown/_pkgdown\.yml$
 ^\.github$
 ^revdep$
+^compare_with_python.R$
9 changes: 6 additions & 3 deletions DESCRIPTION
@@ -1,9 +1,11 @@
 Package: kernelshap
 Title: Kernel SHAP
-Version: 0.5.1
+Version: 0.6.0
 Authors@R: c(
-    person("Michael", "Mayer", , "[email protected]", role = c("aut", "cre")),
-    person("David", "Watson", , "[email protected]", role = "aut"),
+    person("Michael", "Mayer", , "[email protected]", role = c("aut", "cre"),
+           comment = c(ORCID = "0000-0002-6148-5756")),
+    person("David", "Watson", , "[email protected]", role = "aut",
+           comment = c(ORCID = "0000-0001-9632-2159")),
     person("Przemyslaw", "Biecek", , "[email protected]", role = "ctb",
            comment = c(ORCID = "0000-0001-8423-1823"))
 )
@@ -22,6 +24,7 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.1
 Imports:
     foreach,
+    MASS,
     stats,
     utils
 Suggests:
11 changes: 9 additions & 2 deletions NEWS.md
@@ -1,8 +1,15 @@
-# kernelshap 0.5.1
+# kernelshap 0.6.0
+
+This release is intended to be the last before stable version 1.0.0.
+
+## Major changes
+
+- Factor-valued predictions are no longer supported.
 
 ## Maintenance
 
-- Fix CRAN note regarding unavailable link to `gam::gam()`.
+- Fix CRAN note about unavailable link to `gam::gam()`.
+- Added dependency on {MASS} for calculating the Moore-Penrose generalized matrix inverse.
 
 # kernelshap 0.5.0
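Editor's sketch (not part of the commit) — why {MASS} enters the Imports: `MASS::ginv()` computes the Moore-Penrose generalized inverse via SVD and replaces the copy of `ginv()` previously bundled in R/utils_kernelshap.R (see below). The example matrix is illustrative only:

```r
library(MASS)

A <- matrix(c(1, 2, 2, 4), nrow = 2)  # rank 1, so solve(A) would fail
Ainv <- ginv(A)                       # Moore-Penrose generalized inverse

# The defining property A %*% Ainv %*% A == A holds up to numerical error
all.equal(A %*% Ainv %*% A, A)  # TRUE
```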
68 changes: 7 additions & 61 deletions R/utils.R
@@ -32,9 +32,7 @@ rep_rows <- function(x, i) {
 #' @param w Optional case weights.
 #' @returns A (1 x ncol(x)) matrix of column means.
 wcolMeans <- function(x, w = NULL, ...) {
-  if (!is.matrix(x)) {
-    x <- as.matrix(x)
-  }
+  x <- as.matrix(x)
   out <- if (is.null(w)) colMeans(x) else colSums(x * w) / sum(w)
   t.default(out)
 }
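Editor's sketch (not part of the commit) — what `wcolMeans()` computes, with made-up numbers:

```r
x <- cbind(a = c(1, 2, 3), b = c(4, 5, 6))
w <- c(1, 1, 2)  # case weights

t(colMeans(x))              # unweighted column means: 2, 5
t(colSums(x * w) / sum(w))  # weighted column means:   2.25, 5.25
```

Both results are (1 x ncol(x)) matrices, matching the documented return value.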
@@ -89,15 +87,9 @@ get_vz <- function(X, bg, Z, object, pred_fun, w, ...) {
       X[[v]][s] <- bg[[v]][s]
     }
   }
-  preds <- align_pred(pred_fun(object, X, ...), ohe = FALSE)
+  preds <- align_pred(pred_fun(object, X, ...))
 
-  # Aggregate (distinguishing some faster cases)
-  if (is.factor(preds)) {
-    if (is.null(w)) {
-      return(rowmean_factor(preds, ngroups = m))
-    }
-    preds <- fdummy(preds)
-  }
+  # Aggregate (distinguishing fast 1-dim case)
   if (ncol(preds) == 1L) {
     return(wrowmean_vector(preds, ngroups = m, w = w))
   }
@@ -148,24 +140,20 @@ reorganize_list <- function(alist) {
   lapply(out, as.matrix)
 }
 
-#' Aligns Predictions (adapted from {hstats})
+#' Aligns Predictions
 #'
 #' Turns predictions into matrix.
 #'
 #' @noRd
 #' @keywords internal
 #'
 #' @param x Object representing model predictions.
-#' @param ohe If `x` is a factor: should it be one-hot encoded? Default is `TRUE`.
-#' @returns Like `x`, but converted to matrix (or a factor).
-align_pred <- function(x, ohe = TRUE) {
+#' @returns Like `x`, but converted to matrix.
+align_pred <- function(x) {
   if (is.data.frame(x) && ncol(x) == 1L) {
     x <- x[[1L]]
   }
-  if (ohe && is.factor(x)) {
-    return(fdummy(x))
-  }
-  if (is.matrix(x) || is.factor(x)) x else as.matrix(x)
+  as.matrix(x)
 }
 
 #' Head of List Elements
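Editor's sketch (not part of the commit) — the simplified `align_pred()` now unconditionally coerces predictions to a matrix; the factor/one-hot branch is gone:

```r
align_pred <- function(x) {
  if (is.data.frame(x) && ncol(x) == 1L) {
    x <- x[[1L]]  # unwrap single-column data frames first
  }
  as.matrix(x)
}

align_pred(data.frame(pred = c(0.1, 0.9)))  # 2 x 1 matrix
align_pred(c(0.1, 0.9))                     # 2 x 1 matrix
align_pred(cbind(p1 = 0.2, p2 = 0.8))       # 1 x 2 matrix, returned as is
```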
@@ -252,28 +240,6 @@ rep_each <- function(m, each) {
   out
 }
 
-#' Fast OHE (from {hstats})
-#'
-#' Turns a vector/factor into its one-hot encoding.
-#' Ingeniously written by Mathias Ambuehl.
-#'
-#' Working with integers instead of doubles would be slightly faster, but at the price
-#' of potential integer overflows in subsequent calculations.
-#'
-#' @noRd
-#' @keywords internal
-#'
-#' @param x Object representing model predictions.
-#' @returns Like `x`, but converted to matrix.
-fdummy <- function(x) {
-  x <- as.factor(x)
-  lev <- levels(x)
-  out <- matrix(0, nrow = length(x), ncol = length(lev))
-  out[cbind(seq_along(x), as.integer(x))] <- 1
-  colnames(out) <- lev
-  out
-}
-
 #' Grouped Means for Single-Column Matrices (adapted from {hstats})
 #'
 #' Grouped means for matrix with single column over fixed-length groups.
@@ -299,26 +265,6 @@ wrowmean_vector <- function(x, ngroups = 1L, w = NULL) {
   out
 }
 
-#' rowmean_vector() for factors (copied from {hstats})
-#'
-#' Grouped `colMeans_factor()` for equal-sized groups.
-#'
-#' @noRd
-#' @keywords internal
-#'
-#' @param x Factor-like.
-#' @param ngroups Number of subsequent, equal-sized groups.
-#' @returns Matrix with column names.
-rowmean_factor <- function(x, ngroups = 1L) {
-  x <- as.factor(x)
-  lev <- levels(x)
-  n_bg <- length(x) %/% ngroups
-  dim(x) <- c(n_bg, ngroups)
-  out <- t.default(apply(x, 2L, FUN = tabulate, nbins = length(lev)))
-  colnames(out) <- lev
-  out / n_bg
-}
-
 #' Basic Input Checks
 #'
 #' @noRd
28 changes: 1 addition & 27 deletions R/utils_kernelshap.R
@@ -88,38 +88,12 @@ kernelshap_one <- function(x, v1, object, pred_fun, feature_names, bg_w, exact,
 # A: (p x p), b: (p x K), constraint: (1 x K)
 solver <- function(A, b, constraint) {
   p <- ncol(A)
-  Ainv <- ginv(A)
+  Ainv <- MASS::ginv(A)
   dimnames(Ainv) <- dimnames(A)
   s <- (matrix(colSums(Ainv %*% b), nrow = 1L) - constraint) / sum(Ainv) # (1 x K)
   Ainv %*% (b - s[rep.int(1L, p), , drop = FALSE]) # (p x K)
 }
 
-ginv <- function(X, tol = sqrt(.Machine$double.eps)) {
-  ##
-  ## Based on an original version in package MASS 7.3.56
-  ## (with permission)
-  ##
-  if (length(dim(X)) > 2L || !(is.numeric(X) || is.complex(X))) {
-    stop("'X' must be a numeric or complex matrix")
-  }
-  if (!is.matrix(X)) {
-    X <- as.matrix(X)
-  }
-  Xsvd <- svd(X)
-  if (is.complex(X)) {
-    Xsvd$u <- Conj(Xsvd$u)
-  }
-  Positive <- Xsvd$d > max(tol * Xsvd$d[1L], 0)
-  if (all(Positive)) {
-    Xsvd$v %*% (1 / Xsvd$d * t(Xsvd$u))
-  } else if (!any(Positive)) {
-    array(0, dim(X)[2L:1L])
-  } else {
-    Xsvd$v[, Positive, drop = FALSE] %*%
-      ((1 / Xsvd$d[Positive]) * t(Xsvd$u[, Positive, drop = FALSE]))
-  }
-}
-
 # Draw m binary vectors z of length p with sum(z) distributed according
 # to Kernel SHAP weights -> (m x p) matrix.
 # The argument S can be used to restrict the range of sum(z).
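Editor's sketch (not part of the commit) — `solver()` is the closed-form solution of the equality-constrained least-squares problem behind Kernel SHAP: by construction, the column sums of the returned (p x K) matrix equal `constraint` (prediction minus baseline). A self-contained check with arbitrary random inputs; any symmetric `A` works here, even a rank-deficient one, thanks to the generalized inverse:

```r
set.seed(1)
p <- 4
A <- crossprod(matrix(rnorm(5 * p), ncol = p))  # symmetric (p x p)
b <- matrix(rnorm(p), ncol = 1)                 # (p x 1), i.e., K = 1
constraint <- matrix(1.5, nrow = 1L, ncol = 1)  # e.g., f(x) minus baseline

Ainv <- MASS::ginv(A)
s <- (matrix(colSums(Ainv %*% b), nrow = 1L) - constraint) / sum(Ainv)
beta <- Ainv %*% (b - s[rep.int(1L, p), , drop = FALSE])

colSums(beta)  # 1.5: the SHAP values sum to the constraint
```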
21 changes: 4 additions & 17 deletions README.md
@@ -15,7 +15,7 @@

 The package contains two workhorses to calculate SHAP values for *any* model:
 
-- `permshap()`: Exact permutation SHAP algorithm of [1]. Available for up to $p=14$ features.
+- `permshap()`: Exact permutation SHAP algorithm of [1]. Recommended for up to 8-10 features.
 - `kernelshap()`: Kernel SHAP algorithm of [2] and [3]. By default, exact Kernel SHAP is used for up to $p=8$ features, and an almost exact hybrid algorithm otherwise.
 
 Furthermore, the function `additive_shap()` produces SHAP values for additive models fitted via `lm()`, `glm()`, `mgcv::gam()`, `mgcv::bam()`, `gam::gam()`,
@@ -38,7 +38,6 @@ If the training data is small, use the full training data. In cases with a natur
 **Remarks**
 
 - Multivariate predictions are handled at no additional computational cost.
-- Factor-valued predictions are automatically turned into one-hot-encoded columns.
 - Case weights are supported via the argument `bg_w`.
 - By changing the defaults in `kernelshap()`, the iterative pure sampling approach in [3] can be enforced.
 - The `additive_shap()` explainer is easier to use: Only the model and `X` are required.
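Editor's sketch (not part of the commit) — a minimal illustration of the two workhorses and the `bg_w` argument from the remarks above; `fit`, `X`, and `w` are placeholders for your own model, explanation data, and case weights:

```r
library(kernelshap)

# Exact permutation SHAP (small number of features)
ps <- permshap(fit, X = X, bg_X = X)

# Kernel SHAP: exact for p <= 8 features, almost exact hybrid otherwise
ks <- kernelshap(fit, X = X, bg_X = X)

# Case weights for the background data
ks_w <- kernelshap(fit, X = X, bg_X = X, bg_w = w)
```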
@@ -55,7 +54,7 @@ devtools::install_github("ModelOriented/kernelshap")
 
 ## Basic Usage
 
-Let's model diamond prices with a (not too complex) random forest. As an alternative, you could use the {treeshap} package in this situation.
+Let's model diamond prices with a random forest. As an alternative, you could use the {treeshap} package in this situation.
 
 ```r
 library(kernelshap)
@@ -236,7 +235,6 @@ sv_dependence(shap_values, v = "carat", color_var = NULL)
 {kernelshap} supports multivariate predictions like:
 
 - probabilistic classification,
-- non-probabilistic classification (factor-valued responses are turned into dummies),
 - regression with multivariate response, and
 - predictions found by applying multiple regression models.

@@ -255,10 +253,6 @@ ps_prob <- permshap(fit_prob, X = iris[, -5], bg_X = iris) |>
   shapviz()
 sv_importance(ps_prob)
 sv_dependence(ps_prob, "Petal.Length")
-
-# Non-probabilistic classification (results not shown)
-fit_class <- ranger(Species ~ ., data = iris, num.trees = 20)
-ps_class <- permshap(fit_class, X = iris[, -5], bg_X = iris)
 ```

![](man/figures/README-prob-imp.svg)
@@ -291,7 +285,7 @@ iris_wf <- workflow() |>
 fit <- iris_wf |>
   fit(iris)
 
-system.time( # 6s
+system.time( # 4s
   ps <- permshap(fit, iris[-5], bg_X = iris, type = "prob")
 )
 ps
@@ -333,12 +327,9 @@ task_classif <- TaskClassif$new(id = "1", backend = iris, target = "Species")
 learner_classif <- lrn("classif.rpart", predict_type = "prob")
 learner_classif$train(task_classif)
 
-predict(learner_classif, head(iris)) # setosa setosa # Classes
-predict(learner_classif, head(iris), predict_type = "prob") # Probs per class
-
 x <- learner_classif$selected_features()
 
-# For *probabilistic* classification, pass predict_type = "prob" to mlr3's predict()
+# Don't forget to pass predict_type = "prob" to mlr3's predict()
 ps <- permshap(
   learner_classif, X = iris, bg_X = iris, feature_names = x, predict_type = "prob"
 )
@@ -347,10 +338,6 @@ ps
 #      Petal.Length Petal.Width
 # [1,]    0.6666667           0
 # [2,]    0.6666667           0
-
-# Non-probabilistic classification uses auto-OHE internally
-ps <- permshap(learner_classif, X = iris, bg_X = iris, feature_names = x)
-ps
```

## References
9 changes: 5 additions & 4 deletions packaging.R
@@ -15,15 +15,15 @@ library(usethis)
 use_description(
   fields = list(
     Title = "Kernel SHAP",
-    Version = "0.5.1",
+    Version = "0.6.0",
     Description = "Efficient implementation of Kernel SHAP, see Lundberg and Lee (2017),
     and Covert and Lee (2021) <http://proceedings.mlr.press/v130/covert21a>.
     Furthermore, for up to 14 features, exact permutation SHAP values can be calculated.
     The package plays well together with meta-learning packages like 'tidymodels', 'caret' or 'mlr3'.
     Visualizations can be done using the R package 'shapviz'.",
     `Authors@R` =
-      "c(person('Michael', family='Mayer', role=c('aut', 'cre'), email='[email protected]'),
-      person('David', family='Watson', role='aut', email='[email protected]'),
+      "c(person('Michael', family='Mayer', role=c('aut', 'cre'), email='[email protected]', comment=c(ORCID='0000-0002-6148-5756')),
+      person('David', family='Watson', role='aut', email='[email protected]', comment=c(ORCID='0000-0001-9632-2159')),
       person('Przemyslaw', family='Biecek', email='[email protected]', role='ctb', comment=c(ORCID='0000-0001-8423-1823'))
       )",
     Depends = "R (>= 3.2.0)",
@@ -32,9 +32,10 @@ use_description(
   roxygen = TRUE
 )
 
-use_package("foreach", "Imports")
+use_package("MASS", "Imports")
 use_package("stats", "Imports")
 use_package("utils", "Imports")
+use_package("foreach", "Imports")
 
 use_package("doFuture", "Suggests")
14 changes: 0 additions & 14 deletions tests/testthat/test-kernelshap-multioutput.R
@@ -86,17 +86,3 @@
 test_that("SHAP + baseline = prediction works with case weights", {
   expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[5:10, 2L])
 })
-
-test_that("factor predictions work", {
-  pf <- function(m, X) factor(X[, "v1"], levels = 0:1, labels = c("zero", "one"))
-  X <- cbind(v1 = 0:1, v2 = 0)
-  out <- kernelshap(1, X = X, bg_X = X, pred_fun = pf, verbose = FALSE)
-  expect_equal(colnames(out$S$zero), c("v1", "v2"))
-  expect_equal(names(out$S), c("zero", "one"))
-  expect_equal(out$predictions, cbind(zero = 1:0, one = 0:1))
-
-  # with weights
-  w <- rep(2, nrow(X))
-  out2 <- kernelshap(1, X = X, bg_X = X, bg_w = w, pred_fun = pf, verbose = FALSE)
-  expect_equal(out, out2)
-})
1 change: 0 additions & 1 deletion tests/testthat/test-kernelshap.R
@@ -238,4 +238,3 @@ test_that(
   expect_equal(rowSums(s$S) + s$baseline, unname(predict(fit, X[1L, ])))
 })
 
-
13 changes: 0 additions & 13 deletions tests/testthat/test-permshap-multioutput.R
@@ -75,16 +75,3 @@
   expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[5:10, 2L])
 })
 
-test_that("factor predictions work", {
-  pf <- function(m, X) factor(X[, "v1"], levels = 0:1, labels = c("zero", "one"))
-  X <- cbind(v1 = 0:1, v2 = 0)
-  out <- permshap(1, X = X, bg_X = X, pred_fun = pf, verbose = FALSE)
-  expect_equal(colnames(out$S$zero), c("v1", "v2"))
-  expect_equal(names(out$S), c("zero", "one"))
-  expect_equal(out$predictions, cbind(zero = 1:0, one = 0:1))
-
-  # with weights
-  w <- rep(2, nrow(X))
-  out2 <- permshap(1, X = X, bg_X = X, bg_w = w, pred_fun = pf, verbose = FALSE)
-  expect_equal(out, out2)
-})
28 changes: 0 additions & 28 deletions tests/testthat/test-utils.R
@@ -60,27 +60,6 @@ test_that(
   expect_true(is.integer(rep_each(100, 100)))
 })
 
-test_that("fdummy() works", {
-  x <- c("A", "A", "C", "D")
-  mm <- matrix(model.matrix(~ x + 0), ncol = 3, dimnames = list(NULL, c("A", "C", "D")))
-  expect_equal(fdummy(x), mm)
-})
-
-test_that("fdummy() works for singletons", {
-  x <- c("A")
-  expect_equal(fdummy(x), cbind(A = 1))
-  expect_true(is.matrix(fdummy(x)))
-})
-
-test_that("fdummy() respects factor level order", {
-  x1 <- factor(c("A", "A", "C", "D"))
-  x2 <- factor(x1, levels = rev(levels(x1)))
-  d1 <- fdummy(x1)
-  d2 <- fdummy(x2)
-  expect_equal(d1, d2[, colnames(d1)])
-  expect_equal(colnames(d1), rev(colnames(d2)))
-})
-
 test_that("wrowmean_vector() works for 1D matrices", {
   x2 <- cbind(a = 6:1)
   out2 <- wrowmean_vector(x2, ngroups = 2L)
@@ -103,10 +82,3 @@ test_that("wrowmean_vector() works for 1D matrices", {
   expect_equal(out2, cbind(a = c(a, b)))
 })
 
-test_that("rowmean_factor() works for factor input", {
-  x <- factor(c("C", "A", "C", "C", "A", "A"))
-  out <- rowmean_factor(x, ngroups = 2L)
-
-  expect_true(is.matrix(out))
-  expect_equal(out, cbind(A = c(1/3, 2/3), C = c(2/3, 1/3)))
-})
