Merge pull request #135 from ModelOriented/remove-factor-predictions
Remove factor predictions
mayer79 authored Jul 4, 2024
2 parents 1b7072e + 3260e31 commit 5c3914f
Showing 11 changed files with 33 additions and 170 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -14,3 +14,4 @@
 ^pkgdown/_pkgdown\.yml$
 ^\.github$
 ^revdep$
+^compare_with_python.R$
9 changes: 6 additions & 3 deletions DESCRIPTION
@@ -1,9 +1,11 @@
 Package: kernelshap
 Title: Kernel SHAP
-Version: 0.5.1
+Version: 0.6.0
 Authors@R: c(
-    person("Michael", "Mayer", , "[email protected]", role = c("aut", "cre")),
-    person("David", "Watson", , "[email protected]", role = "aut"),
+    person("Michael", "Mayer", , "[email protected]", role = c("aut", "cre"),
+           comment = c(ORCID = "0000-0002-6148-5756")),
+    person("David", "Watson", , "[email protected]", role = "aut",
+           comment = c(ORCID = "0000-0001-9632-2159")),
     person("Przemyslaw", "Biecek", , "[email protected]", role = "ctb",
            comment = c(ORCID = "0000-0001-8423-1823"))
 )
@@ -22,6 +24,7 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.1
 Imports:
     foreach,
+    MASS,
     stats,
     utils
 Suggests:
11 changes: 9 additions & 2 deletions NEWS.md
@@ -1,8 +1,15 @@
-# kernelshap 0.5.1
+# kernelshap 0.6.0
+
+This release is intended to be the last before stable version 1.0.0.
+
+## Major changes
+
+- Factor-valued predictions are no longer supported.
 
 ## Maintenance
 
-- Fix CRAN note regarding unavailable link to `gam::gam()`.
+- Fix CRAN note about unavailable link to `gam::gam()`.
+- Added dependency on {MASS} for calculating the Moore-Penrose generalized matrix inverse.
 
 # kernelshap 0.5.0
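Editor's sketch (not part of the commit) — why {MASS} enters the Imports: `MASS::ginv()` computes the Moore-Penrose generalized inverse via SVD and replaces the copy of `ginv()` previously bundled in R/utils_kernelshap.R (see below). The example matrix is illustrative only:

```r
library(MASS)

A <- matrix(c(1, 2, 2, 4), nrow = 2)  # rank 1, so solve(A) would fail
Ainv <- ginv(A)                       # Moore-Penrose generalized inverse

# The defining property A %*% Ainv %*% A == A holds up to numerical error
all.equal(A %*% Ainv %*% A, A)  # TRUE
```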
68 changes: 7 additions & 61 deletions R/utils.R
@@ -32,9 +32,7 @@ rep_rows <- function(x, i) {
 #' @param w Optional case weights.
 #' @returns A (1 x ncol(x)) matrix of column means.
 wcolMeans <- function(x, w = NULL, ...) {
-  if (!is.matrix(x)) {
-    x <- as.matrix(x)
-  }
+  x <- as.matrix(x)
   out <- if (is.null(w)) colMeans(x) else colSums(x * w) / sum(w)
   t.default(out)
 }
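Editor's sketch (not part of the commit) — what `wcolMeans()` computes, with made-up numbers:

```r
x <- cbind(a = c(1, 2, 3), b = c(4, 5, 6))
w <- c(1, 1, 2)  # case weights

t(colMeans(x))              # unweighted column means: 2, 5
t(colSums(x * w) / sum(w))  # weighted column means:   2.25, 5.25
```

Both results are (1 x ncol(x)) matrices, matching the documented return value.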
@@ -89,15 +87,9 @@ get_vz <- function(X, bg, Z, object, pred_fun, w, ...) {
       X[[v]][s] <- bg[[v]][s]
     }
   }
-  preds <- align_pred(pred_fun(object, X, ...), ohe = FALSE)
+  preds <- align_pred(pred_fun(object, X, ...))
 
-  # Aggregate (distinguishing some faster cases)
-  if (is.factor(preds)) {
-    if (is.null(w)) {
-      return(rowmean_factor(preds, ngroups = m))
-    }
-    preds <- fdummy(preds)
-  }
+  # Aggregate (distinguishing fast 1-dim case)
   if (ncol(preds) == 1L) {
     return(wrowmean_vector(preds, ngroups = m, w = w))
   }
@@ -148,24 +140,20 @@ reorganize_list <- function(alist) {
   lapply(out, as.matrix)
 }
 
-#' Aligns Predictions (adapted from {hstats})
+#' Aligns Predictions
 #'
 #' Turns predictions into matrix.
 #'
 #' @noRd
 #' @keywords internal
 #'
 #' @param x Object representing model predictions.
-#' @param ohe If `x` is a factor: should it be one-hot encoded? Default is `TRUE`.
-#' @returns Like `x`, but converted to matrix (or a factor).
-align_pred <- function(x, ohe = TRUE) {
+#' @returns Like `x`, but converted to matrix.
+align_pred <- function(x) {
   if (is.data.frame(x) && ncol(x) == 1L) {
     x <- x[[1L]]
   }
-  if (ohe && is.factor(x)) {
-    return(fdummy(x))
-  }
-  if (is.matrix(x) || is.factor(x)) x else as.matrix(x)
+  as.matrix(x)
 }
 
 #' Head of List Elements
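Editor's sketch (not part of the commit) — the simplified `align_pred()` now unconditionally coerces predictions to a matrix; the factor/one-hot branch is gone:

```r
align_pred <- function(x) {
  if (is.data.frame(x) && ncol(x) == 1L) {
    x <- x[[1L]]  # unwrap single-column data frames first
  }
  as.matrix(x)
}

align_pred(data.frame(pred = c(0.1, 0.9)))  # 2 x 1 matrix
align_pred(c(0.1, 0.9))                     # 2 x 1 matrix
align_pred(cbind(p1 = 0.2, p2 = 0.8))       # 1 x 2 matrix, returned as is
```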
@@ -252,28 +240,6 @@ rep_each <- function(m, each) {
   out
 }
 
-#' Fast OHE (from {hstats})
-#'
-#' Turns a vector/factor into its one-hot encoding.
-#' Ingeniously written by Mathias Ambuehl.
-#'
-#' Working with integers instead of doubles would be slightly faster, but at the price
-#' of potential integer overflows in subsequent calculations.
-#'
-#' @noRd
-#' @keywords internal
-#'
-#' @param x Object representing model predictions.
-#' @returns Like `x`, but converted to matrix.
-fdummy <- function(x) {
-  x <- as.factor(x)
-  lev <- levels(x)
-  out <- matrix(0, nrow = length(x), ncol = length(lev))
-  out[cbind(seq_along(x), as.integer(x))] <- 1
-  colnames(out) <- lev
-  out
-}
-
 #' Grouped Means for Single-Column Matrices (adapted from {hstats})
 #'
 #' Grouped means for matrix with single column over fixed-length groups.
@@ -299,26 +265,6 @@ wrowmean_vector <- function(x, ngroups = 1L, w = NULL) {
   out
 }
 
-#' rowmean_vector() for factors (copied from {hstats})
-#'
-#' Grouped `colMeans_factor()` for equal-sized groups.
-#'
-#' @noRd
-#' @keywords internal
-#'
-#' @param x Factor-like.
-#' @param ngroups Number of subsequent, equal-sized groups.
-#' @returns Matrix with column names.
-rowmean_factor <- function(x, ngroups = 1L) {
-  x <- as.factor(x)
-  lev <- levels(x)
-  n_bg <- length(x) %/% ngroups
-  dim(x) <- c(n_bg, ngroups)
-  out <- t.default(apply(x, 2L, FUN = tabulate, nbins = length(lev)))
-  colnames(out) <- lev
-  out / n_bg
-}
-
 #' Basic Input Checks
 #'
 #' @noRd
28 changes: 1 addition & 27 deletions R/utils_kernelshap.R
@@ -88,38 +88,12 @@ kernelshap_one <- function(x, v1, object, pred_fun, feature_names, bg_w, exact,
 # A: (p x p), b: (p x K), constraint: (1 x K)
 solver <- function(A, b, constraint) {
   p <- ncol(A)
-  Ainv <- ginv(A)
+  Ainv <- MASS::ginv(A)
   dimnames(Ainv) <- dimnames(A)
   s <- (matrix(colSums(Ainv %*% b), nrow = 1L) - constraint) / sum(Ainv) # (1 x K)
   Ainv %*% (b - s[rep.int(1L, p), , drop = FALSE]) # (p x K)
 }
 
-ginv <- function(X, tol = sqrt(.Machine$double.eps)) {
-  ##
-  ## Based on an original version in package MASS 7.3.56
-  ## (with permission)
-  ##
-  if (length(dim(X)) > 2L || !(is.numeric(X) || is.complex(X))) {
-    stop("'X' must be a numeric or complex matrix")
-  }
-  if (!is.matrix(X)) {
-    X <- as.matrix(X)
-  }
-  Xsvd <- svd(X)
-  if (is.complex(X)) {
-    Xsvd$u <- Conj(Xsvd$u)
-  }
-  Positive <- Xsvd$d > max(tol * Xsvd$d[1L], 0)
-  if (all(Positive)) {
-    Xsvd$v %*% (1 / Xsvd$d * t(Xsvd$u))
-  } else if (!any(Positive)) {
-    array(0, dim(X)[2L:1L])
-  } else {
-    Xsvd$v[, Positive, drop = FALSE] %*%
-      ((1 / Xsvd$d[Positive]) * t(Xsvd$u[, Positive, drop = FALSE]))
-  }
-}
-
 # Draw m binary vectors z of length p with sum(z) distributed according
 # to Kernel SHAP weights -> (m x p) matrix.
 # The argument S can be used to restrict the range of sum(z).
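Editor's sketch (not part of the commit) — `solver()` is the closed-form solution of the equality-constrained least-squares problem behind Kernel SHAP: by construction, the column sums of the returned (p x K) matrix equal `constraint` (prediction minus baseline). A self-contained check with arbitrary random inputs; any symmetric `A` works here, even a rank-deficient one, thanks to the generalized inverse:

```r
set.seed(1)
p <- 4
A <- crossprod(matrix(rnorm(5 * p), ncol = p))  # symmetric (p x p)
b <- matrix(rnorm(p), ncol = 1)                 # (p x 1), i.e., K = 1
constraint <- matrix(1.5, nrow = 1L, ncol = 1)  # e.g., f(x) minus baseline

Ainv <- MASS::ginv(A)
s <- (matrix(colSums(Ainv %*% b), nrow = 1L) - constraint) / sum(Ainv)
beta <- Ainv %*% (b - s[rep.int(1L, p), , drop = FALSE])

colSums(beta)  # 1.5: the SHAP values sum to the constraint
```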
21 changes: 4 additions & 17 deletions README.md
@@ -15,7 +15,7 @@

 The package contains two workhorses to calculate SHAP values for *any* model:
 
-- `permshap()`: Exact permutation SHAP algorithm of [1]. Available for up to $p=14$ features.
+- `permshap()`: Exact permutation SHAP algorithm of [1]. Recommended for up to 8-10 features.
 - `kernelshap()`: Kernel SHAP algorithm of [2] and [3]. By default, exact Kernel SHAP is used for up to $p=8$ features, and an almost exact hybrid algorithm otherwise.
 
 Furthermore, the function `additive_shap()` produces SHAP values for additive models fitted via `lm()`, `glm()`, `mgcv::gam()`, `mgcv::bam()`, `gam::gam()`,
@@ -38,7 +38,6 @@ If the training data is small, use the full training data. In cases with a natur
 **Remarks**
 
 - Multivariate predictions are handled at no additional computational cost.
-- Factor-valued predictions are automatically turned into one-hot-encoded columns.
 - Case weights are supported via the argument `bg_w`.
 - By changing the defaults in `kernelshap()`, the iterative pure sampling approach in [3] can be enforced.
 - The `additive_shap()` explainer is easier to use: Only the model and `X` are required.
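Editor's sketch (not part of the commit) — a minimal illustration of the two workhorses and the `bg_w` argument from the remarks above; `fit`, `X`, and `w` are placeholders for your own model, explanation data, and case weights:

```r
library(kernelshap)

# Exact permutation SHAP (small number of features)
ps <- permshap(fit, X = X, bg_X = X)

# Kernel SHAP: exact for p <= 8 features, almost exact hybrid otherwise
ks <- kernelshap(fit, X = X, bg_X = X)

# Case weights for the background data
ks_w <- kernelshap(fit, X = X, bg_X = X, bg_w = w)
```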
@@ -55,7 +54,7 @@ devtools::install_github("ModelOriented/kernelshap")
 
 ## Basic Usage
 
-Let's model diamond prices with a (not too complex) random forest. As an alternative, you could use the {treeshap} package in this situation.
+Let's model diamond prices with a random forest. As an alternative, you could use the {treeshap} package in this situation.
 
 ```r
 library(kernelshap)
@@ -236,7 +235,6 @@ sv_dependence(shap_values, v = "carat", color_var = NULL)
 {kernelshap} supports multivariate predictions like:
 
 - probabilistic classification,
-- non-probabilistic classification (factor-valued responses are turned into dummies),
 - regression with multivariate response, and
 - predictions found by applying multiple regression models.

@@ -255,10 +253,6 @@ ps_prob <- permshap(fit_prob, X = iris[, -5], bg_X = iris) |>
   shapviz()
 sv_importance(ps_prob)
 sv_dependence(ps_prob, "Petal.Length")
-
-# Non-probabilistic classification (results not shown)
-fit_class <- ranger(Species ~ ., data = iris, num.trees = 20)
-ps_class <- permshap(fit_class, X = iris[, -5], bg_X = iris)
 ```

![](man/figures/README-prob-imp.svg)
@@ -291,7 +285,7 @@ iris_wf <- workflow() |>
 fit <- iris_wf |>
   fit(iris)
 
-system.time( # 6s
+system.time( # 4s
   ps <- permshap(fit, iris[-5], bg_X = iris, type = "prob")
 )
 ps
@@ -333,12 +327,9 @@ task_classif <- TaskClassif$new(id = "1", backend = iris, target = "Species")
 learner_classif <- lrn("classif.rpart", predict_type = "prob")
 learner_classif$train(task_classif)
 
-predict(learner_classif, head(iris)) # setosa setosa # Classes
-predict(learner_classif, head(iris), predict_type = "prob") # Probs per class
-
 x <- learner_classif$selected_features()
 
-# For *probabilistic* classification, pass predict_type = "prob" to mlr3's predict()
+# Don't forget to pass predict_type = "prob" to mlr3's predict()
 ps <- permshap(
   learner_classif, X = iris, bg_X = iris, feature_names = x, predict_type = "prob"
 )
@@ -347,10 +338,6 @@ ps
 #      Petal.Length Petal.Width
 # [1,]    0.6666667           0
 # [2,]    0.6666667           0
-
-# Non-probabilistic classification uses auto-OHE internally
-ps <- permshap(learner_classif, X = iris, bg_X = iris, feature_names = x)
-ps
```

## References
9 changes: 5 additions & 4 deletions packaging.R
@@ -15,15 +15,15 @@ library(usethis)
 use_description(
   fields = list(
     Title = "Kernel SHAP",
-    Version = "0.5.1",
+    Version = "0.6.0",
     Description = "Efficient implementation of Kernel SHAP, see Lundberg and Lee (2017),
     and Covert and Lee (2021) <http://proceedings.mlr.press/v130/covert21a>.
     Furthermore, for up to 14 features, exact permutation SHAP values can be calculated.
     The package plays well together with meta-learning packages like 'tidymodels', 'caret' or 'mlr3'.
     Visualizations can be done using the R package 'shapviz'.",
     `Authors@R` =
-      "c(person('Michael', family='Mayer', role=c('aut', 'cre'), email='[email protected]'),
-      person('David', family='Watson', role='aut', email='[email protected]'),
+      "c(person('Michael', family='Mayer', role=c('aut', 'cre'), email='[email protected]', comment=c(ORCID='0000-0002-6148-5756')),
+      person('David', family='Watson', role='aut', email='[email protected]', comment=c(ORCID='0000-0001-9632-2159')),
       person('Przemyslaw', family='Biecek', email='[email protected]', role='ctb', comment=c(ORCID='0000-0001-8423-1823'))
       )",
     Depends = "R (>= 3.2.0)",
@@ -32,9 +32,10 @@ use_description(
   roxygen = TRUE
 )
 
-use_package("foreach", "Imports")
+use_package("MASS", "Imports")
 use_package("stats", "Imports")
 use_package("utils", "Imports")
+use_package("foreach", "Imports")
 
 use_package("doFuture", "Suggests")
14 changes: 0 additions & 14 deletions tests/testthat/test-kernelshap-multioutput.R
@@ -86,17 +86,3 @@
 test_that("SHAP + baseline = prediction works with case weights", {
   expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[5:10, 2L])
 })
-
-test_that("factor predictions work", {
-  pf <- function(m, X) factor(X[, "v1"], levels = 0:1, labels = c("zero", "one"))
-  X <- cbind(v1 = 0:1, v2 = 0)
-  out <- kernelshap(1, X = X, bg_X = X, pred_fun = pf, verbose = FALSE)
-  expect_equal(colnames(out$S$zero), c("v1", "v2"))
-  expect_equal(names(out$S), c("zero", "one"))
-  expect_equal(out$predictions, cbind(zero = 1:0, one = 0:1))
-
-  # with weights
-  w <- rep(2, nrow(X))
-  out2 <- kernelshap(1, X = X, bg_X = X, bg_w = w, pred_fun = pf, verbose = FALSE)
-  expect_equal(out, out2)
-})
1 change: 0 additions & 1 deletion tests/testthat/test-kernelshap.R
@@ -238,4 +238,3 @@ test_that(
   expect_equal(rowSums(s$S) + s$baseline, unname(predict(fit, X[1L, ])))
 })
 
-
13 changes: 0 additions & 13 deletions tests/testthat/test-permshap-multioutput.R
@@ -75,16 +75,3 @@
   expect_equal(rowSums(sY$S[[2L]]) + sY$baseline[2L], predsY[5:10, 2L])
 })
 
-test_that("factor predictions work", {
-  pf <- function(m, X) factor(X[, "v1"], levels = 0:1, labels = c("zero", "one"))
-  X <- cbind(v1 = 0:1, v2 = 0)
-  out <- permshap(1, X = X, bg_X = X, pred_fun = pf, verbose = FALSE)
-  expect_equal(colnames(out$S$zero), c("v1", "v2"))
-  expect_equal(names(out$S), c("zero", "one"))
-  expect_equal(out$predictions, cbind(zero = 1:0, one = 0:1))
-
-  # with weights
-  w <- rep(2, nrow(X))
-  out2 <- permshap(1, X = X, bg_X = X, bg_w = w, pred_fun = pf, verbose = FALSE)
-  expect_equal(out, out2)
-})
28 changes: 0 additions & 28 deletions tests/testthat/test-utils.R
@@ -60,27 +60,6 @@ test_that(
   expect_true(is.integer(rep_each(100, 100)))
 })
 
-test_that("fdummy() works", {
-  x <- c("A", "A", "C", "D")
-  mm <- matrix(model.matrix(~ x + 0), ncol = 3, dimnames = list(NULL, c("A", "C", "D")))
-  expect_equal(fdummy(x), mm)
-})
-
-test_that("fdummy() works for singletons", {
-  x <- c("A")
-  expect_equal(fdummy(x), cbind(A = 1))
-  expect_true(is.matrix(fdummy(x)))
-})
-
-test_that("fdummy() respects factor level order", {
-  x1 <- factor(c("A", "A", "C", "D"))
-  x2 <- factor(x1, levels = rev(levels(x1)))
-  d1 <- fdummy(x1)
-  d2 <- fdummy(x2)
-  expect_equal(d1, d2[, colnames(d1)])
-  expect_equal(colnames(d1), rev(colnames(d2)))
-})
-
 test_that("wrowmean_vector() works for 1D matrices", {
   x2 <- cbind(a = 6:1)
   out2 <- wrowmean_vector(x2, ngroups = 2L)
@@ -103,10 +82,3 @@ test_that("wrowmean_vector() works for 1D matrices", {
   expect_equal(out2, cbind(a = c(a, b)))
 })
 
-test_that("rowmean_factor() works for factor input", {
-  x <- factor(c("C", "A", "C", "C", "A", "A"))
-  out <- rowmean_factor(x, ngroups = 2L)
-
-  expect_true(is.matrix(out))
-  expect_equal(out, cbind(A = c(1/3, 2/3), C = c(2/3, 1/3)))
-})
