Skip to content

Commit

Permalink
Store pca models for umap_transform when ret_model = TRUE
Browse files Browse the repository at this point in the history
  • Loading branch information
jlmelville committed Dec 9, 2018
1 parent 0e4b0eb commit 7bbf94f
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 11 deletions.
26 changes: 21 additions & 5 deletions R/init.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,12 @@ pca_scores <- function(X, ncol = min(dim(X)), ret_extra = FALSE,
# irlba warns about using too large a percentage of total singular value
# so don't use if dataset is small compared to ncol
if (ncol < 0.5 * min(dim(X))) {
return(irlba_scores(X, ncol = ncol))
return(irlba_scores(X, ncol = ncol, ret_extra = ret_extra))
}

# need extra data if we want to re-apply PCA to new points in umap_transform
rotation <- NULL
center <- NULL
if (methods::is(X, "dist")) {
res_mds <- stats::cmdscale(X, x.ret = TRUE, eig = TRUE, k = ncol)

Expand All @@ -144,7 +147,7 @@ pca_scores <- function(X, ncol = min(dim(X)), ret_extra = FALSE,
else {
X <- scale(X, center = TRUE, scale = FALSE)
# do SVD on X directly rather than forming covariance matrix
s <- svd(X, nu = ncol, nv = 0)
s <- svd(X, nu = ncol, nv = ifelse(ret_extra, ncol, 0))
D <- diag(c(s$d[1:ncol]), ncol, ncol)
if (verbose || ret_extra) {
# calculate eigenvalues of covariance matrix from singular values
Expand All @@ -156,12 +159,18 @@ pca_scores <- function(X, ncol = min(dim(X)), ret_extra = FALSE,
)
}
scores <- s$u %*% D
if (ret_extra) {
rotation <- s$v
center <- attr(X, "scaled:center")
}
}

if (ret_extra) {
list(
scores = scores,
lambda = lambda[1:ncol]
lambda = lambda[1:ncol],
rotation = rotation,
center = center
)
}
else {
Expand All @@ -170,6 +179,13 @@ pca_scores <- function(X, ncol = min(dim(X)), ret_extra = FALSE,
}

# Get PCA scores via irlba
irlba_scores <- function(X, ncol) {
irlba::prcomp_irlba(X, n = ncol, retx = TRUE, center = TRUE, scale = FALSE)$x
irlba_scores <- function(X, ncol, ret_extra = FALSE) {
res <- irlba::prcomp_irlba(X, n = ncol, retx = TRUE, center = TRUE,
scale = FALSE)
if (ret_extra) {
list(scores = res$X, rotation = res$rotation, center = res$center)
}
else {
res$x
}
}
9 changes: 9 additions & 0 deletions R/transform.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ umap_transform <- function(X, model,
method <- model$method
scale_info <- model$scale_info
metric <- model$metric
pca_models <- model$pca_models

a <- model$a
b <- model$b
Expand Down Expand Up @@ -112,6 +113,10 @@ umap_transform <- function(X, model,
Xsub <- X[, subset, drop = FALSE]
}

if (!is.null(pca_models) && !is.null(pca_models[[as.character(i)]])) {
Xsub <- apply_pca(X = Xsub, pca_res = pca_models[[as.character(i)]])
}

nn <- annoy_search(Xsub,
k = n_neighbors, ann = ann, search_k = search_k,
n_threads = n_threads, grain_size = grain_size,
Expand Down Expand Up @@ -306,3 +311,7 @@ apply_scaling <- function(X, scale_info, verbose = FALSE) {

X
}
# Apply a previously calculated set of PCA rotations
apply_pca <- function(X, pca_res) {
sweep(X, 2, pca_res$center) %*% pca_res$rotation
}
30 changes: 27 additions & 3 deletions R/uwot.R
Original file line number Diff line number Diff line change
Expand Up @@ -967,11 +967,20 @@ uwot <- function(X, n_neighbors = 15, n_components = 2, metric = "euclidean",

# For typical case of numeric matrix X and metric = "euclidean", save
# PCA results here in case initialization uses PCA too
pca_models <- NULL
pca_shortcut <- FALSE
if (!is.null(pca) && length(metric) == 1 && metric == "euclidean" &&
is.matrix(X) && ncol(X) > pca) {
tsmessage("Reducing X column dimension to ", pca, " via PCA")
X <- pca_scores(X, ncol = pca)
pca_res <- pca_scores(X, ncol = pca, ret_extra = ret_model)
if (ret_model) {
X <- pca_res$scores
pca_models[["1"]] <- pca_res[c("center", "rotation")]
pca_res <- NULL
}
else {
X <- pca_res
}
pca_shortcut <- TRUE
}

Expand All @@ -988,6 +997,9 @@ uwot <- function(X, n_neighbors = 15, n_components = 2, metric = "euclidean",

V <- d2sr$V
nns <- d2sr$nns
if (is.null(pca_models)) {
pca_models <- d2sr$pca_models
}

if (!is.null(y)) {
tsmessage("Processing y data")
Expand Down Expand Up @@ -1225,6 +1237,9 @@ uwot <- function(X, n_neighbors = 15, n_components = 2, metric = "euclidean",
else {
res$nn_index <- nns[[1]]$index
}
if (!is.null(pca_models)) {
res$pca_models <- pca_models
}
}
if (ret_nn) {
res$nn <- list()
Expand Down Expand Up @@ -1321,6 +1336,7 @@ data2set <- function(X, Xcat, n_neighbors, metrics, nn_method,
}
}

pca_models <- list()
for (i in 1:nblocks) {
metric <- mnames[[i]]
metric <- match.arg(metric, c("euclidean", "cosine", "manhattan",
Expand Down Expand Up @@ -1356,7 +1372,15 @@ data2set <- function(X, Xcat, n_neighbors, metrics, nn_method,
if (!is.null(pca) && is.matrix(X) && metric == "euclidean" &&
ncol(X) > pca && nrow(X) > pca) {
tsmessage("Reducing column dimension to ", pca, " via PCA")
Xsub <- pca_scores(Xsub, pca)
pca_res <- pca_scores(Xsub, pca, ret_extra = ret_model)
if (ret_model) {
Xsub <- pca_res$scores
pca_models[[as.character(i)]] <- pca_res[c("center", "rotation")]
pca_res <- NULL
}
else {
Xsub <- pca_res
}
}

nn_sub <- nn_method
Expand Down Expand Up @@ -1397,7 +1421,7 @@ data2set <- function(X, Xcat, n_neighbors, metrics, nn_method,
V <- categorical_intersection_df(Xcat, V, weight = 0.5, verbose = verbose)
}

list(V = V, nns = nns)
list(V = V, nns = nns, pca_models = pca_models)
}

x2nn <- function(X, n_neighbors, metric, nn_method,
Expand Down
34 changes: 32 additions & 2 deletions tests/testthat/test_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,41 @@ expect_ok_matrix(res_trans)
# PCA dimensionality reduction
res <- umap(iris10,
n_neighbors = 4, n_epochs = 2, alpha = 0.5,
init = "spca", verbose = FALSE, n_threads = 0, pca = 2)
expect_ok_matrix(res)
init = "spca", verbose = FALSE, n_threads = 0, pca = 2,
ret_model = TRUE)
expect_ok_matrix(res$embedding)
expect_is(res$pca_models, "list")
expect_equal(length(res$pca_models), 1)
expect_ok_matrix(res$pca_models[["1"]]$rotation, nr = 4, nc = 2)
expect_equal(res$pca_models[["1"]]$center, c(4.86, 3.31, 1.45, 0.22),
check.attributes = FALSE)

res <- umap(iris10,
n_neighbors = 4, n_epochs = 2, alpha = 0.5,
metric = list("euclidean" = 1:2, "euclidean" = 3:4),
init = "spca", verbose = FALSE, n_threads = 0, pca = 1)
expect_ok_matrix(res)

# Mixed metrics, PCA and transform
set.seed(1337)
res <- umap(iris10,
n_neighbors = 4, n_epochs = 2, alpha = 0.5,
init = "spca", verbose = FALSE, n_threads = 0,
metric = list(euclidean = c(1, 2), cosine = 1:4,
euclidean = c(3, 4)),
pca = 2,
ret_model = TRUE)
expect_ok_matrix(res$embedding)
expect_is(res$pca_models, "list")
expect_equal(length(res$pca_models), 2)
expect_equal(names(res$pca_models), c("1", "3"))
expect_ok_matrix(res$pca_models[["1"]]$rotation, nr = 2, nc = 2)
expect_equal(res$pca_models[["1"]]$center, c(4.86, 3.31),
check.attributes = FALSE)
expect_ok_matrix(res$pca_models[["3"]]$rotation, nr = 2, nc = 2)
expect_equal(res$pca_models[["3"]]$center, c(1.45, 0.22),
check.attributes = FALSE)

res_trans <- umap_transform(iris10, model = res, verbose = FALSE, n_threads = 0,
n_epochs = 2)
expect_ok_matrix(res_trans)
5 changes: 4 additions & 1 deletion tests/testthat/test_transform.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,7 @@ iris10_colrange <- scale_input(iris10, scale_type = "colrange", ret_model = TRUE
iris10_crtrans <- apply_scaling(iris10, attr_to_scale_info(iris10_colrange))
expect_equal(iris10_colrange, iris10_crtrans, check.attributes = FALSE)


# test pca transform works
iris10pca <- pca_scores(iris10, ncol = 2, ret_extra = TRUE)
iris10pcat <- apply_pca(iris10, iris10pca)
expect_equal(iris10pca$scores, iris10pcat, check.attributes = FALSE)

0 comments on commit 7bbf94f

Please sign in to comment.