From a969d3331ccc5aed3f3d4ef953c05cf601e312d9 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 30 Jul 2024 13:38:35 +0200 Subject: [PATCH 1/2] Replace `parallel` with `cl` (closes #40) Since `cl` only makes sense if `parallel = TRUE`, the latter can be replaced with the former (see complete discussion on #40). --- R/MADMMplasso.R | 12 ++++++------ R/cv_MADMMplasso.R | 4 ++-- inst/examples/MADMMplasso_example.R | 2 +- man/MADMMplasso.Rd | 11 ++++------- man/cv_MADMMplasso.Rd | 9 +++------ tests/testthat/test-MADMMplasso.R | 4 ++-- tests/testthat/test-parallel.R | 20 +++++++------------- 7 files changed, 25 insertions(+), 37 deletions(-) diff --git a/R/MADMMplasso.R b/R/MADMMplasso.R index 7607917..17c4dc1 100644 --- a/R/MADMMplasso.R +++ b/R/MADMMplasso.R @@ -19,8 +19,7 @@ #' @param my_print Should information form each ADMM iteration be printed along the way? This prints the dual and primal residuals #' @param alph an overrelaxation parameter in \[1, 1.8\]. The implementation is borrowed from Stephen Boyd's \href{https://stanford.edu/~boyd/papers/admm/lasso/lasso.html}{MATLAB code} #' @param tree The results from the hierarchical clustering of the response matrix. The easy way to obtain this is by using the function (tree_parms) which gives a default clustering. However, user decide on a specific structure and then input a tree that follows such structure. -#' @param parallel should parallel processing be used or not? If set to `TRUE`, pal should be set to `FALSE`. -#' @param pal Should the lapply function be applied for an alternative quicker optimization when there no parallel package available? +#' @param pal Should the lapply function be applied for an alternative to parallelization. #' @param tol threshold for the non-zero coefficients #' @param cl The number of CPUs to be used for parallel processing #' @param legacy If \code{TRUE}, use the R version of the algorithm @@ -49,10 +48,11 @@ #' @example inst/examples/MADMMplasso_example.R #' @export -MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, maxgrid, nlambda, rho = 5, my_print = FALSE, alph = 1.8, tree, parallel = TRUE, pal = !parallel, gg = NULL, tol = 1E-4, cl = 4, legacy = FALSE) { - if (parallel && pal) { - stop("parallel and pal cannot be TRUE at the same time") - } +MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, maxgrid, nlambda, rho = 5, my_print = FALSE, alph = 1.8, tree, pal = cl == 1L, gg = NULL, tol = 1E-4, cl = detectCores() - 1L, legacy = FALSE) { + # Recalculating the number of CPUs + cl <- ifelse(pal, 1L, cl) # cl is irrelevant if pal = TRUE + parallel <- cl > 1L + N <- nrow(X) p <- ncol(X) diff --git a/R/cv_MADMMplasso.R b/R/cv_MADMMplasso.R index 4f25b22..16bbbd5 100644 --- a/R/cv_MADMMplasso.R +++ b/R/cv_MADMMplasso.R @@ -11,7 +11,7 @@ #' @return results containing the CV values #' @example inst/examples/cv_MADMMplasso_example.R #' @export -cv_MADMMplasso <- function(fit, nfolds, X, Z, y, alpha = 0.5, lambda = fit$Lambdas, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, nlambda, rho = 5, my_print = FALSE, alph = 1, foldid = NULL, parallel = TRUE, pal = FALSE, gg = c(7, 0.5), TT, tol = 1E-4, cl = 2, legacy = FALSE) { +cv_MADMMplasso <- function(fit, nfolds, X, Z, y, alpha = 0.5, lambda = fit$Lambdas, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, nlambda, rho = 5, my_print = FALSE, alph = 1, foldid = NULL, pal = cl == 1L, gg = c(7, 0.5), TT, tol = 1E-4, cl = detectCores() - 1L, legacy = FALSE) { BIG <- 10e9 no <- nrow(X) ggg <- vector("list", nfolds) @@ -28,7 +28,7 @@ cv_MADMMplasso <- function(fit, nfolds, X, Z, y, alpha = 0.5, lambda = fit$Lambd print(c("fold,", ii)) oo <- foldid == ii - ggg[[ii]] <- MADMMplasso(X = X[!oo, , drop = FALSE], Z = Z[!oo, , drop = FALSE], y = y[!oo, , drop = FALSE], alpha = alpha, my_lambda = lambda, lambda_min = 0.01, max_it = max_it, e.abs = e.abs, e.rel = e.rel, nlambda = length(lambda[, 1]), rho = rho, tree = TT, my_print = my_print, alph = alph, parallel = parallel, pal = pal, gg = gg, tol = tol, cl = cl, legacy) + ggg[[ii]] <- MADMMplasso(X = X[!oo, , drop = FALSE], Z = Z[!oo, , drop = FALSE], y = y[!oo, , drop = FALSE], alpha = alpha, my_lambda = lambda, lambda_min = 0.01, max_it = max_it, e.abs = e.abs, e.rel = e.rel, nlambda = length(lambda[, 1]), rho = rho, tree = TT, my_print = my_print, alph = alph, pal = pal, gg = gg, tol = tol, cl = cl, legacy) cv_p <- predict.MADMMplasso(ggg[[ii]], X = X[oo, , drop = FALSE], Z = Z[oo, ], y = y[oo, ]) ggg[[ii]] <- 0 diff --git a/inst/examples/MADMMplasso_example.R b/inst/examples/MADMMplasso_example.R index 8b7c3c6..af035ff 100644 --- a/inst/examples/MADMMplasso_example.R +++ b/inst/examples/MADMMplasso_example.R @@ -83,6 +83,6 @@ fit <- MADMMplasso( X, Z, y, alpha = alpha, my_lambda = matrix(rep(0.2, ncol(y)), 1), lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, - nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = TRUE, parallel = FALSE, + nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = TRUE, pal = TRUE, gg = gg1, tol = tol, cl = 6 ) diff --git a/man/MADMMplasso.Rd b/man/MADMMplasso.Rd index fdc31b8..15497bf 100644 --- a/man/MADMMplasso.Rd +++ b/man/MADMMplasso.Rd @@ -22,11 +22,10 @@ MADMMplasso( my_print = FALSE, alph = 1.8, tree, - parallel = TRUE, - pal = !parallel, + pal = cl == 1L, gg = NULL, tol = 1e-04, - cl = 4, + cl = detectCores() - 1L, legacy = FALSE ) } @@ -62,9 +61,7 @@ Categorical variables should be coded by 0-1 dummy variables: for a k-level vari \item{tree}{The results from the hierarchical clustering of the response matrix. The easy way to obtain this is by using the function (tree_parms) which gives a default clustering. However, user decide on a specific structure and then input a tree that follows such structure.} -\item{parallel}{should parallel processing be used or not? If set to \code{TRUE}, pal should be set to \code{FALSE}.} - -\item{pal}{Should the lapply function be applied for an alternative quicker optimization when there no parallel package available?} +\item{pal}{Should the lapply function be applied for an alternative to parallelization.} \item{gg}{penalty term for the tree structure. This is a 2x2 matrix values in the first row representing the maximum to the minimum values for lambda_1 and the second row representing the maximum to the minimum values for lambda_2. In the current setting, we set both maximum and the minimum to be same because cross validation is not carried across the lambda_1 and lambda_2. However, setting different values will work during the model fit.} @@ -189,7 +186,7 @@ fit <- MADMMplasso( X, Z, y, alpha = alpha, my_lambda = matrix(rep(0.2, ncol(y)), 1), lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, - nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = TRUE, parallel = FALSE, + nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = TRUE, pal = TRUE, gg = gg1, tol = tol, cl = 6 ) } diff --git a/man/cv_MADMMplasso.Rd b/man/cv_MADMMplasso.Rd index fe034da..b368c7c 100644 --- a/man/cv_MADMMplasso.Rd +++ b/man/cv_MADMMplasso.Rd @@ -20,12 +20,11 @@ cv_MADMMplasso( my_print = FALSE, alph = 1, foldid = NULL, - parallel = TRUE, - pal = FALSE, + pal = cl == 1L, gg = c(7, 0.5), TT, tol = 1e-04, - cl = 2, + cl = detectCores() - 1L, legacy = FALSE ) } @@ -61,9 +60,7 @@ Categorical variables should be coded by 0-1 dummy variables: for a k-level vari \item{foldid}{vector with values in 1:K, indicating folds for K-fold CV. Default NULL} -\item{parallel}{should parallel processing be used or not? If set to \code{TRUE}, pal should be set to \code{FALSE}.} - -\item{pal}{Should the lapply function be applied for an alternative quicker optimization when there no parallel package available?} +\item{pal}{Should the lapply function be applied for an alternative to parallelization.} \item{gg}{penalty term for the tree structure. This is a 2x2 matrix values in the first row representing the maximum to the minimum values for lambda_1 and the second row representing the maximum to the minimum values for lambda_2. In the current setting, we set both maximum and the minimum to be same because cross validation is not carried across the lambda_1 and lambda_2. However, setting different values will work during the model fit.} diff --git a/tests/testthat/test-MADMMplasso.R b/tests/testthat/test-MADMMplasso.R index daa5f7b..8143294 100644 --- a/tests/testthat/test-MADMMplasso.R +++ b/tests/testthat/test-MADMMplasso.R @@ -84,7 +84,7 @@ fit_C <- MADMMplasso( X, Z, y, alpha = alpha, my_lambda = matrix(rep(0.2, ncol(y)), 1), lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, - nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, parallel = FALSE, + nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, pal = TRUE, gg = gg1, tol = tol, cl = 6 ) set.seed(9356219) @@ -94,7 +94,7 @@ fit_R <- suppressWarnings( X, Z, y, alpha = alpha, my_lambda = matrix(rep(0.2, ncol(y)), 1), lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, - nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, parallel = FALSE, + nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, pal = TRUE, gg = gg1, tol = tol, cl = 6, legacy = TRUE ) ) diff --git a/tests/testthat/test-parallel.R b/tests/testthat/test-parallel.R index 9597485..9ec8e10 100644 --- a/tests/testthat/test-parallel.R +++ b/tests/testthat/test-parallel.R @@ -81,19 +81,19 @@ mad_wrap <- function(seed = 3398, ...) { alpha = 0.2, my_lambda = matrix(rep(0.2, ncol(y)), 1), lambda_min = 0.001, max_it = 5000, e.abs = 1e-4, e.rel = 1e-2, maxgrid = 1L, nlambda = 1L, rho = 5, tree = TT, my_print = FALSE, alph = 1, gg = gg1, - tol = 1e-3, cl = 2, ... + tol = 1e-3, ... ) ) } -fit_R <- mad_wrap(legacy = TRUE, parallel = FALSE, pal = FALSE) -fit_C <- mad_wrap(legacy = FALSE, parallel = FALSE, pal = FALSE) -fit_R_pal <- mad_wrap(legacy = TRUE, parallel = FALSE, pal = TRUE) -fit_C_pal <- mad_wrap(legacy = FALSE, parallel = FALSE, pal = TRUE) +fit_R <- mad_wrap(legacy = TRUE, cl = 1L, pal = FALSE) +fit_C <- mad_wrap(legacy = FALSE, cl = 1L, pal = FALSE) +fit_R_pal <- mad_wrap(legacy = TRUE, cl = 1L, pal = TRUE) +fit_C_pal <- mad_wrap(legacy = FALSE, cl = 1L, pal = TRUE) # Restrict to *nix machines if (.Platform$OS.type == "unix") { - fit_R_parallel <- mad_wrap(legacy = TRUE, parallel = TRUE, pal = FALSE) - fit_C_parallel <- mad_wrap(legacy = FALSE, parallel = TRUE, pal = FALSE) + fit_R_parallel <- mad_wrap(legacy = TRUE, cl = 2L, pal = FALSE) + fit_C_parallel <- mad_wrap(legacy = FALSE, cl = 2L, pal = FALSE) } test_that("results are identical after parallelization", { @@ -106,9 +106,3 @@ test_that("results are identical after parallelization", { expect_identical(fit_C_pal, fit_C_parallel) } }) - -test_that("parallel and pal cannot be both true", { - msg <- "parallel and pal cannot be TRUE at the same time" - expect_error(mad_wrap(legacy = TRUE, parallel = TRUE, pal = TRUE), msg) - expect_error(mad_wrap(legacy = FALSE, parallel = TRUE, pal = TRUE), msg) -}) From 69e1ba2a3cc32b4ff98f95e9ff90ef3b9f47f4cb Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 30 Jul 2024 13:39:56 +0200 Subject: [PATCH 2/2] Increment version number to 0.0.0.9019 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 53ae0e7..c1f6847 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: MADMMplasso Title: Multi Variate Multi Response 'ADMM' with Interaction Effects -Version: 0.0.0.9018 +Version: 0.0.0.9019 Authors@R: c( person(