From a969d3331ccc5aed3f3d4ef953c05cf601e312d9 Mon Sep 17 00:00:00 2001
From: Waldir Leoncio <w.l.netto@medisin.uio.no>
Date: Tue, 30 Jul 2024 13:38:35 +0200
Subject: [PATCH 1/2] Replace `parallel` with `cl` (closes #40)

Since `cl` only makes sense if `parallel = TRUE`, the latter can be
replaced with the former (see complete discussion on #40).
---
 R/MADMMplasso.R                     | 12 ++++++------
 R/cv_MADMMplasso.R                  |  4 ++--
 inst/examples/MADMMplasso_example.R |  2 +-
 man/MADMMplasso.Rd                  | 11 ++++-------
 man/cv_MADMMplasso.Rd               |  9 +++------
 tests/testthat/test-MADMMplasso.R   |  4 ++--
 tests/testthat/test-parallel.R      | 20 +++++++-------------
 7 files changed, 25 insertions(+), 37 deletions(-)

diff --git a/R/MADMMplasso.R b/R/MADMMplasso.R
index 7607917..17c4dc1 100644
--- a/R/MADMMplasso.R
+++ b/R/MADMMplasso.R
@@ -19,8 +19,7 @@
 #' @param my_print Should information form each ADMM iteration be printed along the way? This prints the dual and primal residuals
 #' @param alph an overrelaxation parameter in \[1, 1.8\]. The implementation is borrowed from Stephen Boyd's \href{https://stanford.edu/~boyd/papers/admm/lasso/lasso.html}{MATLAB code}
 #' @param tree The results from the hierarchical clustering of the response matrix. The easy way to obtain this is by using the function (tree_parms) which gives a default clustering. However, user decide on a specific structure and then input a tree that follows such structure.
-#' @param parallel should parallel processing be used or not? If set to `TRUE`, pal should be set to `FALSE`.
-#' @param pal Should the lapply function be applied for an alternative quicker optimization when there no parallel package available?
+#' @param pal Should the lapply function be applied for an alternative to parallelization.
 #' @param tol threshold for the non-zero coefficients
 #' @param cl The number of CPUs to be used for parallel processing
 #' @param legacy If \code{TRUE}, use the R version of the algorithm
@@ -49,10 +48,11 @@
 
 #' @example inst/examples/MADMMplasso_example.R
 #' @export
-MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, maxgrid, nlambda, rho = 5, my_print = FALSE, alph = 1.8, tree, parallel = TRUE, pal = !parallel, gg = NULL, tol = 1E-4, cl = 4, legacy = FALSE) {
-  if (parallel && pal) {
-    stop("parallel and pal cannot be TRUE at the same time")
-  }
+MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, maxgrid, nlambda, rho = 5, my_print = FALSE, alph = 1.8, tree, pal = cl == 1L, gg = NULL, tol = 1E-4, cl = detectCores() - 1L, legacy = FALSE) {
+  # Recalculating the number of CPUs
+  cl <- ifelse(pal, 1L, cl) # cl is irrelevant if pal = TRUE
+  parallel <- cl > 1L
+
   N <- nrow(X)
 
   p <- ncol(X)
diff --git a/R/cv_MADMMplasso.R b/R/cv_MADMMplasso.R
index 4f25b22..16bbbd5 100644
--- a/R/cv_MADMMplasso.R
+++ b/R/cv_MADMMplasso.R
@@ -11,7 +11,7 @@
 #' @return  results containing the CV values
 #' @example inst/examples/cv_MADMMplasso_example.R
 #' @export
-cv_MADMMplasso <- function(fit, nfolds, X, Z, y, alpha = 0.5, lambda = fit$Lambdas, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, nlambda, rho = 5, my_print = FALSE, alph = 1, foldid = NULL, parallel = TRUE, pal = FALSE, gg = c(7, 0.5), TT, tol = 1E-4, cl = 2, legacy = FALSE) {
+cv_MADMMplasso <- function(fit, nfolds, X, Z, y, alpha = 0.5, lambda = fit$Lambdas, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, nlambda, rho = 5, my_print = FALSE, alph = 1, foldid = NULL, pal = cl == 1L, gg = c(7, 0.5), TT, tol = 1E-4, cl = detectCores() - 1L, legacy = FALSE) {
   BIG <- 10e9
   no <- nrow(X)
   ggg <- vector("list", nfolds)
@@ -28,7 +28,7 @@ cv_MADMMplasso <- function(fit, nfolds, X, Z, y, alpha = 0.5, lambda = fit$Lambd
     print(c("fold,", ii))
     oo <- foldid == ii
 
-    ggg[[ii]] <- MADMMplasso(X = X[!oo, , drop = FALSE], Z = Z[!oo, , drop = FALSE], y = y[!oo, , drop = FALSE], alpha = alpha, my_lambda = lambda, lambda_min = 0.01, max_it = max_it, e.abs = e.abs, e.rel = e.rel, nlambda = length(lambda[, 1]), rho = rho, tree = TT, my_print = my_print, alph = alph, parallel = parallel, pal = pal, gg = gg, tol = tol, cl = cl, legacy)
+    ggg[[ii]] <- MADMMplasso(X = X[!oo, , drop = FALSE], Z = Z[!oo, , drop = FALSE], y = y[!oo, , drop = FALSE], alpha = alpha, my_lambda = lambda, lambda_min = 0.01, max_it = max_it, e.abs = e.abs, e.rel = e.rel, nlambda = length(lambda[, 1]), rho = rho, tree = TT, my_print = my_print, alph = alph, pal = pal, gg = gg, tol = tol, cl = cl, legacy)
 
     cv_p <- predict.MADMMplasso(ggg[[ii]], X = X[oo, , drop = FALSE], Z = Z[oo, ], y = y[oo, ])
     ggg[[ii]] <- 0
diff --git a/inst/examples/MADMMplasso_example.R b/inst/examples/MADMMplasso_example.R
index 8b7c3c6..af035ff 100644
--- a/inst/examples/MADMMplasso_example.R
+++ b/inst/examples/MADMMplasso_example.R
@@ -83,6 +83,6 @@ fit <- MADMMplasso(
   X, Z, y,
   alpha = alpha, my_lambda = matrix(rep(0.2, ncol(y)), 1),
   lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda,
-  nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = TRUE, parallel = FALSE,
+  nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = TRUE,
   pal = TRUE, gg = gg1, tol = tol, cl = 6
 )
diff --git a/man/MADMMplasso.Rd b/man/MADMMplasso.Rd
index fdc31b8..15497bf 100644
--- a/man/MADMMplasso.Rd
+++ b/man/MADMMplasso.Rd
@@ -22,11 +22,10 @@ MADMMplasso(
   my_print = FALSE,
   alph = 1.8,
   tree,
-  parallel = TRUE,
-  pal = !parallel,
+  pal = cl == 1L,
   gg = NULL,
   tol = 1e-04,
-  cl = 4,
+  cl = detectCores() - 1L,
   legacy = FALSE
 )
 }
@@ -62,9 +61,7 @@ Categorical variables should be coded by 0-1 dummy variables: for a k-level vari
 
 \item{tree}{The results from the hierarchical clustering of the response matrix. The easy way to obtain this is by using the function (tree_parms) which gives a default clustering. However, user decide on a specific structure and then input a tree that follows such structure.}
 
-\item{parallel}{should parallel processing be used or not? If set to \code{TRUE}, pal should be set to \code{FALSE}.}
-
-\item{pal}{Should the lapply function be applied for an alternative quicker optimization when there no parallel package available?}
+\item{pal}{Should the lapply function be applied for an alternative to parallelization.}
 
 \item{gg}{penalty term for the tree structure. This is a 2x2 matrix values in the first row representing the maximum to the minimum values for lambda_1 and the second row representing the maximum to the minimum values for lambda_2. In the current setting, we set both maximum and the minimum to be same because cross validation is not carried across the lambda_1 and lambda_2. However, setting different values will work during the model fit.}
 
@@ -189,7 +186,7 @@ fit <- MADMMplasso(
   X, Z, y,
   alpha = alpha, my_lambda = matrix(rep(0.2, ncol(y)), 1),
   lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda,
-  nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = TRUE, parallel = FALSE,
+  nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = TRUE,
   pal = TRUE, gg = gg1, tol = tol, cl = 6
 )
 }
diff --git a/man/cv_MADMMplasso.Rd b/man/cv_MADMMplasso.Rd
index fe034da..b368c7c 100644
--- a/man/cv_MADMMplasso.Rd
+++ b/man/cv_MADMMplasso.Rd
@@ -20,12 +20,11 @@ cv_MADMMplasso(
   my_print = FALSE,
   alph = 1,
   foldid = NULL,
-  parallel = TRUE,
-  pal = FALSE,
+  pal = cl == 1L,
   gg = c(7, 0.5),
   TT,
   tol = 1e-04,
-  cl = 2,
+  cl = detectCores() - 1L,
   legacy = FALSE
 )
 }
@@ -61,9 +60,7 @@ Categorical variables should be coded by 0-1 dummy variables: for a k-level vari
 
 \item{foldid}{vector with values in 1:K, indicating folds for K-fold CV. Default NULL}
 
-\item{parallel}{should parallel processing be used or not? If set to \code{TRUE}, pal should be set to \code{FALSE}.}
-
-\item{pal}{Should the lapply function be applied for an alternative quicker optimization when there no parallel package available?}
+\item{pal}{Should the lapply function be applied for an alternative to parallelization.}
 
 \item{gg}{penalty term for the tree structure. This is a 2x2 matrix values in the first row representing the maximum to the minimum values for lambda_1 and the second row representing the maximum to the minimum values for lambda_2. In the current setting, we set both maximum and the minimum to be same because cross validation is not carried across the lambda_1 and lambda_2. However, setting different values will work during the model fit.}
 
diff --git a/tests/testthat/test-MADMMplasso.R b/tests/testthat/test-MADMMplasso.R
index daa5f7b..8143294 100644
--- a/tests/testthat/test-MADMMplasso.R
+++ b/tests/testthat/test-MADMMplasso.R
@@ -84,7 +84,7 @@ fit_C <- MADMMplasso(
   X, Z, y,
   alpha = alpha, my_lambda = matrix(rep(0.2, ncol(y)), 1),
   lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda,
-  nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, parallel = FALSE,
+  nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1,
   pal = TRUE, gg = gg1, tol = tol, cl = 6
 )
 set.seed(9356219)
@@ -94,7 +94,7 @@ fit_R <- suppressWarnings(
       X, Z, y,
       alpha = alpha, my_lambda = matrix(rep(0.2, ncol(y)), 1),
       lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda,
-      nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, parallel = FALSE,
+      nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1,
       pal = TRUE, gg = gg1, tol = tol, cl = 6, legacy = TRUE
     )
   )
diff --git a/tests/testthat/test-parallel.R b/tests/testthat/test-parallel.R
index 9597485..9ec8e10 100644
--- a/tests/testthat/test-parallel.R
+++ b/tests/testthat/test-parallel.R
@@ -81,19 +81,19 @@ mad_wrap <- function(seed = 3398, ...) {
       alpha = 0.2, my_lambda = matrix(rep(0.2, ncol(y)), 1),
       lambda_min = 0.001, max_it = 5000, e.abs = 1e-4, e.rel = 1e-2, maxgrid = 1L,
       nlambda = 1L, rho = 5, tree = TT, my_print = FALSE, alph = 1, gg = gg1,
-      tol = 1e-3, cl = 2, ...
+      tol = 1e-3, ...
     )
   )
 }
-fit_R <- mad_wrap(legacy = TRUE, parallel = FALSE, pal = FALSE)
-fit_C <- mad_wrap(legacy = FALSE, parallel = FALSE, pal = FALSE)
-fit_R_pal <- mad_wrap(legacy = TRUE, parallel = FALSE, pal = TRUE)
-fit_C_pal <- mad_wrap(legacy = FALSE, parallel = FALSE, pal = TRUE)
+fit_R <- mad_wrap(legacy = TRUE, cl = 1L, pal = FALSE)
+fit_C <- mad_wrap(legacy = FALSE, cl = 1L, pal = FALSE)
+fit_R_pal <- mad_wrap(legacy = TRUE, cl = 1L, pal = TRUE)
+fit_C_pal <- mad_wrap(legacy = FALSE, cl = 1L, pal = TRUE)
 
 # Restrict to *nix machines
 if (.Platform$OS.type == "unix") {
-  fit_R_parallel <- mad_wrap(legacy = TRUE, parallel = TRUE, pal = FALSE)
-  fit_C_parallel <- mad_wrap(legacy = FALSE, parallel = TRUE, pal = FALSE)
+  fit_R_parallel <- mad_wrap(legacy = TRUE, cl = 2L, pal = FALSE)
+  fit_C_parallel <- mad_wrap(legacy = FALSE, cl = 2L, pal = FALSE)
 }
 
 test_that("results are identical after parallelization", {
@@ -106,9 +106,3 @@ test_that("results are identical after parallelization", {
     expect_identical(fit_C_pal, fit_C_parallel)
   }
 })
-
-test_that("parallel and pal cannot be both true", {
-  msg <- "parallel and pal cannot be TRUE at the same time"
-  expect_error(mad_wrap(legacy = TRUE, parallel = TRUE, pal = TRUE), msg)
-  expect_error(mad_wrap(legacy = FALSE, parallel = TRUE, pal = TRUE), msg)
-})

From 69e1ba2a3cc32b4ff98f95e9ff90ef3b9f47f4cb Mon Sep 17 00:00:00 2001
From: Waldir Leoncio <w.l.netto@medisin.uio.no>
Date: Tue, 30 Jul 2024 13:39:56 +0200
Subject: [PATCH 2/2] Increment version number to 0.0.0.9019

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 53ae0e7..c1f6847 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: MADMMplasso
 Title: Multi Variate Multi Response 'ADMM' with Interaction Effects
-Version: 0.0.0.9018
+Version: 0.0.0.9019
 Authors@R:
     c(
         person(