Add benchmark scripts and template

funecology · Dec 2, 2023 · e27a46b · e27a46b
1 parent b98a6fe
commit e27a46b
Show file tree

Hide file tree

Showing 7 changed files with 372 additions and 0 deletions.
diff --git a/inst/benchmark_scripts/.batchtools.slurm.tmpl b/inst/benchmark_scripts/.batchtools.slurm.tmpl
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+## Job Resource Interface Definition
+##
+## ntasks [integer(1)]:       Number of required tasks,
+##                            Set larger than 1 if you want to further parallelize
+##                            with MPI within your job.
+## ncpus [integer(1)]:        Number of required cpus per task,
+##                            Set larger than 1 if you want to further parallelize
+##                            with multicore/parallel within each task.
+## walltime [integer(1)]:     Walltime for this job, in seconds.
+##                            Must be at least 60 seconds for Slurm to work properly.
+## memory   [integer(1)]:     Memory in megabytes for each cpu.
+##                            Must be at least 100 (when I tried lower values my
+##                            jobs did not start at all).
+##
+## Default resources can be set in your .batchtools.conf.R by defining the variable
+## 'default.resources' as a named list.
+
+<%
+# relative paths are not handled well by Slurm
+log.file = fs::path_expand(log.file)
+-%>
+
+
+#SBATCH --job-name=<%= resources$job.name %>
+#SBATCH --output=<%= resources$output %>
+#SBATCH --error=<%= log.file %>
+#SBATCH --time=<%= resources$walltime %>
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=<%= resources$ncpus %>
+#SBATCH --mem-per-cpu=<%= resources$memory %>
+#SBATCH --mail-user=<%= resources$email %>
+#SBATCH --mail-type=ALL
+<%= if (!is.null(resources$partition)) sprintf(paste0("#SBATCH --partition='", resources$partition, "'")) %>
+<%= if (array.jobs) sprintf("#SBATCH --array=1-%i", nrow(jobs)) else "" %>
+
+## Initialize work environment like
+module load foss/2020b R/4.0.4-2
+cd fundiversity
+
+## Export value of DEBUGME environemnt var to slave
+export DEBUGME=<%= Sys.getenv("DEBUGME") %>
+
+<%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%>
+<%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%>
+<%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%>
+
+## Run R:
+## we merge R output with stdout from SLURM, which gets then logged via --output option
+Rscript -e 'batchtools::doJobCollection("<%= uri %>")'
diff --git a/inst/benchmark_scripts/benchmark.R b/inst/benchmark_scripts/benchmark.R
@@ -0,0 +1,126 @@
+# Script to make benchmark across different functions of similar packages
+
+# Options ----------------------------------------------------------------------
+options(fundiversity.memoise = FALSE)  # Deactivate memoisation
+
+# Package & Functions ----------------------------------------------------------
+pkgload::load_all()  # Need last version of fundiversity
+source(
+  here::here("inst", "benchmark_scripts", "benchmark_fcts",
+             "fd_benchmark_multicore.R")
+)
+source(
+  here::here("inst", "benchmark_scripts", "benchmark_fcts",
+             "fd_benchmark_alternatives.R")
+)
+source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R"))
+
+
+# FD indices expressions -------------------------------------------------------
+
+dbfd_indices <- rlang::exprs(
+  FD_dbFD_fdiv = FD::dbFD(
+    given_data[["traits"]], given_data[["site_sp"]], calc.FDiv = TRUE,
+    calc.FGR = FALSE, calc.FRic = FALSE, calc.CWM = FALSE
+  ),
+  FD_dbFD_feve = FD::dbFD(
+    given_data[["traits"]], given_data[["site_sp"]], calc.FDiv = FALSE,
+    calc.FGR = FALSE, calc.FRic = FALSE, calc.CWM = FALSE
+  ),
+  FD_dbFD_fric = FD::dbFD(
+    given_data[["traits"]], given_data[["site_sp"]], calc.FRic = TRUE,
+    calc.CWM = FALSE, calc.FGR = FALSE, calc.FDiv = FALSE
+  ),
+  FD_dbFD_raoq =  FD::dbFD(
+    given_data[["traits"]], given_data[["site_sp"]], calc.FRic = FALSE,
+    calc.CWM = FALSE, calc.FGR = FALSE, calc.FDiv = FALSE
+  )
+)
+
+fd_indices <- list(
+  "fdis" = rlang::exprs(
+    fundiversity_fd_fdis_unparallel =
+      {
+        fd_fdis(given_data[["traits"]], given_data[["site_sp"]])
+      },
+    BAT_dispersion = BAT::dispersion(
+      given_data[["site_sp"]], given_data[["traits"]]
+    ),
+    FD_fdisp        = FD::fdisp(
+      dist(given_data[["traits"]]), given_data[["site_sp"]]
+    ),
+    mFD_alpha_fd   = mFD::alpha.fd.multidim(
+      given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fdis",
+      scaling = FALSE, verbose = FALSE
+    )
+  ),
+  "fdiv" = rlang::exprs(
+    fundiversity_fd_fdiv_unparallel =
+      {
+        fd_fdiv(given_data[["traits"]], given_data[["site_sp"]])
+      },
+    mFD_alpha_fd   = mFD::alpha.fd.multidim(
+      given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fdiv",
+      scaling = FALSE, verbose = FALSE
+    )
+  ),
+  "feve" = rlang::exprs(
+    fundiversity_fd_feve_unparallel =
+      {
+        fd_feve(given_data[["traits"]], given_data[["site_sp"]])
+      },
+    mFD_alpha_fd   = mFD::alpha.fd.multidim(
+      given_data[["traits"]], given_data[["site_sp"]], ind_vect = "feve",
+      scaling = FALSE, verbose = FALSE
+    )
+  ),
+  "raoq" = rlang::exprs(
+    fundiversity_fd_raoq_unparallel =
+      {
+        fd_raoq(given_data[["traits"]], given_data[["site_sp"]])
+      },
+    adiv_qe           = adiv::QE(
+      given_data[["site_sp"]], dist(given_data[["traits"]])
+    ),
+    BAT_rao           = BAT::rao(
+      given_data[["site_sp"]], distance = given_data[["traits"]]
+    ),
+    hillR_hill_func   = hillR::hill_func(
+      given_data[["site_sp"]], given_data[["traits"]], fdis = FALSE
+    ),
+    mFD_alpha_fd_hill = mFD::alpha.fd.hill(
+      given_data[["site_sp"]], dist(given_data[["traits"]]), q = 2,
+      tau = "max"
+    )
+  )
+)
+
+# Benchmarks -------------------------------------------------------------------
+bench_sites   = c(5e1 , 1e2, 5e2)
+bench_traits  = c(2,    4,  10)
+bench_species = c(2e2,  5e2, 1e3)
+
+n_iterations = 10
+
+## Regular indices benchmark
+# Parallelize across indices
+future::plan(
+  future.batchtools::batchtools_slurm,
+  template = here::here("inst", "benchmark_scripts", ".batchtools.slurm.tmpl"),
+  label     = "funbench",
+  resources = list(
+    job.name = "funbench",
+    walltime = 5760,
+    memory = "150G",
+    ncpus  = 1,
+    output   = "/work/%u/%j-%x.log",
+    email  = Sys.getenv("USEREMAIL")
+  ),
+  finalize = FALSE
+)
+
+fd_indices %>%
+  furrr::future_imap(
+    ~ fd_benchmark_alternatives(.x) %>%
+      saveRDS(glue::glue("bench_{index}_alternatives.rds", index = .y))
+  )
diff --git a/inst/benchmark_scripts/benchmark_fcts/fd_benchmark_alternatives.R b/inst/benchmark_scripts/benchmark_fcts/fd_benchmark_alternatives.R
@@ -0,0 +1,19 @@
+fd_benchmark_alternatives <- function(list_alternatives, seed = 20210906) {
+
+  bench::press(
+    n_sites   = bench_sites,
+    n_traits  = bench_traits,
+    n_species = bench_species,
+    {
+      set.seed(seed)
+      given_data = make_data(n_species, n_traits, n_sites)
+      bench::mark(
+        exprs = list_alternatives,
+        iterations = n_iterations,
+        check = FALSE,
+        memory = FALSE
+      )
+    }
+  )
+
+}
diff --git a/inst/benchmark_scripts/benchmark_fcts/fd_benchmark_multicore.R b/inst/benchmark_scripts/benchmark_fcts/fd_benchmark_multicore.R
@@ -0,0 +1,24 @@
+fd_benchmark_multicore <- function(fd_index) {
+
+  fd_fct <- rlang::as_function(fd_index)
+
+  bench::press(
+    n_sites   = bench_sites,
+    n_traits  = bench_traits,
+    n_species = bench_species,
+    {
+      set.seed(20210906)
+      given_data = make_data(n_species, n_traits, n_sites)
+      bench::mark(
+        iterations = n_iterations,
+        fundiversity_multicore =
+          {
+            fd_fct(given_data[["traits"]], given_data[["site_sp"]])
+          },
+        check = FALSE,
+        memory = FALSE
+      )
+    }
+  )
+
+}
diff --git a/inst/benchmark_scripts/benchmark_fcts/make_data.R b/inst/benchmark_scripts/benchmark_fcts/make_data.R
@@ -0,0 +1,19 @@
+make_data = function(n_species = 10, n_traits = 5, n_sites = 20) {
+  traits = matrix(runif(n_species * n_traits),
+                  ncol = n_traits, nrow = n_species,
+                  dimnames = list(species = paste0("sp", seq(n_species)),
+                                  traits  = paste0("t", seq(n_traits)))
+  )
+
+  site_sp = matrix(
+    as.numeric(runif(n_species * n_sites) >= 0.5),
+    nrow = n_sites,
+    ncol = n_species,
+    dimnames = list(
+      sites   = paste0("s", seq(n_sites)),
+      species = paste0("sp", seq(n_species))
+    )
+  )
+
+  list(traits = traits, site_sp = site_sp)
+}
diff --git a/inst/benchmark_scripts/benchmark_fd_multicore.R b/inst/benchmark_scripts/benchmark_fd_multicore.R
@@ -0,0 +1,57 @@
+# Script to make benchmark across different functions of similar packages
+
+# Options ----------------------------------------------------------------------
+options(fundiversity.memoise = FALSE)  # Deactivate memoisation
+
+# Package & Functions ----------------------------------------------------------
+pkgload::load_all()  # Need last version of fundiversity
+library("magrittr")
+source(
+  here::here("inst", "benchmark_scripts", "benchmark_fcts",
+             "fd_benchmark_multicore.R")
+)
+source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R"))
+
+# Benchmarks Parameters --------------------------------------------------------
+
+# Data Parameters
+bench_sites   = c(5e1 , 1e2, 5e2)
+bench_traits  = c(2,    4)
+bench_species = c(2e2,  5e2, 1e3)
+
+# Number of Repeats per function
+n_iterations = 20
+
+# Number of Cores over which to parallelize
+n_cores = c(1, seq(2, 6, by = 2))
+
+
+# Running the benchmark --------------------------------------------------------
+all_funs = alist(
+  fdis = fd_fdis, fdiv = fd_fdiv, feve = fd_feve, fric = fd_fric
+) %>%
+  purrr::cross2(n_cores) %>%
+  purrr::map(setNames, c("fundiv_function", "n_core")) %>%
+  purrr::map(
+    function(x) {
+
+      fundiv_function = x$fundiv_function
+      n_core          = x$n_core
+
+      future::plan(
+        list(
+          future::tweak(
+            future::multisession, workers = n_core
+          )
+        )
+      )
+
+      fd_benchmark_multicore(get(fundiv_function)) %>%
+        tidyr::unnest(c(time, gc)) %>%
+        select(-expression) %>%
+        mutate(fd_fct = as.character(fundiv_function), n_core = n_core)
+
+    }
+  )
+
+saveRDS(all_funs, "all_multicore_bench.Rds")
diff --git a/inst/benchmark_scripts/benchmark_fric_comparison.R b/inst/benchmark_scripts/benchmark_fric_comparison.R
@@ -0,0 +1,76 @@
+# Script to make benchmark across different functions of similar packages
+
+# Options ----------------------------------------------------------------------
+options(fundiversity.memoise = FALSE)  # Deactivate memoisation
+
+# Package & Functions ----------------------------------------------------------
+pkgload::load_all()  # Need last version of fundiversity
+source(
+  here::here("inst", "benchmark_scripts", "benchmark_fcts",
+             "fd_benchmark_alternatives.R")
+)
+source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R"))
+
+
+# FD indices expressions -------------------------------------------------------
+
+fric_exprs <- list(
+  "fric" = rlang::exprs(
+    fundiversity_fd_fric_unparallel =
+      {
+        fd_fric(given_data[["traits"]], given_data[["site_sp"]])
+      },
+    BAT_alpha_tree = BAT::alpha(
+      given_data[["site_sp"]], given_data[["traits"]]
+    ),
+    BAT_alpha_hull = BAT::hull.alpha(
+      BAT::hull.build(given_data[["site_sp"]], given_data[["traits"]])
+    ),
+    mFD_alpha_fd   = mFD::alpha.fd.multidim(
+      given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fric",
+      scaling = FALSE, verbose = FALSE
+    )
+  ),
+  "fric_intersect" = rlang::exprs(
+    fundiversity_fd_fric_intersect_unparallel =
+      {
+        fd_fric_intersect(given_data[["traits"]], given_data[["site_sp"]])
+      },
+    betapart_functional_beta = betapart::functional.beta.pair(
+      given_data[["site_sp"]], given_data[["traits"]]
+    ),
+    hillR_funct_pairwise = hillR::hill_func_parti_pairwise(
+      given_data[["site_sp"]], given_data[["traits"]]
+    )
+  )
+)
+
+# Functional Richness Benchmark ------------------------------------------------
+
+bench_sites   = c(5e1 , 1e2)
+bench_traits  = c(2, 4)
+bench_species = c(2e2,  5e2)
+
+n_iterations = 30
+
+## Regular indices benchmark
+# Parallelize across indices
+future::plan(
+  future.batchtools::batchtools_slurm,
+  template = here::here("inst", "benchmark_scripts", ".batchtools.slurm.tmpl"),
+  label    = "funbench_fric",
+  resources = list(
+    job.name = "funbench_fric",
+    walltime = 7200,
+    memory = "150G",
+    ncpus = 1,
+    output   = "/work/%u/%x-%j.log",
+    email  = Sys.getenv("USEREMAIL")
+  )
+)
+
+fric_exprs["fric_intersect"] %>%
+  furrr::future_imap(
+    ~ fd_benchmark_alternatives(.x, seed=20210915) %>%
+      saveRDS(glue::glue("fric_bench_{index}_alternatives.rds", index = .y))
+  )