Skip to content

Commit

Permalink
Add benchmark scripts and template
Browse files Browse the repository at this point in the history
  • Loading branch information
Matthias Grenié committed Dec 2, 2023
1 parent b98a6fe commit e27a46b
Show file tree
Hide file tree
Showing 7 changed files with 372 additions and 0 deletions.
51 changes: 51 additions & 0 deletions inst/benchmark_scripts/.batchtools.slurm.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash

## Job Resource Interface Definition
##
## ntasks [integer(1)]: Number of required tasks,
## Set larger than 1 if you want to further parallelize
## with MPI within your job.
## ncpus [integer(1)]: Number of required cpus per task,
## Set larger than 1 if you want to further parallelize
## with multicore/parallel within each task.
## walltime [integer(1)]: Walltime for this job, in seconds.
## Must be at least 60 seconds for Slurm to work properly.
## memory [integer(1)]: Memory in megabytes for each cpu.
## Must be at least 100 (when I tried lower values my
## jobs did not start at all).
##
## Default resources can be set in your .batchtools.conf.R by defining the variable
## 'default.resources' as a named list.

<%
# relative paths are not handled well by Slurm
log.file = fs::path_expand(log.file)
-%>


#SBATCH --job-name=<%= resources$job.name %>
#SBATCH --output=<%= resources$output %>
#SBATCH --error=<%= log.file %>
#SBATCH --time=<%= resources$walltime %>
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=<%= resources$ncpus %>
#SBATCH --mem-per-cpu=<%= resources$memory %>
#SBATCH --mail-user=<%= resources$email %>
#SBATCH --mail-type=ALL
<%= if (!is.null(resources$partition)) sprintf(paste0("#SBATCH --partition='", resources$partition, "'")) %>
<%= if (array.jobs) sprintf("#SBATCH --array=1-%i", nrow(jobs)) else "" %>

## Initialize work environment like
module load foss/2020b R/4.0.4-2
cd fundiversity

## Export value of DEBUGME environemnt var to slave
export DEBUGME=<%= Sys.getenv("DEBUGME") %>

<%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%>
<%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%>
<%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%>

## Run R:
## we merge R output with stdout from SLURM, which gets then logged via --output option
Rscript -e 'batchtools::doJobCollection("<%= uri %>")'
126 changes: 126 additions & 0 deletions inst/benchmark_scripts/benchmark.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Script to make benchmark across different functions of similar packages

# Options ----------------------------------------------------------------------
options(fundiversity.memoise = FALSE) # Deactivate memoisation

# Package & Functions ----------------------------------------------------------
pkgload::load_all() # Need last version of fundiversity
source(
here::here("inst", "benchmark_scripts", "benchmark_fcts",
"fd_benchmark_multicore.R")
)
source(
here::here("inst", "benchmark_scripts", "benchmark_fcts",
"fd_benchmark_alternatives.R")
)
source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R"))


# FD indices expressions -------------------------------------------------------

dbfd_indices <- rlang::exprs(
FD_dbFD_fdiv = FD::dbFD(
given_data[["traits"]], given_data[["site_sp"]], calc.FDiv = TRUE,
calc.FGR = FALSE, calc.FRic = FALSE, calc.CWM = FALSE
),
FD_dbFD_feve = FD::dbFD(
given_data[["traits"]], given_data[["site_sp"]], calc.FDiv = FALSE,
calc.FGR = FALSE, calc.FRic = FALSE, calc.CWM = FALSE
),
FD_dbFD_fric = FD::dbFD(
given_data[["traits"]], given_data[["site_sp"]], calc.FRic = TRUE,
calc.CWM = FALSE, calc.FGR = FALSE, calc.FDiv = FALSE
),
FD_dbFD_raoq = FD::dbFD(
given_data[["traits"]], given_data[["site_sp"]], calc.FRic = FALSE,
calc.CWM = FALSE, calc.FGR = FALSE, calc.FDiv = FALSE
)
)

fd_indices <- list(
"fdis" = rlang::exprs(
fundiversity_fd_fdis_unparallel =
{
fd_fdis(given_data[["traits"]], given_data[["site_sp"]])
},
BAT_dispersion = BAT::dispersion(
given_data[["site_sp"]], given_data[["traits"]]
),
FD_fdisp = FD::fdisp(
dist(given_data[["traits"]]), given_data[["site_sp"]]
),
mFD_alpha_fd = mFD::alpha.fd.multidim(
given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fdis",
scaling = FALSE, verbose = FALSE
)
),
"fdiv" = rlang::exprs(
fundiversity_fd_fdiv_unparallel =
{
fd_fdiv(given_data[["traits"]], given_data[["site_sp"]])
},
mFD_alpha_fd = mFD::alpha.fd.multidim(
given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fdiv",
scaling = FALSE, verbose = FALSE
)
),
"feve" = rlang::exprs(
fundiversity_fd_feve_unparallel =
{
fd_feve(given_data[["traits"]], given_data[["site_sp"]])
},
mFD_alpha_fd = mFD::alpha.fd.multidim(
given_data[["traits"]], given_data[["site_sp"]], ind_vect = "feve",
scaling = FALSE, verbose = FALSE
)
),
"raoq" = rlang::exprs(
fundiversity_fd_raoq_unparallel =
{
fd_raoq(given_data[["traits"]], given_data[["site_sp"]])
},
adiv_qe = adiv::QE(
given_data[["site_sp"]], dist(given_data[["traits"]])
),
BAT_rao = BAT::rao(
given_data[["site_sp"]], distance = given_data[["traits"]]
),
hillR_hill_func = hillR::hill_func(
given_data[["site_sp"]], given_data[["traits"]], fdis = FALSE
),
mFD_alpha_fd_hill = mFD::alpha.fd.hill(
given_data[["site_sp"]], dist(given_data[["traits"]]), q = 2,
tau = "max"
)
)
)

# Benchmarks -------------------------------------------------------------------
bench_sites = c(5e1 , 1e2, 5e2)
bench_traits = c(2, 4, 10)
bench_species = c(2e2, 5e2, 1e3)

n_iterations = 10

## Regular indices benchmark
# Parallelize across indices
future::plan(
future.batchtools::batchtools_slurm,
template = here::here("inst", "benchmark_scripts", ".batchtools.slurm.tmpl"),
label = "funbench",
resources = list(
job.name = "funbench",
walltime = 5760,
memory = "150G",
ncpus = 1,
output = "/work/%u/%j-%x.log",
email = Sys.getenv("USEREMAIL")
),
finalize = FALSE
)

fd_indices %>%
furrr::future_imap(
~ fd_benchmark_alternatives(.x) %>%
saveRDS(glue::glue("bench_{index}_alternatives.rds", index = .y))
)
19 changes: 19 additions & 0 deletions inst/benchmark_scripts/benchmark_fcts/fd_benchmark_alternatives.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
fd_benchmark_alternatives <- function(list_alternatives, seed = 20210906) {

bench::press(
n_sites = bench_sites,
n_traits = bench_traits,
n_species = bench_species,
{
set.seed(seed)
given_data = make_data(n_species, n_traits, n_sites)
bench::mark(
exprs = list_alternatives,
iterations = n_iterations,
check = FALSE,
memory = FALSE
)
}
)

}
24 changes: 24 additions & 0 deletions inst/benchmark_scripts/benchmark_fcts/fd_benchmark_multicore.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
fd_benchmark_multicore <- function(fd_index) {

fd_fct <- rlang::as_function(fd_index)

bench::press(
n_sites = bench_sites,
n_traits = bench_traits,
n_species = bench_species,
{
set.seed(20210906)
given_data = make_data(n_species, n_traits, n_sites)
bench::mark(
iterations = n_iterations,
fundiversity_multicore =
{
fd_fct(given_data[["traits"]], given_data[["site_sp"]])
},
check = FALSE,
memory = FALSE
)
}
)

}
19 changes: 19 additions & 0 deletions inst/benchmark_scripts/benchmark_fcts/make_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
make_data = function(n_species = 10, n_traits = 5, n_sites = 20) {
traits = matrix(runif(n_species * n_traits),
ncol = n_traits, nrow = n_species,
dimnames = list(species = paste0("sp", seq(n_species)),
traits = paste0("t", seq(n_traits)))
)

site_sp = matrix(
as.numeric(runif(n_species * n_sites) >= 0.5),
nrow = n_sites,
ncol = n_species,
dimnames = list(
sites = paste0("s", seq(n_sites)),
species = paste0("sp", seq(n_species))
)
)

list(traits = traits, site_sp = site_sp)
}
57 changes: 57 additions & 0 deletions inst/benchmark_scripts/benchmark_fd_multicore.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Script to make benchmark across different functions of similar packages

# Options ----------------------------------------------------------------------
options(fundiversity.memoise = FALSE) # Deactivate memoisation

# Package & Functions ----------------------------------------------------------
pkgload::load_all() # Need last version of fundiversity
library("magrittr")
source(
here::here("inst", "benchmark_scripts", "benchmark_fcts",
"fd_benchmark_multicore.R")
)
source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R"))

# Benchmarks Parameters --------------------------------------------------------

# Data Parameters
bench_sites = c(5e1 , 1e2, 5e2)
bench_traits = c(2, 4)
bench_species = c(2e2, 5e2, 1e3)

# Number of Repeats per function
n_iterations = 20

# Number of Cores over which to parallelize
n_cores = c(1, seq(2, 6, by = 2))


# Running the benchmark --------------------------------------------------------
all_funs = alist(
fdis = fd_fdis, fdiv = fd_fdiv, feve = fd_feve, fric = fd_fric
) %>%
purrr::cross2(n_cores) %>%
purrr::map(setNames, c("fundiv_function", "n_core")) %>%
purrr::map(
function(x) {

fundiv_function = x$fundiv_function
n_core = x$n_core

future::plan(
list(
future::tweak(
future::multisession, workers = n_core
)
)
)

fd_benchmark_multicore(get(fundiv_function)) %>%
tidyr::unnest(c(time, gc)) %>%
select(-expression) %>%
mutate(fd_fct = as.character(fundiv_function), n_core = n_core)

}
)

saveRDS(all_funs, "all_multicore_bench.Rds")
76 changes: 76 additions & 0 deletions inst/benchmark_scripts/benchmark_fric_comparison.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Script to make benchmark across different functions of similar packages

# Options ----------------------------------------------------------------------
options(fundiversity.memoise = FALSE) # Deactivate memoisation

# Package & Functions ----------------------------------------------------------
pkgload::load_all() # Need last version of fundiversity
source(
here::here("inst", "benchmark_scripts", "benchmark_fcts",
"fd_benchmark_alternatives.R")
)
source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R"))


# FD indices expressions -------------------------------------------------------

fric_exprs <- list(
"fric" = rlang::exprs(
fundiversity_fd_fric_unparallel =
{
fd_fric(given_data[["traits"]], given_data[["site_sp"]])
},
BAT_alpha_tree = BAT::alpha(
given_data[["site_sp"]], given_data[["traits"]]
),
BAT_alpha_hull = BAT::hull.alpha(
BAT::hull.build(given_data[["site_sp"]], given_data[["traits"]])
),
mFD_alpha_fd = mFD::alpha.fd.multidim(
given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fric",
scaling = FALSE, verbose = FALSE
)
),
"fric_intersect" = rlang::exprs(
fundiversity_fd_fric_intersect_unparallel =
{
fd_fric_intersect(given_data[["traits"]], given_data[["site_sp"]])
},
betapart_functional_beta = betapart::functional.beta.pair(
given_data[["site_sp"]], given_data[["traits"]]
),
hillR_funct_pairwise = hillR::hill_func_parti_pairwise(
given_data[["site_sp"]], given_data[["traits"]]
)
)
)

# Functional Richness Benchmark ------------------------------------------------

bench_sites = c(5e1 , 1e2)
bench_traits = c(2, 4)
bench_species = c(2e2, 5e2)

n_iterations = 30

## Regular indices benchmark
# Parallelize across indices
future::plan(
future.batchtools::batchtools_slurm,
template = here::here("inst", "benchmark_scripts", ".batchtools.slurm.tmpl"),
label = "funbench_fric",
resources = list(
job.name = "funbench_fric",
walltime = 7200,
memory = "150G",
ncpus = 1,
output = "/work/%u/%x-%j.log",
email = Sys.getenv("USEREMAIL")
)
)

fric_exprs["fric_intersect"] %>%
furrr::future_imap(
~ fd_benchmark_alternatives(.x, seed=20210915) %>%
saveRDS(glue::glue("fric_bench_{index}_alternatives.rds", index = .y))
)

0 comments on commit e27a46b

Please sign in to comment.