From 04d55fa49bc80420a8a96f1d5732767fa22cf45d Mon Sep 17 00:00:00 2001 From: Jannes Breier Date: Tue, 18 Jul 2023 10:27:49 +0200 Subject: [PATCH] new options for submitting lpjml to SLURM --- .buildlibrary | 2 +- CITATION.cff | 4 +-- DESCRIPTION | 4 +-- R/run_lpjml.R | 13 ++++---- R/submit_lpjml.R | 61 ++++++++++++++++++++++++++++++-------- R/write_config.R | 7 +++-- README.md | 6 ++-- man/submit_lpjml.Rd | 31 +++++++++++++------ man/write_config.Rd | 5 ++-- vignettes/lpjml-runner.Rmd | 2 +- 10 files changed, 93 insertions(+), 42 deletions(-) diff --git a/.buildlibrary b/.buildlibrary index 168c78a..ba0119b 100644 --- a/.buildlibrary +++ b/.buildlibrary @@ -1,4 +1,4 @@ -ValidationKey: '2287233' +ValidationKey: '2307608' AutocreateReadme: yes AcceptedWarnings: - 'Warning: package ''.*'' was built under R version' diff --git a/CITATION.cff b/CITATION.cff index 3ce8fec..51c0144 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,8 +2,8 @@ cff-version: 1.2.0 message: If you use this software, please cite it using the metadata from this file. type: software title: 'lpjmlkit: Toolkit for Basic LPJmL Handling' -version: 1.1.7 -date-released: '2023-07-11' +version: 1.1.8 +date-released: '2023-07-18' abstract: A collection of basic functions to facilitate the work with the Dynamic Global Vegetation Model (DGVM) Lund-Potsdam-Jena managed Land (LPJmL) hosted at the Potsdam Institute for Climate Impact Research (PIK). 
It provides functions for diff --git a/DESCRIPTION b/DESCRIPTION index abdd464..6eab2f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: lpjmlkit Type: Package Title: Toolkit for Basic LPJmL Handling -Version: 1.1.7 +Version: 1.1.8 Authors@R: c( person("Jannes", "Breier", , "jannesbr@pik-potsdam.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9055-6904")), person("Sebastian","Ostberg", , "ostberg@pik-potsdam.de", role = "aut", comment = c(ORCID = "0000-0002-2368-7015")), @@ -54,4 +54,4 @@ Suggests: sf Config/testthat/edition: 3 VignetteBuilder: knitr -Date: 2023-07-11 +Date: 2023-07-18 diff --git a/R/run_lpjml.R b/R/run_lpjml.R index 9a6843c..0cd8c4e 100644 --- a/R/run_lpjml.R +++ b/R/run_lpjml.R @@ -312,19 +312,18 @@ do_sequential <- function(sim_names, # Check if slurm is available if (is_slurm_available() && Sys.getenv("SLURM_JOB_ID") == "") { - Sys.setenv(I_MPI_DAPL_UD = "disable", # nolint:undesirable_function_linter. - I_MPI_FABRICS = "shm:shm", - I_MPI_DAPL_FABRIC = "shm:sh") + mpi_var <- Sys.getenv("I_MPI_DAPL_UD_PROVIDER") + Sys.unsetenv("I_MPI_DAPL_UD_PROVIDER")# nolint:undesirable_function_linter. + } else { + mpi_var <- NULL } for (sim_name in sim_names) { do_run(sim_name, model_path, sim_path, write_stdout, raise_error) } }, finally = { # Check if slurm is available - if (is_slurm_available() && Sys.getenv("SLURM_JOB_ID") == "") { - Sys.setenv(I_MPI_DAPL_UD = "enable", # nolint:undesirable_function_linter. - I_MPI_FABRICS = "shm:dapl") - Sys.unsetenv("I_MPI_DAPL_FABRIC") + if (!is.null(mpi_var)) { + Sys.setenv(I_MPI_DAPL_UD_PROVIDER = mpi_var) # nolint:undesirable_function_linter. } }) } diff --git a/R/submit_lpjml.R b/R/submit_lpjml.R index e6ba793..d61326f 100644 --- a/R/submit_lpjml.R +++ b/R/submit_lpjml.R @@ -27,17 +27,32 @@ #' More information at <https://www.pik-potsdam.de/en>. Defaults to `"short"`. #' #' @param ntasks Integer defining the number of tasks/threads. More information -#' at <https://www.pik-potsdam.de/en> and <https://slurm.schedmd.com>. -#' Defaults to `256`. +#' at <https://www.pik-potsdam.de/en> and +#' <https://slurm.schedmd.com/sbatch.html>. 
Defaults to `256`. #' #' @param wtime Character string defining the time limit. Setting a lower time #' limit than the maximum runtime for `sclass` can reduce the wait time in the #' SLURM job queue. More information at <https://www.pik-potsdam.de/en> and -#' <https://slurm.schedmd.com>. +#' <https://slurm.schedmd.com/sbatch.html>. #' #' @param blocking Integer defining the number of cores to be blocked. More #' information at <https://www.pik-potsdam.de/en> and -#' <https://slurm.schedmd.com>. +#' <https://slurm.schedmd.com/sbatch.html>. +#' +#' @param constraint Character string defining constraints for node selection. +#' Use `constraint = "haswell"` to request nodes of the type haswell with 16 +#' cores per node, `constraint = "broadwell"` to request nodes of the type +#' broadwell CPUs with 32 cores per node or `constraint = "exclusive"` to +#' reserve all CPUs of assigned nodes even if less are requested by `ntasks`. +#' Using `exclusive` should prevent interference of other batch jobs with +#' LPJmL. More information at <https://www.pik-potsdam.de> and +#' <https://slurm.schedmd.com/sbatch.html>. + +#' +#' @param slurm_options A named list of further arguments to be passed to sbatch. +#' E.g. list(`mail-user` = "max.mustermann@pik-potsdam.de") +#' More information at <https://www.pik-potsdam.de> and +#' <https://slurm.schedmd.com/sbatch.html> #' #' @param no_submit Logical. Set to `TRUE` to test if `x` set correctly or #' `FALSE` to actually submit job to SLURM. @@ -67,11 +82,11 @@ #' | scen2_transient | scen1 _spinup | #' #' To use different SLURM settings for each run the optional SLURM options -#' `"sclass"`, `"ntask"`, `"wtime"` or `"blocking"` can also be supplied to the -#' initial \link[tibble]{tibble} supplied as `param` to +#' `"sclass"`, `"ntasks"`, `"wtime"`, `"blocking"` or `"constraint"` can also be +#' supplied to the initial \link[tibble]{tibble} supplied as `param` to #' [`write_config()`]. These overwrite the (default) SLURM -#' arguments (`sclass`, `ntask`, `wtime` or `blocking`) supplied to -#' `submit_lpjml`. +#' arguments (`sclass`, `ntasks`, `wtime`, `blocking` or `constraint`) +#' supplied to `submit_lpjml`. 
#' #' | **sim_name** | **dependency** | **wtime** | #' |:--------------- |:-------------- |----------:| @@ -175,6 +190,8 @@ submit_lpjml <- function(x, # nolint:cyclocomp_linter. ntasks = 256, wtime = "", blocking = "", + constraint = "", + slurm_options = list(), no_submit = FALSE, output_path = NULL) { @@ -213,7 +230,9 @@ submit_lpjml <- function(x, # nolint:cyclocomp_linter. x$type <- "simulation" x$job_id <- NA x$status <- "failed" - slurm_args <- c("sclass", "ntask", "wtime", "blocking") + slurm_args <- c( + "sclass", "ntasks", "wtime", "blocking", "constraint", "slurm_options" + ) if ("order" %in% colnames(x)) { @@ -258,7 +277,9 @@ submit_lpjml <- function(x, # nolint:cyclocomp_linter. ntasks, wtime, blocking, - dependency) + constraint, + dependency, + slurm_options) if (job$status == 0) { x$job_id[sim_idx] <- strsplit( @@ -307,7 +328,9 @@ submit_lpjml <- function(x, # nolint:cyclocomp_linter. ntasks, wtime, blocking, - dependency = NA) + constraint, + dependency = NA, + slurm_options) if (job$status == 0) { x$job_id[sim_idx] <- strsplit( @@ -341,7 +364,9 @@ submit_run <- function(sim_name, ntasks, wtime, blocking, - dependency) { + constraint, + dependency, + slurm_options) { config_file <- paste0("config_", sim_name, @@ -373,6 +398,14 @@ submit_run <- function(sim_name, timestamp, ".json") + if (is.list(slurm_options) && length(slurm_options) > 0) { + further_slurm_options <- paste0( + " -option ", names(slurm_options), "=", slurm_options, collapse = " " + ) + } else { + further_slurm_options <- "" + } + inner_command <- paste0(model_path, "/bin/lpjsubmit", # nolint:absolute_path_linter. 
" -nocheck", " -class ", sclass, @@ -383,9 +416,13 @@ submit_run <- function(sim_name, ifelse(blocking != "", paste0(" -blocking ", blocking), ""), + ifelse(constraint != "", + paste0(" -constraint ", constraint), + ""), ifelse(!is.na(dependency), paste0(" -dependency ", dependency), ""), + further_slurm_options, " -o ", stdout, " -e ", stderr, " ", diff --git a/R/write_config.R b/R/write_config.R index 6acbc20..e4a83cb 100644 --- a/R/write_config.R +++ b/R/write_config.R @@ -120,7 +120,8 @@ #' (`wtime`) for the transient run than the spin-up run to get a higher priority #' in the SLURM queue. This can be achieved by supplying this option as a #' parameter to `param`. \cr -#' 4 options are available, namely `sclass`, `ntask`, `wtime`, `blocking`. \cr +#' 6 options are available, namely `sclass`, `ntasks`, `wtime`, `blocking`, +#' `constraint` and `slurm_options`. Use as arguments for [`submit_lpjml()`]. \cr #' If specified in `param`, they overwrite the corresponding function arguments #' in [`submit_lpjml()`]. 
#' @@ -313,7 +314,9 @@ write_config <- function(x, order = NA, dependency = NA) - slurm_args <- c("sclass", "ntask", "wtime", "blocking") + slurm_args <- c( + "sclass", "ntasks", "wtime", "blocking", "constraint", "slurm_options" + ) config_tmp[slurm_args] <- NA diff --git a/README.md b/README.md index b747b90..9f3f396 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Toolkit for Basic LPJmL Handling -R package **lpjmlkit**, version **1.1.7** +R package **lpjmlkit**, version **1.1.8** [![CRAN status](https://www.r-pkg.org/badges/version/lpjmlkit)](https://cran.r-project.org/package=lpjmlkit) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7773134.svg)](https://doi.org/10.5281/zenodo.7773134) [![R build status](https://github.com/PIK-LPJmL/lpjmlkit/workflows/check/badge.svg)](https://github.com/PIK-LPJmL/lpjmlkit/actions) [![codecov](https://codecov.io/gh/PIK-LPJmL/lpjmlkit/branch/master/graph/badge.svg)](https://app.codecov.io/gh/PIK-LPJmL/lpjmlkit) [![r-universe](https://pik-piam.r-universe.dev/badges/lpjmlkit)](https://pik-piam.r-universe.dev/builds) @@ -76,7 +76,7 @@ In case of questions / problems please contact Jannes Breier . +Breier J, Ostberg S, Wirth S, Minoli S, Stenzel F, Müller C (2023). _lpjmlkit: Toolkit for Basic LPJmL Handling_. doi: 10.5281/zenodo.7773134 (URL: https://doi.org/10.5281/zenodo.7773134), R package version 1.1.8, . 
A BibTeX entry for LaTeX users is @@ -85,7 +85,7 @@ A BibTeX entry for LaTeX users is title = {lpjmlkit: Toolkit for Basic LPJmL Handling}, author = {Jannes Breier and Sebastian Ostberg and Stephen Björn Wirth and Sara Minoli and Fabian Stenzel and Christoph Müller}, year = {2023}, - note = {R package version 1.1.7}, + note = {R package version 1.1.8}, doi = {10.5281/zenodo.7773134}, url = {https://github.com/PIK-LPJmL/lpjmlkit}, } diff --git a/man/submit_lpjml.Rd b/man/submit_lpjml.Rd index 05c6ab2..9ceb758 100644 --- a/man/submit_lpjml.Rd +++ b/man/submit_lpjml.Rd @@ -13,6 +13,8 @@ submit_lpjml( ntasks = 256, wtime = "", blocking = "", + constraint = "", + slurm_options = list(), no_submit = FALSE, output_path = NULL ) @@ -40,17 +42,31 @@ options at PIK: \code{c("short", "medium", "long", "priority", "standby", "io")} More information at \url{https://www.pik-potsdam.de/en}. Defaults to \code{"short"}.} \item{ntasks}{Integer defining the number of tasks/threads. More information -at \url{https://www.pik-potsdam.de/en} and \url{https://slurm.schedmd.com}. -Defaults to \code{256}.} +at \url{https://www.pik-potsdam.de/en} and +\url{https://slurm.schedmd.com/sbatch.html}. Defaults to \code{256}.} \item{wtime}{Character string defining the time limit. Setting a lower time limit than the maximum runtime for \code{sclass} can reduce the wait time in the SLURM job queue. More information at \url{https://www.pik-potsdam.de/en} and -\url{https://slurm.schedmd.com}.} +\url{https://slurm.schedmd.com/sbatch.html}.} \item{blocking}{Integer defining the number of cores to be blocked. More information at \url{https://www.pik-potsdam.de/en} and -\url{https://slurm.schedmd.com}.} +\url{https://slurm.schedmd.com/sbatch.html}.} + +\item{constraint}{Character string defining constraints for node selection. 
+Use \code{constraint = "haswell"} to request nodes of the type haswell with 16 +cores per node, \code{constraint = "broadwell"} to request nodes of the type +broadwell CPUs with 32 cores per node or \code{constraint = "exclusive"} to +reserve all CPUs of assigned nodes even if less are requested by \code{ntasks}. +Using \code{exclusive} should prevent interference of other batch jobs with +LPJmL. More information at \url{https://www.pik-potsdam.de} and +\url{https://slurm.schedmd.com/sbatch.html}.} + +\item{slurm_options}{A named list of further arguments to be passed to sbatch. +E.g. list(\code{mail-user} = "max.mustermann@pik-potsdam.de") +More information at \url{https://www.pik-potsdam.de} and +\url{https://slurm.schedmd.com/sbatch.html}} \item{no_submit}{Logical. Set to \code{TRUE} to test if \code{x} set correctly or \code{FALSE} to actually submit job to SLURM.} @@ -87,11 +103,8 @@ parameter \code{"dependency"} needs to be provided within the initial To use different SLURM settings for each run the optional SLURM options -\code{"sclass"}, \code{"ntask"}, \code{"wtime"} or \code{"blocking"} can also be supplied to the -initial \link[tibble]{tibble} supplied as \code{param} to -\code{\link[=write_config]{write_config()}}. These overwrite the (default) SLURM -arguments (\code{sclass}, \code{ntask}, \code{wtime} or \code{blocking}) supplied to -\code{submit_lpjml}.\tabular{llr}{ +\code{"sclass"}, \code{"ntasks"}, \code{"wtime"}, \code{"blocking"} or \code{"constraint"} can also be supplied to the initial \link[tibble]{tibble} supplied as \code{param} to \code{\link[=write_config]{write_config()}}. 
These overwrite the (default) SLURM arguments (\code{sclass}, \code{ntasks}, \code{wtime}, \code{blocking} or \code{constraint}) +supplied to \code{submit_lpjml}.\tabular{llr}{ \strong{sim_name} \tab \strong{dependency} \tab \strong{wtime} \cr scen1_spinup \tab NA \tab "8:00:00" \cr scen2_transient \tab scen1 _spinup \tab "2:00:00" \cr diff --git a/man/write_config.Rd b/man/write_config.Rd index 4d9b559..f62d491 100644 --- a/man/write_config.Rd +++ b/man/write_config.Rd @@ -138,9 +138,8 @@ separately. For example, users may want to set a lower wall clock limit (\code{wtime}) for the transient run than the spin-up run to get a higher priority in the SLURM queue. This can be achieved by supplying this option as a parameter to \code{param}. \cr -4 options are available, namely \code{sclass}, \code{ntask}, \code{wtime}, \code{blocking}. \cr -If specified in \code{param}, they overwrite the corresponding function arguments -in \code{\link[=submit_lpjml]{submit_lpjml()}}. +6 options are available, namely \code{sclass}, \code{ntasks}, \code{wtime}, \code{blocking}, +\code{constraint} and \code{slurm_options}. Use as arguments for \code{\link[=submit_lpjml]{submit_lpjml()}}. \cr If specified in \code{param}, they overwrite the corresponding function arguments in \code{\link[=submit_lpjml]{submit_lpjml()}}. \if{html}{\out{
}}\preformatted{my_params4 <- tibble( sim_name = c("scen1_spinup", "scen1_transient"), diff --git a/vignettes/lpjml-runner.Rmd b/vignettes/lpjml-runner.Rmd index 8c64343..9625d4a 100644 --- a/vignettes/lpjml-runner.Rmd +++ b/vignettes/lpjml-runner.Rmd @@ -262,7 +262,7 @@ params <- tibble( dependency = c(NA, "spinup", "spinup"), # slurm option wtime: analogous to sbatch -wtime defines slurm option # individually per config, overwrites submit_lpjml argument - # (same for sclass, ntasks, blocking) + # (same for sclass, ntasks, blocking or constraint) wtime = c("15:00:00", "3:00:00", "3:00:00") )