Add a generated conda `environment.yml`, the GitHub workflow and R script that
regenerate it, and HPC install/init scripts that use it.

Note: the post-image blob ids on the `index` lines are those of the originally
recorded change; they were not recomputed for the revised file contents.

diff --git a/.github/workflows/conda-env.yml b/.github/workflows/conda-env.yml
new file mode 100644
index 000000000..a2776047b
--- /dev/null
+++ b/.github/workflows/conda-env.yml
@@ -0,0 +1,43 @@
+name: Generate Conda Environment
+
+on:
+  workflow_dispatch:
+  push:
+    paths:
+      - build/create_environment_yml.R
+      - flepimop/R_packages/*/DESCRIPTION
+    branches:
+      - main
+  pull_request:
+    paths:
+      - build/create_environment_yml.R
+      - flepimop/R_packages/*/DESCRIPTION
+    branches:
+      - main
+
+jobs:
+  generate-environment-yml:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.ref }}
+      - uses: r-lib/actions/setup-r@v2
+      - name: Generate Environment YAML
+        run: Rscript build/create_environment_yml.R
+      - name: Check For Environment Change
+        # Pass event data through the environment instead of expanding it
+        # inside the script: branch names are attacker-controlled on pull
+        # requests and would otherwise be parsed as shell code.
+        env:
+          ACTOR: ${{ github.actor }}
+          TARGET_REF: ${{ github.head_ref || github.ref_name }}
+        run: |
+          if [[ -n "$(git status -s -- environment.yml)" ]]; then
+            git config --global user.name "$ACTOR"
+            git config --global user.email "$ACTOR@users.noreply.github.com"
+            git add environment.yml
+            git commit -m 'Update `environment.yml` via GitHub action'
+            git push origin "HEAD:$TARGET_REF"
+          fi
diff --git a/batch/hpc_init.sh b/batch/hpc_init.sh
new file mode 100644
index 000000000..71091f580
--- /dev/null
+++ b/batch/hpc_init.sh
@@ -0,0 +1,144 @@
+# Interactive setup of a flepiMoP session on an HPC cluster.
+#
+# Takes the cluster name ('longleaf' or 'rockfish') as its only argument, loads
+# the cluster modules, activates the conda environment, prompts for the run
+# settings and sets FLEPI_PATH, FLEPI_CONDA, PROJECT_PATH and CONFIG_PATH.
+# NOTE(review): looks like this is meant to be sourced rather than executed
+# (no shebang, `conda activate`, exports) - confirm with the user docs.
+
+# Generic setup
+set -e
+
+# Cluster specific setup
+if [[ $1 == "longleaf" ]]; then
+    # Setup general purpose user variables needed for Longleaf
+    USERO=${USER:0:1}
+    USERN=${USER:1:1}
+    WORKDIR=$( realpath "/work/users/$USERO/$USERN/$USER/" )
+    USERDIR=$WORKDIR
+
+    # Load required modules
+    module purge
+    module load gcc/9.1.0
+    module load anaconda/2023.03
+    module load git
+elif [[ $1 == "rockfish" ]]; then
+    # Setup general purpose user variables needed for RockFish
+    WORKDIR=$( realpath "/scratch4/struelo1/flepimop-code/$USER/" )
+    USERDIR=$WORKDIR
+    mkdir -vp "$WORKDIR"
+
+    # Load required modules
+    module purge
+    module load slurm
+    module load gcc/9.3.0
+    module load anaconda/2020.07
+    module load git/2.42.0
+else
+    echo "The cluster name '$1' is not recognized, must be one of: 'longleaf', 'rockfish'."
+    set +e
+    # `return` leaves a sourcing shell alive; `exit` is only the fallback for
+    # when the script is executed directly.
+    return 1 2>/dev/null || exit 1
+fi
+
+# Ensure we have a $FLEPI_PATH
+if [ -z "${FLEPI_PATH}" ]; then
+    echo -n "An explicit \$FLEPI_PATH was not provided, please set one (or press enter to use '$USERDIR/flepiMoP'): "
+    read -r FLEPI_PATH
+    if [ -z "${FLEPI_PATH}" ]; then
+        export FLEPI_PATH="$USERDIR/flepiMoP"
+    fi
+    export FLEPI_PATH=$( realpath "$FLEPI_PATH" )
+    echo "Using '$FLEPI_PATH' for \$FLEPI_PATH."
+fi
+
+# Conda init
+if [ -z "${FLEPI_CONDA}" ]; then
+    echo -n "An explicit \$FLEPI_CONDA was not provided, please set one (or press enter to use 'flepimop-env'): "
+    read -r FLEPI_CONDA
+    if [ -z "${FLEPI_CONDA}" ]; then
+        export FLEPI_CONDA="flepimop-env"
+    fi
+    echo "Using '$FLEPI_CONDA' for \$FLEPI_CONDA."
+fi
+conda activate "$FLEPI_CONDA"
+
+# Check the conda environment is valid
+WHICH_PYTHON=$( which python )
+WHICH_R=$( which R )
+PYTHON_ARROW_VERSION=$( python -c "import pyarrow; print(pyarrow.__version__)" )
+R_ARROW_VERSION=$( Rscript -e "cat(as.character(packageVersion('arrow')))" )
+COMPATIBLE_ARROW_VERSION=$( echo "$R_ARROW_VERSION" | grep -F "$PYTHON_ARROW_VERSION" | wc -l )
+if [[ "$COMPATIBLE_ARROW_VERSION" -ne 1 ]]; then
+    echo "The R version of arrow is '$R_ARROW_VERSION' and the python version is '$PYTHON_ARROW_VERSION'. These may not be compatible versions."
+fi
+
+# Make sure the credentials file is where we expect and has the right perms
+if [ ! -f "$USERDIR/slack_credentials.sh" ]; then
+    echo "You should place sensitive credentials in '$USERDIR/slack_credentials.sh'."
+else
+    chmod 600 "$USERDIR/slack_credentials.sh"
+    source "$USERDIR/slack_credentials.sh"
+fi
+
+# Set correct env vars
+export FLEPI_STOCHASTIC_RUN=false
+export FLEPI_RESET_CHIMERICS=TRUE
+export TODAY=$( date --rfc-3339='date' )
+
+echo -n "Please set a project path (relative to '$WORKDIR'): "
+read -r PROJECT_PATH
+export PROJECT_PATH="$WORKDIR/$PROJECT_PATH"
+if [ ! -d "$PROJECT_PATH" ]; then
+    echo "> The project path provided, $PROJECT_PATH, is not a directory. Please ensure this is correct."
+fi
+
+echo -n "Please set a config path (relative to '$PROJECT_PATH'): "
+read -r CONFIG_PATH
+export CONFIG_PATH="$PROJECT_PATH/$CONFIG_PATH"
+if [ ! -f "$CONFIG_PATH" ]; then
+    echo "> The config path provided, $CONFIG_PATH, is not a file. Please ensure this is correct."
+fi
+
+echo -n "Please set a validation date (today is $TODAY): "
+read -r VALIDATION_DATE
+
+echo -n "Please set a resume location: "
+read -r RESUME_LOCATION
+
+echo -n "Please set a flepi run index: "
+read -r FLEPI_RUN_INDEX
+
+# Done
+cat << EOM
+> The HPC init script has successfully finished.
+
+If you are testing if this worked, say installing for the first time, you can use the inference example from the \`flepimop_sample\` repository:
+\`\`\`bash
+cd \$PROJECT_PATH
+flepimop-inference-main -c \$CONFIG_PATH -j 1 -n 1 -k 1
+\`\`\`
+Just make sure to \`rm -r model_output\` after running.
+
+Otherwise make sure this diagnostic info looks correct before continuing:
+* Cluster: $1
+* User directory: $USERDIR
+* Work directory: $WORKDIR
+* Flepi conda: $FLEPI_CONDA
+* Flepi path: $FLEPI_PATH
+* Project path: $PROJECT_PATH
+* Python: $WHICH_PYTHON
+* R: $WHICH_R
+* Python arrow: $PYTHON_ARROW_VERSION
+* R arrow: $R_ARROW_VERSION
+* Stochastic run: $FLEPI_STOCHASTIC_RUN
+* Reset chimerics: $FLEPI_RESET_CHIMERICS
+* Today: $TODAY
+* Config path: $CONFIG_PATH
+* Validation date: $VALIDATION_DATE
+* Resume location: $RESUME_LOCATION
+* Flepi run index: $FLEPI_RUN_INDEX
+EOM
+
+set +e
diff --git a/build/create_environment_yml.R b/build/create_environment_yml.R
new file mode 100644
index 000000000..ea19324a4
--- /dev/null
+++ b/build/create_environment_yml.R
@@ -0,0 +1,78 @@
+#!/usr/bin/env Rscript
+
+# Regenerate `environment.yml` from the `Depends` and `Imports` fields of the
+# R packages found under `flepimop/R_packages`.
+#
+# Usage: Rscript build/create_environment_yml.R [flepi_path]
+# where `flepi_path` defaults to the current working directory.
+
+# Helper functions
+
+# Split comma separated DESCRIPTION fields into unique package names, dropping
+# whitespace and version constraints such as `(>= 1.3.1)`.
+split_pkgs <- function(x) {
+  stripped <- gsub("\\s+|\\([^)]*\\)", "", x)
+  pkgs <- unlist(strsplit(stripped, ",", fixed = TRUE))
+  unique(pkgs[nzchar(pkgs)])
+}
+
+# Light argument parsing
+args <- commandArgs(trailingOnly = TRUE)
+flepi_path <- if (length(args) > 0L) args[1L] else getwd()
+
+# Get R package dependencies
+rpkgs <- list.files(
+  file.path(flepi_path, "flepimop", "R_packages"),
+  full.names = TRUE
+)
+# Ignore entries that are not R packages instead of failing in `read.dcf()`
+rpkgs <- rpkgs[file.exists(file.path(rpkgs, "DESCRIPTION"))]
+dependencies <- lapply(rpkgs, function(rpkg) {
+  description <- read.dcf(file.path(rpkg, "DESCRIPTION"))
+  sections <- intersect(c("Depends", "Imports"), colnames(description))
+  split_pkgs(description[, sections])
+})
+dependencies <- as.character(unique(unlist(dependencies)))
+# R itself and arrow get explicit entries below; the local packages are not
+# conda packages, so none of these belong in the generated `r-*` list
+dependencies <- setdiff(
+  dependencies,
+  c("R", "arrow", "covidcast", "methods", basename(rpkgs))
+)
+# Case-insensitive ordering in the C locale, so the generated file does not
+# change with the locale of the machine (or CI runner) that produced it
+dependencies <- dependencies[
+  order(tolower(dependencies), dependencies, method = "radix")
+]
+
+# Construct environment.yml file
+environment_yml <- file.path(flepi_path, "environment.yml")
+new_environment_yml <- c(
+  "channels:",
+  "- conda-forge",
+  "- defaults",
+  "- r",
+  "- dnachun",
+  "dependencies:",
+  "- python=3.11",
+  "- pip",
+  "- r-base>=4.3",
+  "- pyarrow=17.0.0",
+  "- r-arrow=17.0.0",
+  "- r-sf",
+  sprintf("- r-%s", dependencies)
+)
+if (file.exists(environment_yml)) {
+  old_environment_yml <- readLines(environment_yml)
+} else {
+  old_environment_yml <- character()
+}
+# The leading timestamp comment differs on every run, ignore it when comparing
+old_environment_yml <- old_environment_yml[!grepl("^#", old_environment_yml)]
+if (!identical(new_environment_yml, old_environment_yml)) {
+  new_environment_yml <- c(
+    paste0("# ", format(Sys.time(), "%a %b %d %X %Y %Z")),
+    new_environment_yml
+  )
+  writeLines(new_environment_yml, environment_yml)
+}
diff --git a/build/hpc_install_or_update.sh b/build/hpc_install_or_update.sh
new file mode 100755
index 000000000..745aaf2bf
--- /dev/null
+++ b/build/hpc_install_or_update.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+
+# Install or update flepiMoP and its conda environment on an HPC cluster.
+#
+# Takes the cluster name ('longleaf' or 'rockfish') as its only argument,
+# offers to clone flepiMoP if missing, creates the conda environment from
+# `environment.yml` when absent and installs gempyor plus the local R packages.
+
+# Generic setup
+set -e
+
+# Cluster specific setup
+if [[ $1 == "longleaf" ]]; then
+    # Setup general purpose user variables needed for Longleaf
+    USERO=${USER:0:1}
+    USERN=${USER:1:1}
+    WORKDIR=$( realpath "/work/users/$USERO/$USERN/$USER/" )
+    USERDIR=$WORKDIR
+
+    # Load required modules
+    module purge
+    module load gcc/9.1.0
+    module load anaconda/2023.03
+    module load git
+elif [[ $1 == "rockfish" ]]; then
+    # Setup general purpose user variables needed for RockFish
+    WORKDIR=$( realpath "/scratch4/struelo1/flepimop-code/$USER/" )
+    USERDIR=$WORKDIR
+    mkdir -vp "$WORKDIR"
+
+    # Load required modules
+    module purge
+    module load gcc/9.3.0
+    module load anaconda/2020.07
+    module load git/2.42.0
+else
+    echo "The cluster name '$1' is not recognized, must be one of: 'longleaf', 'rockfish'."
+    set +e
+    # `return` leaves a sourcing shell alive; `exit` is only the fallback for
+    # when the script is executed directly.
+    return 1 2>/dev/null || exit 1
+fi
+
+# Ensure we have a $FLEPI_PATH
+if [ -z "${FLEPI_PATH}" ]; then
+    echo -n "An explicit \$FLEPI_PATH was not provided, please set one (or press enter to use '$USERDIR/flepiMoP'): "
+    read -r FLEPI_PATH
+    if [ -z "${FLEPI_PATH}" ]; then
+        export FLEPI_PATH="$USERDIR/flepiMoP"
+    fi
+    export FLEPI_PATH=$( realpath "$FLEPI_PATH" )
+    echo "Using '$FLEPI_PATH' for \$FLEPI_PATH."
+fi
+
+# Test that flepiMoP is located there
+if [ ! -d "$FLEPI_PATH" ]; then
+    while true; do
+        read -r -p "Did not find flepiMoP at $FLEPI_PATH, do you want to clone the repo? (y/n) " resp
+        case "$resp" in
+            [yY])
+                echo "Cloning on your behalf."
+                git clone git@github.com:HopkinsIDD/flepiMoP.git "$FLEPI_PATH"
+                break
+                ;;
+            [nN])
+                echo "Then you need to set a \$FLEPI_PATH before running, cannot proceed with install."
+                set +e
+                return 1 2>/dev/null || exit 1
+                ;;
+            *)
+                echo "Invalid input. Please enter 'y' or 'n'. "
+                ;;
+        esac
+    done
+fi
+
+# Setup the conda environment
+if [ -z "${FLEPI_CONDA}" ]; then
+    echo -n "An explicit \$FLEPI_CONDA was not provided, please set one (or press enter to use 'flepimop-env'): "
+    read -r FLEPI_CONDA
+    if [ -z "${FLEPI_CONDA}" ]; then
+        export FLEPI_CONDA="flepimop-env"
+    fi
+    echo "Using '$FLEPI_CONDA' for \$FLEPI_CONDA."
+fi
+FLEPI_CONDA_ENV_MATCHES=$( conda info --envs | awk '{print $1}' | grep -xF "$FLEPI_CONDA" | wc -l )
+if [ "$FLEPI_CONDA_ENV_MATCHES" -eq 0 ]; then
+    conda env create --name "$FLEPI_CONDA" --file "$FLEPI_PATH/environment.yml"
+fi
+
+# Load the conda environment
+conda activate "$FLEPI_CONDA"
+# Pin both arrow bindings so later installs cannot move them apart; the python
+# binding is packaged as `pyarrow` (see `environment.yml`), not `arrow`.
+rm -f "$CONDA_PREFIX/conda-meta/pinned"
+cat << EOF > "$CONDA_PREFIX/conda-meta/pinned"
+r-arrow==17.0.0
+pyarrow==17.0.0
+EOF
+
+# Install the gempyor package from local
+pip install --editable "$FLEPI_PATH/flepimop/gempyor_pkg"
+
+# Install the local R packages
+R -e "install.packages('covidcast', repos='https://cloud.r-project.org')"
+for d in "$FLEPI_PATH"/flepimop/R_packages/*/; do
+    R CMD INSTALL "$d"
+done
+R -e "library(inference); inference::install_cli()"
+
+# Done
+echo "> Done installing/updating flepiMoP."
+set +e
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 000000000..704bf88cc
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,39 @@
+# Fri Oct 18 18:29:25 2024 UTC
+channels:
+- conda-forge
+- defaults
+- r
+- dnachun
+dependencies:
+- python=3.11
+- pip
+- r-base>=4.3
+- pyarrow=17.0.0
+- r-arrow=17.0.0
+- r-sf
+- r-data.table
+- r-doParallel
+- r-dplyr
+- r-foreach
+- r-ggplot2
+- r-ggraph
+- r-httr
+- r-jsonlite
+- r-lubridate
+- r-magrittr
+- r-MMWRweek
+- r-optparse
+- r-purrr
+- r-readr
+- r-reticulate
+- r-rlang
+- r-stringr
+- r-tibble
+- r-tidygraph
+- r-tidyr
+- r-tidyselect
+- r-tidyverse
+- r-truncnorm
+- r-vroom
+- r-xts
+- r-yaml
diff --git a/flepimop/R_packages/flepiconfig/DESCRIPTION b/flepimop/R_packages/flepiconfig/DESCRIPTION
index e69be893d..0fa95b31c 100644
--- a/flepimop/R_packages/flepiconfig/DESCRIPTION
+++ b/flepimop/R_packages/flepiconfig/DESCRIPTION
@@ -2,8 +2,8 @@ Package: flepiconfig
 Title: Config creation helper for flepiMoP
 Version: 3.0.0
 Imports:
-    tidyverse (>= 1.3.1),
-    readr (>= 2.0.0),
+    tidyverse,
+    readr,
     lubridate,
     magrittr,
     yaml,
diff --git a/flepimop/R_packages/inference/DESCRIPTION b/flepimop/R_packages/inference/DESCRIPTION
index cecd1ee72..6362c1f30 100644
--- a/flepimop/R_packages/inference/DESCRIPTION
+++ b/flepimop/R_packages/inference/DESCRIPTION
@@ -20,7 +20,8 @@ Imports:
     xts,
     reticulate,
     truncnorm,
-    arrow
+    arrow,
+    optparse
 RoxygenNote: 7.3.2
 Suggests:
     testthat
diff --git a/flepimop/R_packages/inference/R/install_cli.R b/flepimop/R_packages/inference/R/install_cli.R
index 28193e1db..da7408b8e 100644
--- a/flepimop/R_packages/inference/R/install_cli.R
+++ b/flepimop/R_packages/inference/R/install_cli.R
@@ -10,12 +10,22 @@
 #'
 #' @export
 install_cli <- function(
-  path = if (.Platform$OS.type == "unix") normalizePath(file.path("/usr", "local", "bin")) else stop("Unsupported OS")
+  path = {
+    condapth <- Sys.getenv("CONDA_PREFIX")
+    if (condapth != "") {
+      file.path(condapth, "bin")
+    } else {
+      stop("only support default path installation when conda is running.")
+    }
+  }
 ) {
   scriptfiles <- list.files(
     system.file("scripts", package = utils::packageName()), pattern = "flepimop-.*", full.names = TRUE
   )
   from <- scriptfiles
   to <- file.path(path, gsub("\\.R$", "", basename(scriptfiles)))
+  # `unlink()` also clears dangling symlinks, which `file.exists()` reports as
+  # missing and which would make `file.symlink()` fail below
+  unlink(to)
   file.symlink(from, to)
-}
\ No newline at end of file
+}