Skip to content

Commit

Permalink
Merge pull request #329 from HopkinsIDD/GH-191/longleaf-batch-submission
Browse files Browse the repository at this point in the history
Generic HPC Install Script
  • Loading branch information
TimothyWillard authored Oct 23, 2024
2 parents d34a364 + 86b5cd6 commit 24d243c
Show file tree
Hide file tree
Showing 8 changed files with 393 additions and 5 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/conda-env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Regenerates `environment.yml` from the R package DESCRIPTION files and, when
# the generated file differs from what is checked in, commits and pushes it.
name: Generate Conda Environment

on:
  workflow_dispatch:
  push:
    paths:
      - build/create_environment_yml.R
      - flepimop/R_packages/*/DESCRIPTION
    branches:
      - main
  pull_request:
    paths:
      - build/create_environment_yml.R
      - flepimop/R_packages/*/DESCRIPTION
    branches:
      - main

jobs:
  generate-environment-yml:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Empty outside of pull_request events, in which case the action
          # falls back to its default ref.
          ref: ${{ github.event.pull_request.head.ref }}
      - uses: r-lib/actions/setup-r@v2
      - name: Generate Environment YAML
        run: Rscript build/create_environment_yml.R
      - name: Check For Environment Change
        # Context values are handed to the script through the environment
        # instead of being interpolated into the script text: the PR head ref
        # is contributor-controlled, so inline `${{ }}` expansion would allow
        # shell injection.
        env:
          ACTOR: ${{ github.actor }}
          HEAD_REF: ${{ github.event.pull_request.head.ref }}
        run: |
          if [[ -n "$(git status -s -- environment.yml)" ]]; then
            git config --global user.name "$ACTOR"
            git config --global user.email "$ACTOR@users.noreply.github.com"
            git add environment.yml
            git commit -m 'Update `environment.yml` via GitHub action'
            # HEAD_REF is empty for push/workflow_dispatch events; in that
            # case pass no refspec at all (same as the previous behaviour).
            git push origin ${HEAD_REF:+"$HEAD_REF"}
          fi
134 changes: 134 additions & 0 deletions batch/hpc_init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Session initialisation for running flepiMoP on a supported HPC cluster.
#
# Arguments:
#   $1 - cluster name, one of 'longleaf' or 'rockfish'.
# Optional environment inputs (prompted for interactively when empty):
#   FLEPI_PATH  - location of the flepiMoP checkout.
#   FLEPI_CONDA - name of the conda environment to activate.
# Exports: FLEPI_STOCHASTIC_RUN, FLEPI_RESET_CHIMERICS, TODAY, PROJECT_PATH,
#   CONFIG_PATH, plus FLEPI_PATH / FLEPI_CONDA when they had to be prompted for.
# Sets without exporting: VALIDATION_DATE, RESUME_LOCATION, FLEPI_RUN_INDEX.
#
# NOTE(review): the script has no shebang, exports variables and calls
# `conda activate`, so it is presumably meant to be sourced. The failure path
# below therefore prefers `return` over `exit` so the calling shell survives.
# NOTE(review): VALIDATION_DATE, RESUME_LOCATION and FLEPI_RUN_INDEX are not
# exported and so are invisible to child processes -- confirm this is intended.

# Generic setup
set -e

# Cluster specific setup
if [[ $1 == "longleaf" ]]; then
    # Setup general purpose user variables needed for Longleaf. The work
    # directory path contains the first and second character of the user name.
    USERO=${USER:0:1}
    USERN=${USER:1:1}
    WORKDIR=$( realpath "/work/users/$USERO/$USERN/$USER/" )
    USERDIR=$WORKDIR

    # Load required modules
    module purge
    module load gcc/9.1.0
    module load anaconda/2023.03
    module load git
elif [[ $1 == "rockfish" ]]; then
    # Setup general purpose user variables needed for RockFish
    WORKDIR=$( realpath "/scratch4/struelo1/flepimop-code/$USER/" )
    USERDIR=$WORKDIR
    mkdir -vp "$WORKDIR"

    # Load required modules
    module purge
    module load slurm
    module load gcc/9.3.0
    module load anaconda/2020.07
    module load git/2.42.0
else
    echo "The cluster name '$1' is not recognized, must be one of: 'longleaf', 'rockfish'."
    set +e
    # `return` succeeds only when sourced; fall back to `exit` when executed.
    return 1 2>/dev/null || exit 1
fi

# Ensure we have a $FLEPI_PATH
if [ -z "${FLEPI_PATH}" ]; then
    echo -n "An explicit \$FLEPI_PATH was not provided, please set one (or press enter to use '$USERDIR/flepiMoP'): "
    read -r FLEPI_PATH
    if [ -z "${FLEPI_PATH}" ]; then
        export FLEPI_PATH="$USERDIR/flepiMoP"
    fi
    export FLEPI_PATH=$( realpath "$FLEPI_PATH" )
    echo "Using '$FLEPI_PATH' for \$FLEPI_PATH."
fi

# Conda init
if [ -z "${FLEPI_CONDA}" ]; then
    echo -n "An explicit \$FLEPI_CONDA was not provided, please set one (or press enter to use 'flepimop-env'): "
    read -r FLEPI_CONDA
    if [ -z "${FLEPI_CONDA}" ]; then
        export FLEPI_CONDA="flepimop-env"
    fi
    echo "Using '$FLEPI_CONDA' for \$FLEPI_CONDA."
fi
conda activate "$FLEPI_CONDA"

# Check the conda environment is valid
WHICH_PYTHON=$( command -v python )
WHICH_R=$( command -v R )
PYTHON_ARROW_VERSION=$( python -c "import pyarrow; print(pyarrow.__version__)" )
R_ARROW_VERSION=$( Rscript -e "cat(as.character(packageVersion('arrow')))" )
# Fixed-string match (-F) so the dots in the version are not regex wildcards.
# `wc -l` stays last in the pipeline so a non-match does not trip `set -e`.
COMPATIBLE_ARROW_VERSION=$( echo "$R_ARROW_VERSION" | grep -F -- "$PYTHON_ARROW_VERSION" | wc -l )
if [[ "$COMPATIBLE_ARROW_VERSION" -ne 1 ]]; then
    echo "The R version of arrow is '$R_ARROW_VERSION' and the python version is '$PYTHON_ARROW_VERSION'. These may not be compatible versions."
fi

# Make sure the credentials file is where we expect and has the right perms
if [ ! -f "$USERDIR/slack_credentials.sh" ]; then
    echo "You should place sensitive credentials in '$USERDIR/slack_credentials.sh'."
else
    chmod 600 "$USERDIR/slack_credentials.sh"
    source "$USERDIR/slack_credentials.sh"
fi

# Set correct env vars
export FLEPI_STOCHASTIC_RUN=false
export FLEPI_RESET_CHIMERICS=TRUE
export TODAY=$( date --rfc-3339='date' )

echo -n "Please set a project path (relative to '$WORKDIR'): "
read -r PROJECT_PATH
export PROJECT_PATH="$WORKDIR/$PROJECT_PATH"
# Only warn: a wrong path is reported but does not abort the setup.
if [ ! -d "$PROJECT_PATH" ]; then
    echo "> The project path provided, $PROJECT_PATH, is not a directory. Please ensure this is correct."
fi

echo -n "Please set a config path (relative to '$PROJECT_PATH'): "
read -r CONFIG_PATH
export CONFIG_PATH="$PROJECT_PATH/$CONFIG_PATH"
if [ ! -f "$CONFIG_PATH" ]; then
    echo "> The config path provided, $CONFIG_PATH, is not a file. Please ensure this is correct."
fi

echo -n "Please set a validation date (today is $TODAY): "
read -r VALIDATION_DATE

echo -n "Please set a resume location: "
read -r RESUME_LOCATION

echo -n "Please set a flepi run index: "
read -r FLEPI_RUN_INDEX

# Done
cat << EOM
> The HPC init script has successfully finished.
If you are testing if this worked, say installing for the first time, you can use the inference example from the \`flepimop_sample\` repository:
\`\`\`bash
cd \$PROJECT_PATH
flepimop-inference-main -c \$CONFIG_PATH -j 1 -n 1 -k 1
\`\`\`
Just make sure to \`rm -r model_output\` after running.
Otherwise make sure this diagnostic info looks correct before continuing:
* Cluster: $1
* User directory: $USERDIR
* Work directory: $WORKDIR
* Flepi conda: $FLEPI_CONDA
* Flepi path: $FLEPI_PATH
* Project path: $PROJECT_PATH
* Python: $WHICH_PYTHON
* R: $WHICH_R
* Python arrow: $PYTHON_ARROW_VERSION
* R arrow: $R_ARROW_VERSION
* Stochastic run: $FLEPI_STOCHASTIC_RUN
* Reset chimerics: $FLEPI_RESET_CHIMERICS
* Today: $TODAY
* Config path: $CONFIG_PATH
* Validation date: $VALIDATION_DATE
* Resume location: $RESUME_LOCATION
* Flepi run index: $FLEPI_RUN_INDEX
EOM

set +e
60 changes: 60 additions & 0 deletions build/create_environment_yml.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env Rscript

# Generates `environment.yml` from the `Depends`/`Imports` fields of every
# package found under `flepimop/R_packages`.
#
# Usage: Rscript create_environment_yml.R [flepi_path]
#   flepi_path - root of the repository; defaults to the working directory.
#
# The file is rewritten (with a fresh timestamp comment on the first line) only
# when its non-comment lines differ from the newly generated content.

# Helper functions

# Split comma separated DESCRIPTION field(s) into unique bare package names.
# Whitespace and version constraints such as "(>= 1.3.1)" are removed so the
# names can be used as conda package specs; empty entries are dropped.
split_pkgs <- \(x) {
  x <- gsub("\\s+", "", x)
  x <- gsub("\\([^)]*\\)", "", x)
  pkgs <- unique(unlist(strsplit(x, ",", fixed = TRUE)))
  pkgs[nzchar(pkgs)]
}

# Light argument parsing
args <- commandArgs(trailingOnly = TRUE)
flepi_path <- if (length(args)) args[1L] else getwd()

# Get R package dependencies
rpkgs <- list.files(
  file.path(flepi_path, "flepimop", "R_packages"),
  full.names = TRUE
)
dependencies <- sapply(rpkgs, function(rpkg) {
  description <- read.dcf(file.path(rpkg, "DESCRIPTION"))
  sections <- c("Depends", "Imports")
  contained_sections <- sections %in% colnames(description)
  if (sum(contained_sections) >= 1L) {
    return(split_pkgs(description[, sections[contained_sections]]))
  }
  character()
}, USE.NAMES = FALSE)
dependencies <- sort(unique(unlist(dependencies)))
# Drop packages that are listed explicitly below (arrow), excluded from the
# generated list (covidcast, methods), or are the local packages themselves.
dependencies <- setdiff(
  dependencies,
  c("arrow", "covidcast", "methods", basename(rpkgs))
)
# Drop the dependency on R itself; `r-base` is listed explicitly below.
dependencies <- dependencies[!grepl("^R(\\(.*\\))?$", dependencies)]

# Construct environment.yml file
environment_yml <- file.path(flepi_path, "environment.yml")
new_environment_yml <- c(
  "channels:",
  "- conda-forge",
  "- defaults",
  "- r",
  "- dnachun",
  "dependencies:",
  "- python=3.11",
  "- pip",
  "- r-base>=4.3",
  "- pyarrow=17.0.0",
  "- r-arrow=17.0.0",
  "- r-sf",
  # paste0() recycles a zero-length argument to "", which would emit a bare
  # "- r-" entry when there are no dependencies, so guard against that.
  if (length(dependencies)) paste0("- r-", dependencies)
)
if (file.exists(environment_yml)) {
  old_environment_yml <- readLines(environment_yml)
} else {
  old_environment_yml <- character()
}
# Comment lines (including the timestamp header) do not count as a change.
old_environment_yml <- old_environment_yml[!grepl("^#", old_environment_yml)]
if (!identical(new_environment_yml, old_environment_yml)) {
  new_environment_yml <- c(
    paste0("# ", format(Sys.time(), "%a %b %d %X %Y %Z")),
    new_environment_yml
  )
  writeLines(new_environment_yml, environment_yml)
}
106 changes: 106 additions & 0 deletions build/hpc_install_or_update.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env bash

# Install or update flepiMoP on a supported HPC cluster.
#
# Arguments:
#   $1 - cluster name, one of 'longleaf' or 'rockfish'.
# Optional environment inputs (prompted for interactively when empty):
#   FLEPI_PATH  - location of the flepiMoP checkout; the repository can be
#                 cloned there on request when the directory does not exist.
#   FLEPI_CONDA - name of the conda environment; created from
#                 $FLEPI_PATH/environment.yml when it does not exist yet.
#
# Steps: load cluster modules, resolve/clone the checkout, create and activate
# the conda environment, pin arrow versions, install gempyor (editable) and the
# local R packages, then install the inference CLI.

# Generic setup
set -e

# Cluster specific setup
if [[ $1 == "longleaf" ]]; then
    # Setup general purpose user variables needed for Longleaf. The work
    # directory path contains the first and second character of the user name.
    USERO=${USER:0:1}
    USERN=${USER:1:1}
    WORKDIR=$( realpath "/work/users/$USERO/$USERN/$USER/" )
    USERDIR=$WORKDIR

    # Load required modules
    module purge
    module load gcc/9.1.0
    module load anaconda/2023.03
    module load git
elif [[ $1 == "rockfish" ]]; then
    # Setup general purpose user variables needed for RockFish
    WORKDIR=$( realpath "/scratch4/struelo1/flepimop-code/$USER/" )
    USERDIR=$WORKDIR
    mkdir -vp "$WORKDIR"

    # Load required modules
    module purge
    module load gcc/9.3.0
    module load anaconda/2020.07
    module load git/2.42.0
else
    echo "The cluster name '$1' is not recognized, must be one of: 'longleaf', 'rockfish'."
    set +e
    # Works both when sourced (`return`) and when executed (`exit`).
    return 1 2>/dev/null || exit 1
fi

# Ensure we have a $FLEPI_PATH
if [ -z "${FLEPI_PATH}" ]; then
    echo -n "An explicit \$FLEPI_PATH was not provided, please set one (or press enter to use '$USERDIR/flepiMoP'): "
    read -r FLEPI_PATH
    if [ -z "${FLEPI_PATH}" ]; then
        export FLEPI_PATH="$USERDIR/flepiMoP"
    fi
    export FLEPI_PATH=$( realpath "$FLEPI_PATH" )
    echo "Using '$FLEPI_PATH' for \$FLEPI_PATH."
fi

# Test that flepiMoP is located there
if [ ! -d "$FLEPI_PATH" ]; then
    while true; do
        read -r -p "Did not find flepiMoP at $FLEPI_PATH, do you want to clone the repo? (y/n) " resp
        case "$resp" in
            [yY])
                echo "Cloning on your behalf."
                git clone git@github.com:HopkinsIDD/flepiMoP.git "$FLEPI_PATH"
                break
                ;;
            [nN])
                echo "Then you need to set a \$FLEPI_PATH before running, cannot proceed with install."
                set +e
                return 1 2>/dev/null || exit 1
                ;;
            *)
                echo "Invalid input. Please enter 'y' or 'n'. "
                ;;
        esac
    done
fi

# Setup the conda environment
if [ -z "${FLEPI_CONDA}" ]; then
    echo -n "An explicit \$FLEPI_CONDA was not provided, please set one (or press enter to use 'flepimop-env'): "
    read -r FLEPI_CONDA
    if [ -z "${FLEPI_CONDA}" ]; then
        export FLEPI_CONDA="flepimop-env"
    fi
    echo "Using '$FLEPI_CONDA' for \$FLEPI_CONDA."
fi
# Count environments whose name is exactly $FLEPI_CONDA (-F: literal, -x: whole
# line). `wc -l` is last so a zero-match grep does not abort under `set -e`.
FLEPI_CONDA_ENV_MATCHES=$( conda info --envs | awk '{print $1}' | grep -Fx -- "$FLEPI_CONDA" | wc -l )
if [ "$FLEPI_CONDA_ENV_MATCHES" -eq 0 ]; then
    conda env create --name "$FLEPI_CONDA" --file "$FLEPI_PATH/environment.yml"
fi

# Load the conda environment
conda activate "$FLEPI_CONDA"
# Replace any previous pin file with a fresh one.
# NOTE(review): environment.yml lists `pyarrow` and `r-arrow`, while the second
# pin below names `arrow` -- confirm that is the intended conda package name.
rm -f -- "$CONDA_PREFIX/conda-meta/pinned"
cat << EOF > "$CONDA_PREFIX/conda-meta/pinned"
r-arrow==17.0.0
arrow==17.0.0
EOF

# Install the gempyor package from local
pip install --editable "$FLEPI_PATH/flepimop/gempyor_pkg"

# Install the local R packages
R -e "install.packages('covidcast', repos='https://cloud.r-project.org')"
RETURNTO=$( pwd )
cd "$FLEPI_PATH/flepimop/R_packages/"
# Glob instead of parsing `ls` so entries with unusual names are not split.
for d in *; do
    [[ -e "$d" ]] || continue  # unmatched glob: directory is empty
    R CMD INSTALL "$d"
done
cd "$RETURNTO"
R -e "library(inference); inference::install_cli()"

# Done
echo "> Done installing/updating flepiMoP."
set +e
39 changes: 39 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Fri Oct 18 18:29:25 2024 UTC
# This file is written by build/create_environment_yml.R; that script
# overwrites it whenever the generated (non-comment) content changes, so manual
# edits to the entries below will be lost. Lines starting with '#' are ignored
# when the script compares old and new content.
# The fixed entries (python, pip, r-base, pyarrow, r-arrow, r-sf) are hard-coded
# in the script; the remaining r-* entries come from the Depends/Imports fields
# of the packages under flepimop/R_packages.
channels:
- conda-forge
- defaults
- r
- dnachun
dependencies:
- python=3.11
- pip
- r-base>=4.3
- pyarrow=17.0.0
- r-arrow=17.0.0
- r-sf
- r-data.table
- r-doParallel
- r-dplyr
- r-foreach
- r-ggplot2
- r-ggraph
- r-httr
- r-jsonlite
- r-lubridate
- r-magrittr
- r-MMWRweek
- r-optparse
- r-purrr
- r-readr
- r-reticulate
- r-rlang
- r-stringr
- r-tibble
- r-tidygraph
- r-tidyr
- r-tidyselect
- r-tidyverse
- r-truncnorm
- r-vroom
- r-xts
- r-yaml
4 changes: 2 additions & 2 deletions flepimop/R_packages/flepiconfig/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Package: flepiconfig
Title: Config creation helper for flepiMoP
Version: 3.0.0
Imports:
tidyverse (>= 1.3.1),
readr (>= 2.0.0),
tidyverse,
readr,
lubridate,
magrittr,
yaml,
Expand Down
Loading

0 comments on commit 24d243c

Please sign in to comment.