Skip to content

Commit

Permalink
added 2 scripts to launch most pieces
Browse files Browse the repository at this point in the history
  • Loading branch information
robkooper committed Aug 22, 2024
1 parent ab637e5 commit 00b5313
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 0 deletions.
8 changes: 8 additions & 0 deletions scripts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Utility scripts

This folder contains utility scripts that are used on the HPC to monitor the queues and start models.

- *download_upload.sh* - This script will download the container images and start 2 Singularity (Apptainer) containers (if not already started) and tail the output logs.
- *model_launcher.sh* - This script will check if any models need to be run and start them. This will launch about 1 pipeline for every 10 waiting jobs (at most 5 per queue).

Both scripts require a file called secrets.sh to be in the folder they are run from. If it does not exist, the script prints a message listing the variables the file must define and exits.
82 changes: 82 additions & 0 deletions scripts/download_upload.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/bin/bash

# Load credentials and settings from secrets.sh in the current directory.
# When the file is missing, print a template of the expected variables on
# stderr and stop with a non-zero status so the failure is visible to
# whoever (or whatever) launched the script.
if [ ! -e secrets.sh ]; then
  cat >&2 <<'EOF'
Missing secrets.sh file. Please create one with the following variables:
# required variables
export CDR_TOKEN=this_is_a_secret_received_from_cdr
export RABBITMQ_URI=amqp://username:password@server.url:5672/%2F
export MONITOR_URL=https://server.url/monitor/queues.json
# using a specific version of the pipeline
export PIPELINE=pr36
EOF
  exit 1
fi
# The explicit "./" stops `source` from searching PATH for a different
# secrets.sh before it looks in the current directory.
source ./secrets.sh

#######################################
# Look up the tag of the latest GitHub release of a DARPA-CRITICALMAAS repo.
# Arguments: $1 - repository name (e.g. uiuc-pipeline)
# Outputs:   the tag with a leading "v" removed, on stdout
# Returns:   0 on success, 1 when no tag could be obtained
#######################################
latest_release() {
  local repo=$1
  local tag
  # -f makes curl fail on HTTP errors (e.g. API rate limiting) instead of
  # handing an error document to jq; "// empty" turns a missing/null
  # tag_name into empty output rather than the literal string "null".
  tag=$(curl -fsS "https://api.github.com/repos/DARPA-CRITICALMAAS/${repo}/releases/latest" \
    | jq -r '.tag_name // empty' \
    | sed 's/^v//')
  [ -n "${tag}" ] || return 1
  printf '%s\n' "${tag}"
}

# PIPELINE VERSION (only looked up when not already set, e.g. by secrets.sh)
if [ -z "${PIPELINE}" ]; then
  if ! PIPELINE=$(latest_release uiuc-pipeline); then
    echo "Could not determine the latest uiuc-pipeline release; set PIPELINE in secrets.sh." >&2
    exit 1
  fi
fi

# CDR VERSION (only looked up when not already set)
if [ -z "${CDR}" ]; then
  if ! CDR=$(latest_release uiuc-cdr); then
    echo "Could not determine the latest uiuc-cdr release; set CDR in secrets.sh." >&2
    exit 1
  fi
fi

# print versions
echo "PIPELINE : $PIPELINE"
echo "CDR : $CDR"

# download images if they don't exist

#######################################
# Pull one criticalmaas image and point its *_latest.sif symlink at it.
# Does nothing when the versioned .sif file already exists. The symlink is
# only replaced after a successful pull, so a failed download never leaves
# *_latest.sif dangling.
# Arguments: $1 - image short name (downloader, uploader or pipeline)
#            $2 - image tag / version
# Returns:   0 when the image is present, 1 when it could not be pulled
#######################################
pull_image() {
  local name=$1
  local tag=$2
  local sif="criticalmaas-${name}_${tag}.sif"
  local latest="criticalmaas-${name}_latest.sif"

  if [ -z "${tag}" ]; then
    echo "No version given for criticalmaas-${name}; skipping download." >&2
    return 1
  fi
  if [ -e "${sif}" ]; then
    return 0
  fi
  if ! apptainer pull --force "${sif}" "docker://ncsa/criticalmaas-${name}:${tag}"; then
    echo "Failed to pull criticalmaas-${name}:${tag}; leaving ${latest} untouched." >&2
    # Remove any partial file so the next run retries the download.
    rm -f -- "${sif}"
    return 1
  fi
  rm -f -- "${latest}"
  ln -s "${sif}" "${latest}"
}

pull_image downloader "${CDR}"
pull_image uploader "${CDR}"
pull_image pipeline "${PIPELINE}"

# Create the working folders; ./data and ./output are bind-mounted into the
# containers started below.
for folder in data output logs/downloader logs; do
  mkdir -p "${folder}"
done

# Start the downloader instance unless one already shows up in the
# instance list.
downloader_listed=$(apptainer instance list | grep criticalmaas-downloader)
if [[ -z "${downloader_listed}" ]]; then
  downloader_args=(
    --pid-file criticalmaas-downloader.pid
    --no-home
    --contain
    --bind ./data:/data
    --env "RABBITMQ_URI=${RABBITMQ_URI}"
    criticalmaas-downloader_latest.sif
    criticalmaas-downloader
    python /src/CM_B_downloader.py
  )
  apptainer instance run "${downloader_args[@]}"
fi

# Same for the uploader, which additionally receives the CDR token.
uploader_listed=$(apptainer instance list | grep criticalmaas-uploader)
if [[ -z "${uploader_listed}" ]]; then
  uploader_args=(
    --pid-file criticalmaas-uploader.pid
    --no-home
    --contain
    --bind ./output:/output
    --env "RABBITMQ_URI=${RABBITMQ_URI}"
    --env "CDR_TOKEN=${CDR_TOKEN}"
    criticalmaas-uploader_latest.sif
    criticalmaas-uploader
    python /src/uploader.py
  )
  apptainer instance run "${uploader_args[@]}"
fi

# Announce which log files are about to be followed, then follow them
# until the user interrupts.
divider="----------------------------------------------------------------------"
printf '%s\n' \
  "${divider}" \
  "Showing log files, press Ctr-C to exit" \
  "tail -f ~/.apptainer/instances/logs/${HOSTNAME}/${USER}/criticalmaas-*" \
  "${divider}"
tail -f ~/.apptainer/instances/logs/${HOSTNAME}/${USER}/criticalmaas-*
36 changes: 36 additions & 0 deletions scripts/model_launcher.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash

# Load credentials and settings from secrets.sh in the current directory.
# When the file is missing, print a template of the expected variables on
# stderr and stop with a non-zero status so the failure is visible to
# whoever (or whatever) launched the script.
if [ ! -e secrets.sh ]; then
  cat >&2 <<'EOF'
Missing secrets.sh file. Please create one with the following variables:
# required variables
export CDR_TOKEN=this_is_a_secret_received_from_cdr
export RABBITMQ_URI=amqp://username:password@server.url:5672/%2F
export MONITOR_URL=https://server.url/monitor/queues.json
# using a specific version of the pipeline
export PIPELINE=pr36
EOF
  exit 1
fi
# The explicit "./" stops `source` from searching PATH for a different
# secrets.sh before it looks in the current directory.
source ./secrets.sh

#######################################
# Number of pipelines wanted for a given count of waiting jobs: one per 10
# jobs (rounded up), capped at 5.
# Arguments: $1 - waiting job count as reported by the monitor
# Outputs:   the number of pipelines on stdout; 0 when $1 is not a plain
#            non-negative integer (empty, "null", garbage)
#######################################
pipelines_needed() {
  local jobs=$1
  local needed
  if [[ ! "${jobs}" =~ ^[0-9]+$ ]]; then
    echo 0
    return 0
  fi
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  needed=$(( (10#${jobs} + 9) / 10 ))
  if (( needed > 5 )); then
    needed=5
  fi
  echo "${needed}"
}

# start monitoring process queues
# Every second: for each queue compare the number of Slurm jobs already
# queued/running under that name with the number of pipelines the waiting
# work calls for, and submit one more pipeline when there are too few.
while true; do
  LOG="$(date) :"
  # Printed before the first "Starting ..." message of a pass so it lands
  # on its own line instead of on top of the \r-rewritten status line.
  SKIP="\n"
  for queue in golden_muscat icy_resin; do
    # If squeue itself fails we do not know how many jobs exist; skip the
    # queue rather than treating the failure as "0 running" and submitting.
    if ! QUEUED=$(squeue --name="${queue}" --user "${USER}" --noheader); then
      LOG="${LOG} [$queue : squeue failed] "
      continue
    fi
    RUNNING=$(grep -c . <<<"${QUEUED}")
    # Quoted so the "?" in the URL is never treated as a glob character.
    JOBS=$(curl -fs "${MONITOR_URL}?search=${queue}" | jq -r '.[0].total')
    NEEDED=$(pipelines_needed "${JOBS}")
    if (( RUNNING < NEEDED )); then
      echo -en "${SKIP}Starting another pipeline for $queue. "
      SKIP=""
      sbatch --job-name "${queue}" "/projects/bbym/shared/CDR_processing/pipeline_processing_003/${queue}_launcher.bash"
    fi
    LOG="${LOG} [$queue : Running=$RUNNING jobs=$JOBS need=$NEEDED] "
  done
  echo -ne "${LOG} \r"
  sleep 1
done

0 comments on commit 00b5313

Please sign in to comment.