-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added 2 scripts to launch most pieces
- Loading branch information
Showing
3 changed files
with
126 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Utility scripts | ||
|
||
This folder contains utility scripts that are used on the HPC to monitor the queues and start models on the HPC. | ||
|
||
*upload_download.sh* - This script will download and start 2 Singularity containers (if not already started) and tail the output logs. | ||
*model_launcher.sh* - This script will check whether any models need to be run and start them. This will launch about 1 pipeline for every 10 waiting jobs. | ||
|
||
Both scripts require a file called secrets.sh to be in the same folder. If it does not exist, the script prints a message describing the variables it must define and exits. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
#!/bin/bash
#
# upload_download.sh
#
# Pulls the CriticalMAAS downloader, uploader and pipeline container images
# (when the requested version is not already on disk), starts the downloader
# and uploader as apptainer instances (when not already listed as running)
# and then tails the instance log files.
#
# Configuration is read from ./secrets.sh in the current directory. It must
# export RABBITMQ_URI and CDR_TOKEN, and may export PIPELINE and/or CDR to pin
# image versions. Unpinned versions default to the latest GitHub release tag.

# Print an error message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the latest release tag (leading "v" stripped) of GitHub repo $1.
# Returns non-zero when the lookup fails or the response has no tag_name,
# so callers never end up with an empty or "null" version.
latest_release() {
  local repo=$1
  local tag
  tag=$(curl -fsS "https://api.github.com/repos/${repo}/releases/latest" \
    | jq -r '.tag_name // empty')
  tag=${tag#v}
  [ -n "${tag}" ] || return 1
  printf '%s\n' "${tag}"
}

# Make sure image $1 at version $2 is on disk and that <name>_latest.sif
# points at it. The link is refreshed even when the image already exists, so
# switching back to a previously downloaded version takes effect.
fetch_image() {
  local name=$1
  local version=$2
  local image="${name}_${version}.sif"
  if [ ! -e "${image}" ]; then
    apptainer pull --force "${image}" "docker://ncsa/${name}:${version}" \
      || die "Failed to pull docker://ncsa/${name}:${version}"
  fi
  ln -sf "${image}" "${name}_latest.sif"
}

# Succeeds when an apptainer instance whose listing matches $1 exists.
instance_running() {
  apptainer instance list | grep -q -- "$1"
}

if [ ! -e secrets.sh ]; then
  # Quoted delimiter: the template is printed literally, nothing is expanded.
  cat >&2 <<'EOF'
Missing secrets.sh file. Please create one with the following variables:
# required variables
export CDR_TOKEN=this_is_a_secret_received_from_cdr
export RABBITMQ_URI=amqp://username:password@server.url:5672/%2F
export MONITOR_URL=https://server.url/monitor/queues.json
# using a specific version of the pipeline
export PIPELINE=pr36
EOF
  # Missing configuration is an error; report it through the exit status.
  exit 1
fi
# Explicit ./ so bash does not search PATH for another secrets.sh first.
source ./secrets.sh

# Both values are passed into the containers below; fail early if absent.
: "${RABBITMQ_URI:?RABBITMQ_URI must be set in secrets.sh}"
: "${CDR_TOKEN:?CDR_TOKEN must be set in secrets.sh}"

# PIPELINE VERSION
if [ -z "${PIPELINE}" ]; then
  PIPELINE=$(latest_release DARPA-CRITICALMAAS/uiuc-pipeline) \
    || die "Could not determine the latest uiuc-pipeline release; set PIPELINE in secrets.sh"
fi

# CDR VERSION
if [ -z "${CDR}" ]; then
  CDR=$(latest_release DARPA-CRITICALMAAS/uiuc-cdr) \
    || die "Could not determine the latest uiuc-cdr release; set CDR in secrets.sh"
fi

# print versions
echo "PIPELINE : $PIPELINE"
echo "CDR      : $CDR"

# download images if they don't exist
fetch_image criticalmaas-downloader "${CDR}"
fetch_image criticalmaas-uploader "${CDR}"
fetch_image criticalmaas-pipeline "${PIPELINE}"

# make folders
mkdir -p data output logs/downloader logs

# start images if not running
if ! instance_running criticalmaas-downloader; then
  apptainer instance run \
    --pid-file criticalmaas-downloader.pid \
    --no-home \
    --contain \
    --bind ./data:/data \
    --env "RABBITMQ_URI=${RABBITMQ_URI}" \
    criticalmaas-downloader_latest.sif \
    criticalmaas-downloader \
    python /src/CM_B_downloader.py
fi
if ! instance_running criticalmaas-uploader; then
  apptainer instance run \
    --pid-file criticalmaas-uploader.pid \
    --no-home \
    --contain \
    --bind ./output:/output \
    --env "RABBITMQ_URI=${RABBITMQ_URI}" \
    --env "CDR_TOKEN=${CDR_TOKEN}" \
    criticalmaas-uploader_latest.sif \
    criticalmaas-uploader \
    python /src/uploader.py
fi

# showing log files
echo "----------------------------------------------------------------------"
echo "Showing log files, press Ctr-C to exit"
echo "tail -f ~/.apptainer/instances/logs/${HOSTNAME}/${USER}/criticalmaas-*"
echo "----------------------------------------------------------------------"
# Variables are quoted; the trailing glob is left unquoted on purpose so it
# expands to every criticalmaas-* log file.
tail -f ~/.apptainer/instances/logs/"${HOSTNAME}"/"${USER}"/criticalmaas-*
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/bin/bash
#
# model_launcher.sh
#
# Polls the monitor endpoint for the number of jobs reported for each model
# queue and submits additional SLURM pipeline jobs with sbatch whenever fewer
# are queued/running (per squeue) than are wanted: one pipeline per 10 jobs,
# rounded up, capped at 5 per queue.
#
# Configuration is read from ./secrets.sh in the current directory, which
# must export MONITOR_URL. Optional environment overrides:
#   LAUNCHER_DIR   directory holding <queue>_launcher.bash
#   POLL_INTERVAL  seconds to sleep between polls (default 1)

if [ ! -e secrets.sh ]; then
  # Quoted delimiter: the template is printed literally, nothing is expanded.
  cat >&2 <<'EOF'
Missing secrets.sh file. Please create one with the following variables:
# required variables
export CDR_TOKEN=this_is_a_secret_received_from_cdr
export RABBITMQ_URI=amqp://username:password@server.url:5672/%2F
export MONITOR_URL=https://server.url/monitor/queues.json
# using a specific version of the pipeline
export PIPELINE=pr36
EOF
  # Missing configuration is an error; report it through the exit status.
  exit 1
fi
# Explicit ./ so bash does not search PATH for another secrets.sh first.
source ./secrets.sh

# The queue sizes come from this URL; without it nothing can be launched.
: "${MONITOR_URL:?MONITOR_URL must be set in secrets.sh}"

LAUNCHER_DIR=${LAUNCHER_DIR:-/projects/bbym/shared/CDR_processing/pipeline_processing_003}
POLL_INTERVAL=${POLL_INTERVAL:-1}

# Print how many pipelines are wanted for $1 waiting jobs:
# ceil(jobs / 10), capped at 5.
pipelines_needed() {
  local jobs=$1
  local needed
  # 10# forces base 10 so a value with a leading zero is not read as octal.
  needed=$(( (10#${jobs} + 9) / 10 ))
  if (( needed > 5 )); then
    needed=5
  fi
  printf '%s\n' "${needed}"
}

# start monitoring process queues
while true; do
  LOG="$(date) :"
  # Emit a newline before the first "Starting ..." message of a pass so it
  # does not overwrite the status line, which ends in a carriage return.
  SKIP="\n"
  for queue in golden_muscat icy_resin; do
    RUNNING=$(squeue --name="${queue}" --user "${USER}" --noheader | wc -l)
    # Quoted so the '?' in the URL is never treated as a glob character.
    JOBS=$(curl -s "${MONITOR_URL}?search=${queue}" | jq -r '.[0].total')
    # The monitor may return nothing, "null" or a non-integer; count that as
    # zero waiting jobs while still logging the raw value below.
    waiting=${JOBS}
    if [[ ! "${waiting}" =~ ^[0-9]+$ ]]; then
      waiting=0
    fi
    NEEDED=$(pipelines_needed "${waiting}")
    if (( RUNNING < NEEDED )); then
      printf '%bStarting another pipeline for %s. ' "${SKIP}" "${queue}"
      SKIP=""
      sbatch --job-name "${queue}" "${LAUNCHER_DIR}/${queue}_launcher.bash"
    fi
    LOG="${LOG} [$queue : Running=$RUNNING jobs=$JOBS need=$NEEDED] "
  done
  # Carriage return (no newline) so the next status line overwrites this one.
  printf '%s \r' "${LOG}"
  sleep "${POLL_INTERVAL}"
done