diff --git a/.gitignore b/.gitignore
index 35529a9..fe0a118 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ venv
 __pycache__
 docker-compose.override.yml
+secrets.sh
 *.pyc
 *.DS_Store
diff --git a/README.md b/README.md
index b0f7246..13821dc 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,33 @@
+# UIUC-CDR
+
+This repository contains the hook that receives messages from the CDR and starts the processing. The full stack consists of containers to proxy incoming HTTP requests to the correct containers (traefik), handle the hooks (cdrhook), handle all messages (rabbitmq), show the status (monitor), download data (downloader) and upload the data (uploader), as well as a single model that is executed on the data (icy-resin).
+
+# QuickStart
+
+To be able to run you will need some files; to make this easier we have created a quickstart shell script. It will download all needed files as well as create some files with default values.
+
+```
+curl -o quickstart.sh -s -L https://raw.githubusercontent.com/DARPA-CRITICALMAAS/uiuc-cdr/refs/heads/main/quickstart.sh
+chmod 755 quickstart.sh
+./quickstart.sh
+```
+
+The first time you run this, it will create four files:
+- `secrets.sh`: the file you can edit to change any variables needed
+- `docker-compose.override.yml`: your local changes to the docker-compose file
+- `docker-compose.yml`: *DO NOT EDIT*, this file will be downloaded each time to make sure you have the latest version
+- `.env`: *DO NOT EDIT*, this will be created from the secrets.sh file
+
+Edit `secrets.sh` and `docker-compose.override.yml` to fit your environment. At a minimum you will need to change the `CDR_TOKEN`, but it is highly recommended to also change `RABBITMQ_USERNAME`, `RABBITMQ_PASSWORD` and `CDRHOOK_SECRET`. If you only want to run the cdrhook, change the `PROFILE` to be `cdrhook`.
+
+Once you have the secrets.sh file set up, you can use `quickstart.sh` to start the full stack. To restart it, simply run `quickstart.sh` again.
+
+To only start the pipeline, copy all four files to the GPU machine, change the `PROFILE` in `secrets.sh` to be `pipeline` and run `quickstart.sh`.
+
+To stop the stack you can use `docker compose --profile allinone down`; you can use the profile `allinone` even if you only started the pipeline or cdrhook.
+
+If you use the cdrhook profile, traefik will not be started by default. You can manually start it in this case with `docker compose --profile traefik up -d`.
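+
+Once the stack is up, the usual `docker compose` commands work as long as you pass the same profile; a few illustrative examples (assuming the default `allinone` profile and the service names from `docker-compose.yml`):
+
+```
+# show the containers in the stack and their state
+docker compose --profile allinone ps
+# follow the logs of a single service, for example cdrhook
+docker compose --profile allinone logs -f cdrhook
+# restart a single service
+docker compose --profile allinone restart cdrhook
+```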
+
 # CDR Hook for NCSA pipeline
 
 This repository contains the hook to receive messages from the CDR and starts the processing. The full stack consists of a few containers that work together:
@@ -6,7 +36,7 @@ This repository contains the hook to receive messages from the CDR and starts th
 
 - **rabbitmq**: The orchestrator of all the work, all other containers connect to this and will receive work. If any of the messages can not be handled it will be added to the `.error` with the exception attached to the original message.
 - **cdrhook**: this is the entry point for all work, it will register with the CDR and receive messages when new work needs to be done. When a message arrives it will check to see if all necessary metadata is available and if so, it will send a message to the `download` queue.
 - **downloader**: this will download the image and the metadata to a shared folder that can be used by the actual processing container. This can run on a different server than the cdrhook, but does need to have access to the same storage system that the pipeline uses. Once it is downloaded it will send a message to each of the pipelines that run a model using the `process_` queue name.
-- **pipeline**: this will do the actual inference of the map, it will use the map and the metadata and find all the legends and appropriate regions in the map and write the result to the output folder ready for the CDR, and send a message to the `upload` queue.
+- **icy-resin**: this will do the actual inference of the map, it will use the map and the metadata and find all the legends and appropriate regions in the map and write the result to the output folder ready for the CDR, and send a message to the `upload` queue.
 - **uploader**: this will upload the processed data from the pipeline to the CDR and move the message to `completed` queue.
 - **monitor**: this not really part of the system, but will show the number of messages in the different queues, making it easy to track overall progress.
diff --git a/cdrhook/models.json b/cdrhook/models.json
index efe51bf..396f8c1 100644
--- a/cdrhook/models.json
+++ b/cdrhook/models.json
@@ -1,3 +1,3 @@
 {
-    "golden_muscat": ["map_area", "polygon_legend_area"]
+    "icy_resin": ["map_area", "polygon_legend_area"]
 }
diff --git a/cdrhook/server.py b/cdrhook/server.py
index 2bccb95..3426cf8 100644
--- a/cdrhook/server.py
+++ b/cdrhook/server.py
@@ -26,7 +26,7 @@
 
 auth = HTTPBasicAuth()
 
-cdr_url = "https://api.cdr.land"
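+# NOTE: the CDR endpoint can be overridden with the CDR_URL environment variable (defaults to the public https://api.cdr.land)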
+cdr_url = os.getenv("CDR_URL","https://api.cdr.land")
 config = { }
 cdr_connector = None
@@ -422,6 +422,7 @@ def create_app():
     cdr_connector = CdrConnector(
         system_name=os.getenv("SYSTEM_NAME"),
         system_version=os.getenv("SYSTEM_VERSION"),
+        cdr_url=os.getenv("CDR_URL", "https://api.cdr.land"),
         token=os.getenv("CDR_TOKEN"),
         callback_url=os.getenv("CALLBACK_URL")+'/hook',
         callback_secret=os.getenv("CALLBACK_SECRET"),
diff --git a/docker-compose.example.yml b/docker-compose.example.yml
index 43303d2..f4cbfe5 100644
--- a/docker-compose.example.yml
+++ b/docker-compose.example.yml
@@ -1,4 +1,32 @@
 services:
+  # Add SSL to traefik
+  traefik:
+    command:
+      - --log.level=INFO
+      - --api=true
+      - --api.dashboard=true
+      - --api.insecure=true
+      # Entrypoints
+      - --entrypoints.http.address=:80
+      - --entrypoints.http.http.redirections.entryPoint.to=https
+      - --entrypoints.https.address=:443
+      - --entrypoints.https.http.tls.certresolver=myresolver
+      # letsencrypt
+      - --certificatesresolvers.myresolver.acme.email=${TRAEFIK_ACME_EMAIL}
+      - --certificatesresolvers.myresolver.acme.storage=/config/acme.json
+      # uncomment to use testing certs
+      #- --certificatesresolvers.myresolver.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory
+      - --certificatesresolvers.myresolver.acme.httpchallenge=true
+      - --certificatesresolvers.myresolver.acme.httpchallenge.entrypoint=http
+      # Docker setup
+      - --providers.docker=true
+      - --providers.docker.endpoint=unix:///var/run/docker.sock
+      - --providers.docker.exposedbydefault=false
+      - --providers.docker.watch=true
+    ports:
+      - "80:80"
+      - "443:443"
+
   cdrhook:
     environment:
       SYSTEM_NAME: ${SYSTEM_NAME}
diff --git a/docker-compose.yml b/docker-compose.yml
index c0445dd..be00d24 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -4,7 +4,7 @@ services:
   # REVERSE PROXY
   # ----------------------------------------------------------------------
   traefik:
-    image: "traefik:v2.11"
+    image: "traefik:munster"
     command:
       - --log.level=INFO
       - --api=true
@@ -13,26 +13,19 @@
       # Entrypoints
       - --entrypoints.http.address=:80
       - --entrypoints.http.http.redirections.entryPoint.to=https
-      - --entrypoints.https.address=:443
-      - --entrypoints.https.http.tls.certresolver=myresolver
-      # letsencrypt
-      - --certificatesresolvers.myresolver.acme.email=${TRAEFIK_ACME_EMAIL}
-      - --certificatesresolvers.myresolver.acme.storage=/config/acme.json
-      # uncomment to use testing certs
-      #- --certificatesresolvers.myresolver.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory
-      - --certificatesresolvers.myresolver.acme.httpchallenge=true
-      - --certificatesresolvers.myresolver.acme.httpchallenge.entrypoint=http
       # Docker setup
       - --providers.docker=true
       - --providers.docker.endpoint=unix:///var/run/docker.sock
       - --providers.docker.exposedbydefault=false
       - --providers.docker.watch=true
     restart: "unless-stopped"
+    profiles:
+      - traefik
+      - allinone
     security_opt:
       - no-new-privileges:true
     ports:
       - "80:80"
-      - "443:443"
     volumes:
       - "traefik:/config"
       - "/var/run/docker.sock:/var/run/docker.sock:ro"
@@ -44,6 +37,9 @@
     image: rabbitmq:3.13-management
     hostname: rabbitmq
     restart: unless-stopped
+    profiles:
+      - cdrhook
+      - allinone
     environment:
       RABBITMQ_DEFAULT_USER: "${RABBITMQ_USERNAME:-guest}"
       RABBITMQ_DEFAULT_PASS: "${RABBITMQ_PASSWORD:-guest}"
@@ -55,16 +51,19 @@
   # CDR HOOK
   # ----------------------------------------------------------------------
   cdrhook:
-    image: ncsa/criticalmaas-cdr:latest
+    image: ncsa/criticalmaas-cdr:${CDRHOOK_VERSION:-latest}
     hostname: cdrhook
-    build: cdrhook
     restart: unless-stopped
+    profiles:
+      - cdrhook
+      - allinone
     depends_on:
       - rabbitmq
     environment:
+      CDR_URL: "${CDR_URL}"
       CDR_TOKEN: "${CDR_TOKEN}"
       CDR_KEEP_EVENT: "no"
-      CALLBACK_URL: "https://${SERVER_NAME}/cdr"
+      CALLBACK_URL: "${CALLBACK_URL}"
       CALLBACK_SECRET: "${CALLBACK_SECRET}"
       CALLBACK_USERNAME: "${CALLBACK_USERNAME}"
       CALLBACK_PASSWORD: "${CALLBACK_PASSWORD}"
@@ -80,10 +79,13 @@
   # RABBITMQ MONITOR
   # ----------------------------------------------------------------------
   monitor:
-    image: ncsa/criticalmaas-monitor:latest
+    image: ncsa/criticalmaas-monitor:${CDRHOOK_VERSION:-latest}
     hostname: monitor
     build: monitor
     restart: unless-stopped
+    profiles:
+      - cdrhook
+      - allinone
     depends_on:
       - rabbitmq
     environment:
@@ -98,17 +100,17 @@
   # DATA PROCESSING PIPELINE
   # use one, or more, per model to be executed
   # ----------------------------------------------------------------------
-  golden_muscat:
-    image: ncsa/criticalmaas-pipeline:latest
-    build: ../uiuc-pipeline
+  icy-resin:
+    image: ncsa/criticalmaas-pipeline:${PIPELINE_VERSION:-latest}
     runtime: nvidia
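+    # NOTE: the nvidia runtime requires the NVIDIA container toolkit on the host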
+    restart: "unless-stopped"
     profiles:
       - pipeline
-    depends_on:
-      - rabbitmq
+      - allinone
     environment:
       NVIDIA_VISIBLE_DEVICES: all
       PREFIX: ""
+    ipc: host
     command:
       - -v
       - --data
@@ -123,9 +125,10 @@
       - "amqp://${RABBITMQ_USERNAME}:${RABBITMQ_PASSWORD}@rabbitmq/%2F"
       - --inactive_timeout
       - "86000"
+      - --output_types
+      - cdr_json
       - --model
-      - golden_muscat
-    restart: "unless-stopped"
+      - icy_resin
     volumes:
       - "data:/data"
       - "logs:/logs"
@@ -136,27 +139,24 @@
   # DOWNLOADER and UPLOADER
   # ----------------------------------------------------------------------
   downloader:
-    image: ncsa/criticalmaas-downloader:latest
-    build: uploader
+    image: ncsa/criticalmaas-downloader:${CDRHOOK_VERSION:-latest}
     restart: "unless-stopped"
     profiles:
       - pipeline
-    depends_on:
-      - rabbitmq
+      - allinone
     environment:
       RABBITMQ_URI: "amqp://${RABBITMQ_USERNAME}:${RABBITMQ_PASSWORD}@rabbitmq/%2F"
     volumes:
       - "data:/data"
 
   uploader:
-    image: ncsa/criticalmaas-uploader:latest
-    build: uploader
+    image: ncsa/criticalmaas-uploader:${CDRHOOK_VERSION:-latest}
     restart: "unless-stopped"
     profiles:
       - pipeline
-    depends_on:
-      - rabbitmq
+      - allinone
     environment:
+      CDR_URL: "${CDR_URL}"
       CDR_TOKEN: "${CDR_TOKEN}"
       RABBITMQ_URI: "amqp://${RABBITMQ_USERNAME}:${RABBITMQ_PASSWORD}@rabbitmq/%2F"
       PREFIX: ""
diff --git a/quickstart.sh b/quickstart.sh
new file mode 100755
index 0000000..fb842fd
--- /dev/null
+++ b/quickstart.sh
@@ -0,0 +1,273 @@
+#!/bin/bash
+
+# check requirements
+if [ ! -e /usr/bin/docker ]; then
+    echo "Docker is not installed, please install docker"
+    exit 1
+fi
+if [ ! -e /usr/bin/jq ]; then
+    echo "jq is not installed, please install jq"
+    exit 1
+fi
+
+# fetch latest versions
+CDR_TAG=$(curl -s https://api.github.com/repos/DARPA-CRITICALMAAS/uiuc-cdr/releases/latest | jq -r '.tag_name')
+PIPELINE_TAG=$(curl -s https://api.github.com/repos/DARPA-CRITICALMAAS/uiuc-pipeline/releases/latest | jq -r '.tag_name')
+
+# -------------------------------------------------------
+# load secrets.sh file, if not exist create template
+# -------------------------------------------------------
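+# NOTE: secrets.sh is listed in .gitignore, so local tokens and passwords are not committed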
+if [ ! -e "./secrets.sh" ]; then
+    cat << "EOF" > secrets.sh
+# -------------------------------------------------------
+# what profile to use by default
+# -------------------------------------------------------
+export PROFILE=allinone
+# -------------------------------------------------------
+# common variables
+# -------------------------------------------------------
+export SERVER_NAME="$(hostname -f)"
+export CDR_TOKEN=this_is_a_secret_received_from_cdr
+export CDR_URL=https://api.cdr.land
+export RABBITMQ_USERNAME=guest
+export RABBITMQ_PASSWORD=guest
+# only change this if you have an external rabbitmq server
+export RABBITMQ_MGMT_URL=http://rabbitmq:15672
+# -------------------------------------------------------
+# for cdrhook docker-compose
+# -------------------------------------------------------
+# letsencrypt email address
+export EMAIL_ADDRESS="cert@${SERVER_NAME}"
+export CDRHOOK_URL=https://${SERVER_NAME}/cdr
+export CDRHOOK_SECRET=you-should-change-this
+export CDRHOOK_VERSION=latest
+# -------------------------------------------------------
+# for pipeline docker-compose/launcher
+# -------------------------------------------------------
+export RABBITMQ_URI=amqp://${RABBITMQ_USERNAME}:${RABBITMQ_PASSWORD}@server.url:5672/%2F
+export MONITOR_URL=https://${SERVER_NAME}/monitor/queues.json
+# using a specific version of the pipeline
+export PIPELINE_VERSION=latest
+EOF
+    echo "Please update the secrets.sh file with the correct values"
+    exit 1
+fi
+source ./secrets.sh
+
+# print message about version being used
+if [ "${CDRHOOK_VERSION}" != "" -a "${CDRHOOK_VERSION}" != "latest" ]; then
+    echo "Using NCSA cdrhook version : ${CDRHOOK_VERSION}"
+    if [ "${CDRHOOK_VERSION}" != "${CDR_TAG}" ]; then
+        echo "Latest NCSA cdrhook version is : ${CDR_TAG}"
+    fi
+else
+    echo "Using NCSA cdrhook version : ${CDR_TAG} (latest)"
+    export CDRHOOK_VERSION=${CDR_TAG}
+fi
+if [ "${PIPELINE_VERSION}" != "" -a "${PIPELINE_VERSION}" != "latest" ]; then
+    echo "Using NCSA pipeline version : ${PIPELINE_VERSION}"
+    if [ "${PIPELINE_VERSION}" != "${PIPELINE_TAG}" ]; then
+        echo "Latest NCSA pipeline version is : ${PIPELINE_TAG}"
+    fi
+else
+    echo "Using NCSA pipeline version : ${PIPELINE_TAG} (latest)"
+    export PIPELINE_VERSION=${PIPELINE_TAG}
+fi
+
+# remove the v from the docker version
+export CDRHOOK_VERSION=$(echo ${CDRHOOK_VERSION} | sed 's/v//')
+export PIPELINE_VERSION=$(echo ${PIPELINE_VERSION} | sed 's/v//')
+
+# -------------------------------------------------------
+# download latest released docker-compose file
+# -------------------------------------------------------
+if [[ -n "${CDR_BRANCH}" ]]; then
+    URL="https://raw.githubusercontent.com/DARPA-CRITICALMAAS/uiuc-cdr/refs/heads/${CDR_BRANCH}/"
+else
+    URL="https://raw.githubusercontent.com/DARPA-CRITICALMAAS/uiuc-cdr/refs/tags/${CDR_TAG}/"
+fi
+curl -L -s $URL/docker-compose.yml -o docker-compose.yml
+
+# -------------------------------------------------------
+# create .env file
+# -------------------------------------------------------
+cat << EOF > .env
+SERVER_NAME="${SERVER_NAME}"
+CDR_URL="${CDR_URL}"
+
+TRAEFIK_ACME_EMAIL="${EMAIL_ADDRESS}"
+
+CDR_TOKEN="${CDR_TOKEN}"
+CDRHOOK_VERSION="${CDRHOOK_VERSION}"
+
+CALLBACK_PATH=/hook
+CALLBACK_URL="${CDRHOOK_URL}"
+CALLBACK_SECRET="${CDRHOOK_SECRET}"
+CALLBACK_USERNAME=""
+CALLBACK_PASSWORD=""
+
+RABBITMQ_USERNAME="${RABBITMQ_USERNAME}"
+RABBITMQ_PASSWORD="${RABBITMQ_PASSWORD}"
+RABBITMQ_MGMT_URL="${RABBITMQ_MGMT_URL}"
+
+PIPELINE_VERSION="${PIPELINE_VERSION}"
+EOF
+
+# -------------------------------------------------------
+# create docker-compose.override file
+# -------------------------------------------------------
+if [ -e docker-compose.override.yml ]; then
+    echo "docker-compose.override.yml already exists, skipping"
+else
+    cat << "EOF" > docker-compose.override.yml
+services:
+  # ----------------------------------------------------------------------
+  # Add SSL to traefik
+  # ----------------------------------------------------------------------
+#  traefik:
+#    command:
+#      - --log.level=INFO
+#      - --api=true
+#      - --api.dashboard=true
+#      - --api.insecure=true
+#      # Entrypoints
+#      - --entrypoints.http.address=:80
+#      - --entrypoints.http.http.redirections.entryPoint.to=https
+#      - --entrypoints.https.address=:443
+#      - --entrypoints.https.http.tls.certresolver=myresolver
+#      # letsencrypt
+#      - --certificatesresolvers.myresolver.acme.email=${TRAEFIK_ACME_EMAIL}
+#      - --certificatesresolvers.myresolver.acme.storage=/config/acme.json
+#      # uncomment to use testing certs
+#      #- --certificatesresolvers.myresolver.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory
+#      - --certificatesresolvers.myresolver.acme.httpchallenge=true
+#      - --certificatesresolvers.myresolver.acme.httpchallenge.entrypoint=http
+#      # Docker setup
+#      - --providers.docker=true
+#      - --providers.docker.endpoint=unix:///var/run/docker.sock
+#      - --providers.docker.exposedbydefault=false
+#      - --providers.docker.watch=true
+#    ports:
+#      - "80:80"
+#      - "443:443"
+
+  # default models for cdrhook
+  cdrhook:
+    volumes:
+      - ./models.json:/app/models.json
+      - ./systems.json:/app/systems.json
+
+  # open up rabbitmq
+  rabbitmq:
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.services.rabbitmq.loadbalancer.server.port=15672"
+      - "traefik.http.routers.rabbitmq.rule=Host(`${SERVER_NAME}`)"
+
+  # Add dependency on rabbitmq
+  icy-resin:
+    depends_on:
+      - rabbitmq
+
+  downloader:
+    depends_on:
+      - rabbitmq
+
+  uploader:
+    depends_on:
+      - rabbitmq
+
+# -------------------------------------------------------
+# mount volumes to local file system
+# -------------------------------------------------------
+volumes:
+# the following probably don't need to be mounted to a local
+# file system (i.e. large NFS storage), they should only
+# use minimal space.
+#  traefik
+#  rabbitmq:
+# the following probably should be mounted to a larger file
+# system (i.e. NFS storage).
+# hold json objects to process in cdrhook
+#  cdrhook:
+#    driver: local
+#    driver_opts:
+#      type: none
+#      device: /data/volumes/cdrhook
+#      o: bind
+# hold output from pipeline
+#  feedback:
+#    driver: local
+#    driver_opts:
+#      type: none
+#      device: /data/volumes/feedback
+#      o: bind
+#  data:
+#    driver: local
+#    driver_opts:
+#      type: none
+#      device: /data/volumes/data
+#      o: bind
+#  logs:
+#    driver: local
+#    driver_opts:
+#      type: none
+#      device: /data/volumes/logs
+#      o: bind
+#  output:
+#    driver: local
+#    driver_opts:
+#      type: none
+#      device: /data/volumes/output
+#      o: bind
+EOF
+fi
+
+# -------------------------------------------------------
+# create rabbitmq configuration file
+# -------------------------------------------------------
+if [ -e 50-criticalmaas.conf ]; then
+    echo "50-criticalmaas.conf already exists, skipping"
+else
+    cat << "EOF" > 50-criticalmaas.conf
+consumer_timeout = 7200000
+EOF
+fi
+
+# -------------------------------------------------------
+# create default models and systems file
+# -------------------------------------------------------
+if [ -e models.json ]; then
+    echo "models.json already exists, skipping"
+else
+    cat << "EOF" > models.json
+{
+    "icy_resin": ["map_area", "polygon_legend_area"]
+}
+EOF
+fi
+if [ -e systems.json ]; then
+    echo "systems.json already exists, skipping"
+else
+    cat << "EOF" > systems.json
+{
+    "area": ["uncharted-area"],
+    "legend": ["polymer"]
+}
+EOF
+fi
+
+# -------------------------------------------------------
+# start the full stack
+# -------------------------------------------------------
+echo ""
+docker compose --profile ${PROFILE} up -d
+echo ""
+
+# -------------------------------------------------------
+# finished
+# -------------------------------------------------------
+echo "Quickstart complete"
+echo "Please visit http://${SERVER_NAME}/monitor/ to access the RabbitMQ monitor interface"
+echo "Please visit http://${SERVER_NAME} to access the RabbitMQ management interface"
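+
+# NOTE: to stop the stack again, run: docker compose --profile ${PROFILE} down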