From 429c726e3f9aba7841faa75403e84e603af3cfa3 Mon Sep 17 00:00:00 2001 From: Ignacio Heredia Date: Mon, 25 Nov 2024 10:11:47 +0100 Subject: [PATCH] feat: add UI to Nomad inference endpoints (#76) * feat: add UI to deploy * feat: add support for user-defined `custom` endpoint --- ai4papi/routers/v1/deployments/modules.py | 12 +++++ etc/modules/nomad.hcl | 65 ++++++++++++++++++++++- etc/try_me/nomad.hcl | 2 +- 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/ai4papi/routers/v1/deployments/modules.py b/ai4papi/routers/v1/deployments/modules.py index 9c1f74d..e1d0366 100644 --- a/ai4papi/routers/v1/deployments/modules.py +++ b/ai4papi/routers/v1/deployments/modules.py @@ -289,8 +289,20 @@ def create_deployment( # Always exclude initial 'dataset_download' task, as it is used as template exclude_tasks = ['dataset_download'] + # If DEEPaaS was not launched, do not launch UI because it will fail + if user_conf['general']['service'] != 'deepaas': + exclude_tasks.append('ui') + tasks[:] = [t for t in tasks if t['Name'] not in exclude_tasks] + # Remove appropriate Traefik domains in each case (no need to remove the ports) + services = nomad_conf['TaskGroups'][0]['Services'] + if user_conf['general']['service'] == 'deepaas': + exclude_services = ['ide'] + else: + exclude_services = ['ui'] + services[:] = [s for s in services if s['PortLabel'] not in exclude_services] + # Submit job r = nomad.create_deployment(nomad_conf) diff --git a/etc/modules/nomad.hcl b/etc/modules/nomad.hcl index ef28693..6584cab 100644 --- a/etc/modules/nomad.hcl +++ b/etc/modules/nomad.hcl @@ -92,6 +92,12 @@ job "module-${JOB_UUID}" { port "ide" { to = 8888 } + port "ui" { + to = 80 + } + port "custom" { + to = 80 + } } service { @@ -124,6 +130,26 @@ job "module-${JOB_UUID}" { ] } + service { + name = "${JOB_UUID}-ui" + port = "ui" + tags = [ + "traefik.enable=true", + "traefik.http.routers.${JOB_UUID}-ui.tls=true", + "traefik.http.routers.${JOB_UUID}-ui.rule=Host(`ui-${HOSTNAME}.${meta.domain}-${BASE_DOMAIN}`, `www.ui-${HOSTNAME}.${meta.domain}-${BASE_DOMAIN}`)", + ] + } + + service { + name = "${JOB_UUID}-custom" + port = "custom" + tags = [ + "traefik.enable=true", + "traefik.http.routers.${JOB_UUID}-custom.tls=true", + "traefik.http.routers.${JOB_UUID}-custom.rule=Host(`custom-${HOSTNAME}.${meta.domain}-${BASE_DOMAIN}`, `www.custom-${HOSTNAME}.${meta.domain}-${BASE_DOMAIN}`)", + ] + } + ephemeral_disk { size = ${DISK} } @@ -236,7 +262,7 @@ job "module-${JOB_UUID}" { image = "${DOCKER_IMAGE}:${DOCKER_TAG}" command = "deep-start" args = ["--${SERVICE}"] - ports = ["api", "monitor", "ide"] + ports = ["api", "monitor", "ide", "custom"] shm_size = ${SHARED_MEMORY} memory_hard_limit = ${RAM} volumes = [ @@ -276,6 +302,43 @@ job "module-${JOB_UUID}" { } } + task "ui" { # DEEPaaS UI (Gradio) + + # Run as post-start to make sure DEEPaaS up before launching the UI + lifecycle { + hook = "poststart" + sidecar = true + } + + driver = "docker" + + config { + force_pull = true + image = "registry.services.ai4os.eu/ai4os/deepaas_ui:latest" + ports = ["ui"] + shm_size = 250000000 # 250MB + memory_hard_limit = 500 # MB + } + + env { + DURATION = "10m" # kill job after 10 mins + UI_PORT = 80 + } + + resources { + cpu = 500 # MHz + memory = 500 # MB + memory_max = 500 # MB + } + + # Do not try to restart a try-me job if it raises error (module incompatible with Gradio UI) + restart { + attempts = 0 + mode = "fail" + } + + } + task "storage_cleanup" { // Unmount empty storage folder and delete it from host diff --git a/etc/try_me/nomad.hcl b/etc/try_me/nomad.hcl index e829962..3279953 100644 --- a/etc/try_me/nomad.hcl +++ b/etc/try_me/nomad.hcl @@ -62,7 +62,7 @@ job "try-${JOB_UUID}" { to = 80 # -1 will assign random port } port "api" { - to = 5000 # -1 will assign random port + to = 5000 } }