From 49318047e317062d7c7c9836f071ab59bb5afaa3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Edgar=20Hern=C3=A1ndez?=
 <23639005+israel-hdez@users.noreply.github.com>
Date: Mon, 2 Sep 2024 17:09:02 -0600
Subject: [PATCH] Add script to wait for the model to appear when using KServe
 Modelcar
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When using OCI containers for model storage in KServe (modelcar), it is
possible for the model server to start before the model has been fully
downloaded. When this happens, the model server terminates with an
error because the model path is empty.

This adds a small script that waits for the cluster to fully download
the model container before invoking the model server. The wait is
triggered when the MODEL_INIT_MODE environment variable is set to "async".

Signed-off-by: Edgar Hernández <23639005+israel-hdez@users.noreply.github.com>
---
 Dockerfile.ubi          |  6 ++++--
 extras/wait-modelcar.sh | 14 ++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100755 extras/wait-modelcar.sh

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 230966ffc74a7..686fa435f99a1 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -196,9 +196,11 @@ RUN umask 002 \
     && chmod g+rwx $HOME /usr/src /workspace
 
 COPY LICENSE /licenses/vllm.md
+COPY --chown=2000:0 --chmod=554 extras/wait-modelcar.sh .
 
 USER 2000
-ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+ENTRYPOINT ["/workspace/wait-modelcar.sh"]
+CMD ["python3", "-m", "vllm.entrypoints.openai.api_server"]
 
 
 FROM vllm-openai as vllm-grpc-adapter
@@ -217,4 +219,4 @@ ENV GRPC_PORT=8033 \
     DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
 
 USER 2000
-ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
+CMD ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
diff --git a/extras/wait-modelcar.sh b/extras/wait-modelcar.sh
new file mode 100755
index 0000000000000..a89f06e512a79
--- /dev/null
+++ b/extras/wait-modelcar.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+if [ "${MODEL_INIT_MODE}" = "async" ] ; then
+  echo "Waiting for model files (modelcar) to be present..."
+  until test -e /mnt/models; do
+    sleep 1
+  done
+
+  echo "Model files are now available."
+fi
+
+echo "Starting model server..."
+eval $@
+