From 49318047e317062d7c7c9836f071ab59bb5afaa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Hern=C3=A1ndez?= <23639005+israel-hdez@users.noreply.github.com> Date: Mon, 2 Sep 2024 17:09:02 -0600 Subject: [PATCH] Add script to wait for the model to appear when using KServe Modelcar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using OCI containers for model storage in KServe (modelcar), there is the possibility that the model server starts before the model has been fully downloaded. When this happens, the model server would terminate with an error because the model path is empty. This adds a small script to wait for the cluster to fully download the model container before invoking the model server. The waiting is triggered when the MODEL_INIT_MODE environment variable is set to "async". Signed-off-by: Edgar Hernández <23639005+israel-hdez@users.noreply.github.com> --- Dockerfile.ubi | 6 ++++-- extras/wait-modelcar.sh | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100755 extras/wait-modelcar.sh diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 230966ffc74a7..686fa435f99a1 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -196,9 +196,11 @@ RUN umask 002 \ && chmod g+rwx $HOME /usr/src /workspace COPY LICENSE /licenses/vllm.md +COPY --chown=2000:0 --chmod=554 extras/wait-modelcar.sh . 
USER 2000 -ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] +ENTRYPOINT ["/workspace/wait-modelcar.sh"] +CMD ["python3", "-m", "vllm.entrypoints.openai.api_server"] FROM vllm-openai as vllm-grpc-adapter @@ -217,4 +219,4 @@ ENV GRPC_PORT=8033 \ DISABLE_LOGPROBS_DURING_SPEC_DECODING=false USER 2000 -ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"] +CMD ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"] diff --git a/extras/wait-modelcar.sh b/extras/wait-modelcar.sh new file mode 100755 index 0000000000000..a89f06e512a79 --- /dev/null +++ b/extras/wait-modelcar.sh @@ -0,0 +1,14 @@
+#!/bin/bash
+# Image entrypoint: if MODEL_INIT_MODE is "async", wait until /mnt/models
+# exists, then run the command passed as arguments (the model server).
+if [ "${MODEL_INIT_MODE}" = "async" ] ; then
+  echo "Waiting for model files (modelcar) to be present..."
+  until test -e /mnt/models; do
+    sleep 1
+  done
+  echo "Model files are now available."
+fi
+
+echo "Starting model server..."
+# exec (not eval): keep each argument as one word and replace this shell.
+exec "$@"