diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 230966ffc74a7..686fa435f99a1 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -196,9 +196,14 @@ RUN umask 002 \
     && chmod g+rwx $HOME /usr/src /workspace
 
 COPY LICENSE /licenses/vllm.md
+# Entrypoint wrapper script; the ENTRYPOINT below refers to it by this absolute path.
+COPY --chown=2000:0 --chmod=554 extras/wait-modelcar.sh /workspace/wait-modelcar.sh
 
 USER 2000
-ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+# The wrapper is the ENTRYPOINT and the API server is the default CMD, so the
+# command can be overridden while the wrapper still runs first.
+ENTRYPOINT ["/workspace/wait-modelcar.sh"]
+CMD ["python3", "-m", "vllm.entrypoints.openai.api_server"]
 
 
 FROM vllm-openai as vllm-grpc-adapter
@@ -217,4 +222,5 @@ ENV GRPC_PORT=8033 \
     DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
 
 USER 2000
-ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
+# Only the command is replaced here; it runs under the ENTRYPOINT inherited from the base stage.
+CMD ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
diff --git a/extras/wait-modelcar.sh b/extras/wait-modelcar.sh
new file mode 100755
index 0000000000000..a89f06e512a79
--- /dev/null
+++ b/extras/wait-modelcar.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Entrypoint wrapper: optionally wait for the model files, then run the
+# command passed as arguments (the image's CMD or a user override).
+#
+# MODEL_INIT_MODE=async  -> poll once per second until /mnt/models exists.
+# anything else / unset  -> start the command immediately.
+
+if [ "${MODEL_INIT_MODE}" = "async" ] ; then
+    echo "Waiting for model files (modelcar) to be present..."
+    until test -e /mnt/models; do
+        sleep 1
+    done
+
+    echo "Model files are now available."
+fi
+
+echo "Starting model server..."
+# exec replaces this shell with the server, so the server runs as the
+# container's main process and receives stop signals directly. Quoting "$@"
+# passes every argument through unchanged (unlike `eval $@`, which would
+# re-split the arguments and re-evaluate them as shell code).
+exec "$@"