🍱 move over internal changes

Co-authored-by: Nick Hill <[email protected]> Co-authored-by: Travis Johnson <[email protected]> Signed-off-by: Joe Runde <[email protected]>
IBM · Mar 5, 2024 · cde932c · cde932c
1 parent 05af6da
commit cde932c
Show file tree

Hide file tree

Showing 33 changed files with 2,104 additions and 139 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -1 +1,3 @@
 vllm/*.so
+.*
+docs
diff --git a/Dockerfile b/Dockerfile
@@ -70,7 +70,7 @@ ADD . /vllm-workspace/
 COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
 # ignore build dependencies installation because we are using pre-complied extensions
 RUN rm pyproject.toml
-RUN --mount=type=cache,target=/root/.cache/pip VLLM_USE_PRECOMPILED=1 pip install . --verbose
+RUN --mount=type=cache,target=/root/.cache/pip VLLM_USE_PRECOMPILED=1 pip install .[ray] --verbose
 #################### TEST IMAGE ####################
 
 
@@ -80,7 +80,6 @@ RUN --mount=type=cache,target=/root/.cache/pip VLLM_USE_PRECOMPILED=1 pip instal
 # In the future it would be nice to get a container with pytorch and cuda without duplicating cuda
 FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS vllm-base
 
-# libnccl required for ray
 RUN apt-get update -y \
     && apt-get install -y python3-pip
 

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
@@ -0,0 +1,282 @@
+## Global Args #################################################################
+ARG BASE_UBI_IMAGE_TAG=9.3-1552
+ARG PYTHON_VERSION=3.11
+ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
+# ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
+ARG PYTORCH_VERSION=2.1.2
+
+# NOTE: This setting only has an effect when not using prebuilt-wheel kernels
+ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+
+
+## Base Layer ##################################################################
+FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
+
+WORKDIR /workspace
+
+ENV LANG=C.UTF-8 \
+    LC_ALL=C.UTF-8
+
+# Some utils for dev purposes - tar required for kubectl cp
+RUN microdnf install -y \
+        which procps findutils tar vim \
+    && microdnf clean all
+
+
+## Python Installer ############################################################
+FROM base as python-install
+
+ARG PYTHON_VERSION
+ARG MINIFORGE_VERSION=23.11.0-0
+
+RUN curl -fsSL -o ~/miniforge3.sh -O  "https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Miniforge3-$(uname)-$(uname -m).sh" && \
+    chmod +x ~/miniforge3.sh && \
+    bash ~/miniforge3.sh -b -p /opt/conda && \
+    source "/opt/conda/etc/profile.d/conda.sh" && \
+    conda create -y -p /opt/vllm python=${PYTHON_VERSION} && \
+    conda activate /opt/vllm && \
+    rm ~/miniforge3.sh
+# use of the /opt/vllm env requires:
+# ENV PATH=/opt/vllm/bin/:$PATH
+
+
+## Python Base #################################################################
+FROM base as python-base
+
+COPY --from=python-install --link /opt/vllm /opt/vllm
+
+ENV PATH=/opt/vllm/bin/:$PATH
+
+
+## Python/Torch Base ###########################################################
+FROM python-base as python-torch-base
+
+ARG PYTORCH_INDEX
+ARG PYTORCH_VERSION
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip3 install torch==$PYTORCH_VERSION+cu121 --index-url "${PYTORCH_INDEX}/cu121"
+
+
+## CUDA Base ###################################################################
+FROM base as cuda-base
+
+# The Nvidia operator won't allow deploying on CUDA 12.0 hosts if
+# this env var is set to 12.2.0, even though it's compatible
+#ENV CUDA_VERSION=12.2.0 \
+ENV CUDA_VERSION=12.0.0 \
+    NV_CUDA_LIB_VERSION=12.2.0-1 \
+    NVIDIA_VISIBLE_DEVICES=all \
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
+    NV_CUDA_CUDART_VERSION=12.2.53-1 \
+    NV_CUDA_COMPAT_VERSION=535.104.12
+
+RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
+        https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
+
+RUN microdnf install -y \
+        cuda-cudart-12-2-${NV_CUDA_CUDART_VERSION} \
+        cuda-compat-12-2-${NV_CUDA_COMPAT_VERSION} \
+    && microdnf clean all
+
+ENV CUDA_HOME="/usr/local/cuda" \
+    PATH="/usr/local/nvidia/bin:${CUDA_HOME}/bin:${PATH}" \
+    LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
+
+
+## CUDA Runtime ################################################################
+FROM cuda-base as cuda-runtime
+
+ENV NV_NVTX_VERSION=12.2.53-1 \
+    NV_LIBNPP_VERSION=12.1.1.14-1 \
+    NV_LIBCUBLAS_VERSION=12.2.1.16-1 \
+    NV_LIBNCCL_PACKAGE_VERSION=2.18.5-1+cuda12.2
+
+RUN microdnf install -y \
+        cuda-libraries-12-2-${NV_CUDA_LIB_VERSION} \
+        cuda-nvtx-12-2-${NV_NVTX_VERSION} \
+        libnpp-12-2-${NV_LIBNPP_VERSION} \
+        libcublas-12-2-${NV_LIBCUBLAS_VERSION} \
+        libnccl-${NV_LIBNCCL_PACKAGE_VERSION} \
+    && microdnf clean all
+
+
+## CUDA Development ############################################################
+FROM cuda-base as cuda-devel
+
+ENV NV_CUDA_CUDART_DEV_VERSION=12.2.53-1 \
+    NV_NVML_DEV_VERSION=12.2.81-1 \
+    NV_LIBCUBLAS_DEV_VERSION=12.2.1.16-1 \
+    NV_LIBNPP_DEV_VERSION=12.1.1.14-1 \
+    NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.5-1+cuda12.2
+
+RUN microdnf install -y \
+        cuda-command-line-tools-12-2-${NV_CUDA_LIB_VERSION} \
+        cuda-libraries-devel-12-2-${NV_CUDA_LIB_VERSION} \
+        cuda-minimal-build-12-2-${NV_CUDA_LIB_VERSION} \
+        cuda-cudart-devel-12-2-${NV_CUDA_CUDART_DEV_VERSION} \
+        cuda-nvml-devel-12-2-${NV_NVML_DEV_VERSION} \
+        libcublas-devel-12-2-${NV_LIBCUBLAS_DEV_VERSION} \
+        libnpp-devel-12-2-${NV_LIBNPP_DEV_VERSION} \
+        libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
+    && microdnf clean all
+
+ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
+
+
+## Development #################################################################
+FROM cuda-devel AS dev
+
+COPY --from=python-torch-base --link /opt/vllm /opt/vllm
+ENV PATH=/opt/vllm/bin/:$PATH
+
+# install build and runtime dependencies
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,source=requirements.txt,target=requirements.txt \
+    --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
+    pip3 install \
+        -r requirements.txt \
+        -r requirements-dev.txt
+
+
+## Builder #####################################################################
+FROM dev AS build
+
+# install build dependencies
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
+    pip install -r requirements-build.txt
+
+# copy input files
+COPY csrc csrc
+COPY setup.py setup.py
+COPY requirements.txt requirements.txt
+COPY pyproject.toml pyproject.toml
+COPY vllm/__init__.py vllm/__init__.py
+
+ARG TORCH_CUDA_ARCH_LIST
+ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
+
+# max jobs used by Ninja to build extensions
+ARG max_jobs=2
+ENV MAX_JOBS=${max_jobs}
+# number of threads used by nvcc
+ARG nvcc_threads=8
+ENV NVCC_THREADS=$nvcc_threads
+# make sure punica kernels are built (for LoRA)
+ENV VLLM_INSTALL_PUNICA_KERNELS=1
+
+RUN python3 setup.py build_ext --inplace
+
+
+## Extension Cache #############################################################
+# Instead of compiling artifacts every build just copy from pre-built wheel
+# This might not work if the PyTorch and CUDA versions don't match!
+FROM base as prebuilt-wheel
+
+RUN microdnf install -y \
+        unzip \
+    && microdnf clean all
+
+ARG PYTHON_VERSION
+# 0.3.2 is built for CUDA 12.1 and PyTorch 2.1.2
+ARG VLLM_WHEEL_VERSION=0.3.2
+
+RUN curl -Lo vllm.whl https://github.com/vllm-project/vllm/releases/download/v${VLLM_WHEEL_VERSION}/vllm-${VLLM_WHEEL_VERSION}-cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}-manylinux1_x86_64.whl \
+    && unzip vllm.whl \
+    && rm vllm.whl
+# compiled extensions located at /workspace/vllm/*.so
+
+
+## Test ########################################################################
+FROM dev AS test
+
+WORKDIR /vllm-workspace
+# ADD is used to preserve directory structure
+# NB: Could leak secrets from local context, the test image should not be pushed
+# to a registry
+ADD . /vllm-workspace/
+# copy pytorch extensions separately to avoid having to rebuild
+# when python code changes
+COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
+# ignore build dependencies installation because we are using pre-complied extensions
+RUN rm pyproject.toml
+RUN --mount=type=cache,target=/root/.cache/pip \
+    VLLM_USE_PRECOMPILED=1 pip install . --verbose
+
+
+## Proto Compilation ###########################################################
+FROM python-base AS gen-protos
+
+RUN microdnf install -y \
+        make \
+        findutils \
+    && microdnf clean all
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,source=Makefile,target=Makefile \
+    --mount=type=bind,source=proto,target=proto \
+    make gen-protos
+
+## vLLM Library Files ##########################################################
+# Little extra stage to gather files and manage permissions on them without any
+# duplication in the release layer due to permission changes
+FROM base AS vllm
+
+WORKDIR /vllm-staging
+# COPY files from various places into a staging directory
+COPY --link vllm vllm
+COPY --from=prebuilt-wheel --link /workspace/vllm/*.so vllm/
+COPY --from=gen-protos --link /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb
+
+# custom COPY command to use umask to control permissions and grant permissions
+# to the group
+RUN umask 002 \
+    && cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
+    # not strictly needed, but .so files typically have executable bits
+    && chmod +x /workspace/vllm/*.so
+
+## Release #####################################################################
+# Note from the non-UBI Dockerfile:
+# We used base cuda image because pytorch installs its own cuda libraries.
+# However cupy depends on cuda libraries so we had to switch to the runtime image
+# In the future it would be nice to get a container with pytorch and cuda without duplicating cuda
+FROM cuda-runtime AS vllm-openai
+
+WORKDIR /workspace
+
+# Create release python environment
+COPY --from=python-torch-base --link /opt/vllm /opt/vllm
+ENV PATH=/opt/vllm/bin/:$PATH
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,source=requirements.txt,target=requirements.txt \
+    pip3 install \
+        -r requirements.txt \
+        # additional dependencies for the TGIS gRPC server
+        grpcio-tools==1.62.0 \
+        # additional dependencies for openai api_server
+        accelerate==0.27.2
+
+# vLLM will not be installed in site-packages
+COPY --from=vllm --link /workspace/ ./
+
+# Triton needs a CC compiler
+RUN microdnf install -y gcc \
+    && microdnf clean all
+
+ENV HF_HUB_OFFLINE=1 \
+    PORT=8000 \
+    GRPC_PORT=8033 \
+    HOME=/home/vllm
+
+# setup non-root user for OpenShift
+RUN microdnf install -y shadow-utils \
+    && umask 002 \
+    && useradd --uid 2000 --gid 0 vllm \
+    && microdnf remove -y shadow-utils \
+    && microdnf clean all \
+    && chmod g+rwx $HOME /usr/src /workspace
+
+USER 2000
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
diff --git a/Makefile b/Makefile
@@ -0,0 +1,39 @@
+SHELL := /bin/bash
+
+server_image_name := tgis-vllm
+server_image_target := vllm-openai
+
+##@ Development Tasks
+
+has_gawk := $(shell gawk --version 2>/dev/null)
+.PHONY: help
+help: ## Display this help.
+ifdef has_gawk
+	@gawk -f ./scripts/makefile.help.awk $(MAKEFILE_LIST)
+else
+	@awk 'BEGIN{FS=":.*##"; printf("\nUsage:\n  make \033[36m<target>\033[0m\n\n")} /^[-a-zA-Z_0-9\\.]+:.*?##/ {t=$$1; if(!(t in p)){p[t]; printf("\033[36m%-15s\033[0m %s\n", t, $$2)}}' $(MAKEFILE_LIST)
+	@echo
+	@echo "NOTE: Help output with headers requires GNU extensions to awk. Please install gawk for the best experience."
+endif
+
+target_path := "vllm/entrypoints/grpc/pb"
+gen-protos:
+	# Compile protos
+	pip install grpcio-tools==1.62.0 mypy-protobuf==3.5.0 'types-protobuf>=3.20.4'
+	mkdir -p $(target_path)
+	python -m grpc_tools.protoc -Iproto --python_out=$(target_path) \
+		--grpc_python_out=$(target_path) --mypy_out=$(target_path) proto/generation.proto
+	find $(target_path)/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
+	touch $(target_path)/__init__.py
+
+
+##@ Container Build Tasks
+
+.PHONY: build
+build: ##
+	DOCKER_BUILDKIT=1 docker build \
+	--file Dockerfile.ubi \
+	--target $(server_image_target) \
+	--progress plain \
+	--tag "$(server_image_name)" .
+	docker images