🍱 move over internal changes
Co-authored-by: Nick Hill <[email protected]>
Co-authored-by: Travis Johnson <[email protected]>
Signed-off-by: Joe Runde <[email protected]>
3 people committed Mar 5, 2024
1 parent 05af6da commit cde932c
Showing 33 changed files with 2,104 additions and 139 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
@@ -1 +1,3 @@
vllm/*.so
.*
docs
3 changes: 1 addition & 2 deletions Dockerfile
@@ -70,7 +70,7 @@ ADD . /vllm-workspace/
COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
# skip installing build dependencies because we are using pre-compiled extensions
RUN rm pyproject.toml
-RUN --mount=type=cache,target=/root/.cache/pip VLLM_USE_PRECOMPILED=1 pip install . --verbose
+RUN --mount=type=cache,target=/root/.cache/pip VLLM_USE_PRECOMPILED=1 pip install .[ray] --verbose
#################### TEST IMAGE ####################


@@ -80,7 +80,6 @@ RUN --mount=type=cache,target=/root/.cache/pip VLLM_USE_PRECOMPILED=1 pip instal
# In the future it would be nice to get a container with pytorch and cuda without duplicating cuda
FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS vllm-base

-# libnccl required for ray
RUN apt-get update -y \
&& apt-get install -y python3-pip

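Net effect of this hunk: the test image now installs vLLM with the `ray` extra during the precompiled install. A minimal sketch of the equivalent manual step, assuming prebuilt vllm/*.so files already sit in the source tree:

# equivalent of the updated install step (sketch)
VLLM_USE_PRECOMPILED=1 pip install '.[ray]' --verbose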
282 changes: 282 additions & 0 deletions Dockerfile.ubi
@@ -0,0 +1,282 @@
## Global Args #################################################################
ARG BASE_UBI_IMAGE_TAG=9.3-1552
ARG PYTHON_VERSION=3.11
ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
# ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
ARG PYTORCH_VERSION=2.1.2

# NOTE: This setting only has an effect when not using prebuilt-wheel kernels
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"


## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base

WORKDIR /workspace

ENV LANG=C.UTF-8 \
LC_ALL=C.UTF-8

# Some utils for dev purposes - tar required for kubectl cp
RUN microdnf install -y \
which procps findutils tar vim \
&& microdnf clean all


## Python Installer ############################################################
FROM base as python-install

ARG PYTHON_VERSION
ARG MINIFORGE_VERSION=23.11.0-0

RUN curl -fsSL -o ~/miniforge3.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Miniforge3-$(uname)-$(uname -m).sh" && \
chmod +x ~/miniforge3.sh && \
bash ~/miniforge3.sh -b -p /opt/conda && \
source "/opt/conda/etc/profile.d/conda.sh" && \
conda create -y -p /opt/vllm python=${PYTHON_VERSION} && \
conda activate /opt/vllm && \
rm ~/miniforge3.sh
# use of the /opt/vllm env requires:
# ENV PATH=/opt/vllm/bin/:$PATH


## Python Base #################################################################
FROM base as python-base

COPY --from=python-install --link /opt/vllm /opt/vllm

ENV PATH=/opt/vllm/bin/:$PATH


## Python/Torch Base ###########################################################
FROM python-base as python-torch-base

ARG PYTORCH_INDEX
ARG PYTORCH_VERSION

RUN --mount=type=cache,target=/root/.cache/pip \
pip3 install torch==$PYTORCH_VERSION+cu121 --index-url "${PYTORCH_INDEX}/cu121"
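
With the default build args (PYTORCH_VERSION=2.1.2 and the stable PyTorch index), this step resolves to the following (illustrative):

pip3 install torch==2.1.2+cu121 --index-url https://download.pytorch.org/whl/cu121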


## CUDA Base ###################################################################
FROM base as cuda-base

# The Nvidia operator won't allow deploying on CUDA 12.0 hosts if
# this env var is set to 12.2.0, even though it's compatible
#ENV CUDA_VERSION=12.2.0 \
ENV CUDA_VERSION=12.0.0 \
NV_CUDA_LIB_VERSION=12.2.0-1 \
NVIDIA_VISIBLE_DEVICES=all \
NVIDIA_DRIVER_CAPABILITIES=compute,utility \
NV_CUDA_CUDART_VERSION=12.2.53-1 \
NV_CUDA_COMPAT_VERSION=535.104.12

RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo

RUN microdnf install -y \
cuda-cudart-12-2-${NV_CUDA_CUDART_VERSION} \
cuda-compat-12-2-${NV_CUDA_COMPAT_VERSION} \
&& microdnf clean all

ENV CUDA_HOME="/usr/local/cuda" \
PATH="/usr/local/nvidia/bin:${CUDA_HOME}/bin:${PATH}" \
LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"


## CUDA Runtime ################################################################
FROM cuda-base as cuda-runtime

ENV NV_NVTX_VERSION=12.2.53-1 \
NV_LIBNPP_VERSION=12.1.1.14-1 \
NV_LIBCUBLAS_VERSION=12.2.1.16-1 \
NV_LIBNCCL_PACKAGE_VERSION=2.18.5-1+cuda12.2

RUN microdnf install -y \
cuda-libraries-12-2-${NV_CUDA_LIB_VERSION} \
cuda-nvtx-12-2-${NV_NVTX_VERSION} \
libnpp-12-2-${NV_LIBNPP_VERSION} \
libcublas-12-2-${NV_LIBCUBLAS_VERSION} \
libnccl-${NV_LIBNCCL_PACKAGE_VERSION} \
&& microdnf clean all


## CUDA Development ############################################################
FROM cuda-base as cuda-devel

ENV NV_CUDA_CUDART_DEV_VERSION=12.2.53-1 \
NV_NVML_DEV_VERSION=12.2.81-1 \
NV_LIBCUBLAS_DEV_VERSION=12.2.1.16-1 \
NV_LIBNPP_DEV_VERSION=12.1.1.14-1 \
NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.5-1+cuda12.2

RUN microdnf install -y \
cuda-command-line-tools-12-2-${NV_CUDA_LIB_VERSION} \
cuda-libraries-devel-12-2-${NV_CUDA_LIB_VERSION} \
cuda-minimal-build-12-2-${NV_CUDA_LIB_VERSION} \
cuda-cudart-devel-12-2-${NV_CUDA_CUDART_DEV_VERSION} \
cuda-nvml-devel-12-2-${NV_NVML_DEV_VERSION} \
libcublas-devel-12-2-${NV_LIBCUBLAS_DEV_VERSION} \
libnpp-devel-12-2-${NV_LIBNPP_DEV_VERSION} \
libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
&& microdnf clean all

ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"


## Development #################################################################
FROM cuda-devel AS dev

COPY --from=python-torch-base --link /opt/vllm /opt/vllm
ENV PATH=/opt/vllm/bin/:$PATH

# install build and runtime dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=requirements.txt,target=requirements.txt \
--mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
pip3 install \
-r requirements.txt \
-r requirements-dev.txt


## Builder #####################################################################
FROM dev AS build

# install build dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
pip install -r requirements-build.txt

# copy input files
COPY csrc csrc
COPY setup.py setup.py
COPY requirements.txt requirements.txt
COPY pyproject.toml pyproject.toml
COPY vllm/__init__.py vllm/__init__.py

ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=$nvcc_threads
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1

RUN python3 setup.py build_ext --inplace
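
MAX_JOBS and NVCC_THREADS trade build speed for memory use, and both can be overridden at build time without editing the file. A hedged example invocation (the tag name is a placeholder):

DOCKER_BUILDKIT=1 docker build \
    --file Dockerfile.ubi \
    --target build \
    --build-arg max_jobs=8 \
    --build-arg nvcc_threads=4 \
    --tag vllm-build-stage .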


## Extension Cache #############################################################
# Instead of compiling the extensions on every build, just copy them from a pre-built wheel.
# This might not work if the PyTorch and CUDA versions don't match!
FROM base as prebuilt-wheel

RUN microdnf install -y \
unzip \
&& microdnf clean all

ARG PYTHON_VERSION
# 0.3.2 is built for CUDA 12.1 and PyTorch 2.1.2
ARG VLLM_WHEEL_VERSION=0.3.2

RUN curl -Lo vllm.whl https://github.com/vllm-project/vllm/releases/download/v${VLLM_WHEEL_VERSION}/vllm-${VLLM_WHEEL_VERSION}-cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}-manylinux1_x86_64.whl \
&& unzip vllm.whl \
&& rm vllm.whl
# compiled extensions located at /workspace/vllm/*.so
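
The ${PYTHON_VERSION//.} expansion strips the dot so the wheel tag matches CPython's ABI naming; with the defaults above, the URL resolves as follows (illustrative shell session):

PYTHON_VERSION=3.11
echo "cp${PYTHON_VERSION//.}"    # prints: cp311
# -> vllm-0.3.2-cp311-cp311-manylinux1_x86_64.whl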


## Test ########################################################################
FROM dev AS test

WORKDIR /vllm-workspace
# ADD is used to preserve directory structure
# NB: this could leak secrets from the local build context; the test image should
# not be pushed to a registry
ADD . /vllm-workspace/
# copy pytorch extensions separately to avoid having to rebuild
# when python code changes
COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
# skip installing build dependencies because we are using pre-compiled extensions
RUN rm pyproject.toml
RUN --mount=type=cache,target=/root/.cache/pip \
VLLM_USE_PRECOMPILED=1 pip install . --verbose
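
A hedged way to build just this stage locally (the tag is a placeholder; per the note above, don't push the result):

DOCKER_BUILDKIT=1 docker build --file Dockerfile.ubi --target test --tag vllm-test .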


## Proto Compilation ###########################################################
FROM python-base AS gen-protos

RUN microdnf install -y \
make \
findutils \
&& microdnf clean all

RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=Makefile,target=Makefile \
--mount=type=bind,source=proto,target=proto \
make gen-protos

## vLLM Library Files ##########################################################
# A small extra stage to gather files and manage their permissions, avoiding
# duplication in the release layer that permission changes would otherwise cause
FROM base AS vllm

WORKDIR /vllm-staging
# COPY files from various places into a staging directory
COPY --link vllm vllm
COPY --from=prebuilt-wheel --link /workspace/vllm/*.so vllm/
COPY --from=gen-protos --link /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb

# custom copy step (cp instead of COPY) that uses umask to control permissions
# and grant permissions to the group
RUN umask 002 \
&& cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
# not strictly needed, but .so files typically have executable bits
&& chmod +x /workspace/vllm/*.so
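
With umask 002, newly created directories come out 775 and files 664, which is what gives the gid-0 non-root user in the release image write access. A quick illustration (assumed shell session, GNU stat):

umask 002
mkdir demo && touch demo/file
stat -c '%a %n' demo demo/file    # expect: 775 demo / 664 demo/file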

## Release #####################################################################
# Note from the non-UBI Dockerfile:
# We used the base cuda image because pytorch installs its own cuda libraries.
# However, cupy depends on cuda libraries, so we had to switch to the runtime image.
# In the future it would be nice to get a container with pytorch and cuda without duplicating cuda.
FROM cuda-runtime AS vllm-openai

WORKDIR /workspace

# Create release python environment
COPY --from=python-torch-base --link /opt/vllm /opt/vllm
ENV PATH=/opt/vllm/bin/:$PATH

RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=requirements.txt,target=requirements.txt \
pip3 install \
-r requirements.txt \
# additional dependencies for the TGIS gRPC server
grpcio-tools==1.62.0 \
# additional dependencies for openai api_server
accelerate==0.27.2

# vLLM will not be installed in site-packages
COPY --from=vllm --link /workspace/ ./

# Triton needs a CC compiler
RUN microdnf install -y gcc \
&& microdnf clean all

ENV HF_HUB_OFFLINE=1 \
PORT=8000 \
GRPC_PORT=8033 \
HOME=/home/vllm

# set up a non-root user for OpenShift
RUN microdnf install -y shadow-utils \
&& umask 002 \
&& useradd --uid 2000 --gid 0 vllm \
&& microdnf remove -y shadow-utils \
&& microdnf clean all \
&& chmod g+rwx $HOME /usr/src /workspace

USER 2000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
39 changes: 39 additions & 0 deletions Makefile
@@ -0,0 +1,39 @@
SHELL := /bin/bash

server_image_name := tgis-vllm
server_image_target := vllm-openai

##@ Development Tasks

has_gawk := $(shell gawk --version 2>/dev/null)
.PHONY: help
help: ## Display this help.
ifdef has_gawk
@gawk -f ./scripts/makefile.help.awk $(MAKEFILE_LIST)
else
@awk 'BEGIN{FS=":.*##"; printf("\nUsage:\n make \033[36m<target>\033[0m\n\n")} /^[-a-zA-Z_0-9\\.]+:.*?##/ {t=$$1; if(!(t in p)){p[t]; printf("\033[36m%-15s\033[0m %s\n", t, $$2)}}' $(MAKEFILE_LIST)
@echo
@echo "NOTE: Help output with headers requires GNU extensions to awk. Please install gawk for the best experience."
endif

target_path := "vllm/entrypoints/grpc/pb"
gen-protos:
# Compile protos
pip install grpcio-tools==1.62.0 mypy-protobuf==3.5.0 'types-protobuf>=3.20.4'
mkdir -p $(target_path)
python -m grpc_tools.protoc -Iproto --python_out=$(target_path) \
--grpc_python_out=$(target_path) --mypy_out=$(target_path) proto/generation.proto
find $(target_path)/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
touch $(target_path)/__init__.py
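
The sed pass rewrites the generated stubs' absolute imports into relative ones so they work as a package. Illustrative before/after for a single generated line:

echo 'import generation_pb2 as generation__pb2' \
    | sed -e 's/^\(import.*pb2\)/from . \1/g'
# prints: from . import generation_pb2 as generation__pb2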


##@ Container Build Tasks

.PHONY: build
build: ## Build the server container image from Dockerfile.ubi.
DOCKER_BUILDKIT=1 docker build \
--file Dockerfile.ubi \
--target $(server_image_target) \
--progress plain \
--tag "$(server_image_name)" .
docker images
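
Typical usage, including overriding the image name from the command line (variable names as defined at the top of this Makefile):

make build
make build server_image_name=my-vllm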