Skip to content

Commit

Permalink
install numactl to enable fastsafetensors
Browse files Browse the repository at this point in the history
Signed-off-by: Jefferson Fialho <[email protected]>
  • Loading branch information
fialhocoelho committed Dec 12, 2024
1 parent 97a6423 commit b71bef9
Showing 1 changed file with 50 additions and 21 deletions.
71 changes: 50 additions & 21 deletions Dockerfile.ubi
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,40 @@ FROM python-install as cuda-base
# Register NVIDIA's CUDA repository definition for RHEL 9 under
# /etc/yum.repos.d. -f makes curl exit non-zero on an HTTP error instead of
# saving the server's error page as the .repo file and reporting success.
RUN curl -fLo /etc/yum.repos.d/cuda-rhel9.repo \
    https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo

# Build numactl from its UBI 9 source RPM and install the resulting
# numactl-libs, numactl and numactl-devel packages.
#
# The whole sequence lives in one RUN on purpose: a shell variable assigned in
# a RUN disappears when that instruction finishes, and ENV can only expand
# values Docker already knows, so the version detected from the downloaded
# file name has to be consumed by the same shell that computed it.
#
# NUMACTL_V is the version-release portion of the file name
# numactl-<NUMACTL_V>.el9.src.rpm. The build aborts if it cannot be
# determined, rather than continuing with malformed RPM paths. The value is
# still appended to /root/.bashrc, as before, for interactive root shells.
WORKDIR /root
RUN microdnf install -y autoconf automake libtool make rpm-build && \
    microdnf download --enablerepo=ubi-9-baseos-source --source numactl.src && \
    NUMACTL_V="$(ls /root/numactl-*.el9.src.rpm | sed -r 's|/root/numactl-(.+)\.el9\.src\.rpm|\1|')" && \
    test -n "${NUMACTL_V}" && \
    echo "NUMACTL_V is set to ${NUMACTL_V}" && \
    echo "NUMACTL_V=${NUMACTL_V}" >> /root/.bashrc && \
    rpm -i "/root/numactl-${NUMACTL_V}.el9.src.rpm" && \
    rpmbuild -ba /root/rpmbuild/SPECS/numactl.spec && \
    rpm -i \
        "/root/rpmbuild/RPMS/x86_64/numactl-libs-${NUMACTL_V}.el9.x86_64.rpm" \
        "/root/rpmbuild/RPMS/x86_64/numactl-${NUMACTL_V}.el9.x86_64.rpm" \
        "/root/rpmbuild/RPMS/x86_64/numactl-devel-${NUMACTL_V}.el9.x86_64.rpm" && \
    microdnf clean all

# Install the CUDA 12.4 packages (nvcc, NVTX, libraries-devel) and clear the
# microdnf cache in the same layer. The package list appears exactly once:
# a second copy placed after `&&` would be executed as a shell command and
# fail the build.
RUN microdnf install -y \
        cuda-libraries-devel-12-4 \
        cuda-nvcc-12-4 \
        cuda-nvtx-12-4 && \
    microdnf clean all

# Diagnostic step: look for numa.h under /usr, /usr/local, /opt and /lib.
# tee copies every match to stderr as well as stdout, and the banner lines
# mark where the listing starts and ends.
RUN printf '%s\n' "### Searching for numa.h in common directories ###" \
    && find /usr /usr/local /opt /lib -name numa.h | tee /dev/stderr \
    && printf '%s\n' "### END OF NUMA.H SEARCH ###"

# CUDA install location plus the executable and shared-library search paths.
#
# CUDA_HOME is set in its own ENV instruction. Within a single ENV, every
# ${VAR} reference is expanded using the value the variable had *before* that
# instruction, so defining CUDA_HOME and referencing it in the same ENV would
# expand it to its previous value (empty if it was unset) rather than
# /usr/local/cuda.
ENV CUDA_HOME="/usr/local/cuda"
ENV PATH="${CUDA_HOME}/bin:${PATH}" \
    LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
Expand Down Expand Up @@ -201,23 +231,22 @@ WORKDIR /home/vllm

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]


# gRPC adapter image: the vllm-openai stage with vllm-tgis-adapter installed
# and used as the entrypoint.
FROM vllm-openai AS vllm-grpc-adapter

# Switch to root for the install step; the stage drops to UID 2000 afterwards.
USER root

# Install the wheel found in /workspace/dist (bind-mounted from the `build`
# stage) with its `tensorizer` extra, together with the pinned adapter
# release. The pip and uv cache directories are cache mounts.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
    HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.5.3

ENV GRPC_PORT=8033 \
    PORT=8000

# As an optimization, vLLM disables logprobs when using spec decoding by
# default, but this would be unexpected to users of a hosted model that
# happens to have spec decoding
# see: https://github.com/vllm-project/vllm/pull/6485
ENV DISABLE_LOGPROBS_DURING_SPEC_DECODING=false

USER 2000
ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
# FROM vllm-openai as vllm-grpc-adapter
#
# USER root
#
# RUN --mount=type=cache,target=/root/.cache/pip \
# --mount=type=cache,target=/root/.cache/uv \
# --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
# HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.5.3
#
# ENV GRPC_PORT=8033 \
# PORT=8000 \
# # As an optimization, vLLM disables logprobs when using spec decoding by
# # default, but this would be unexpected to users of a hosted model that
# # happens to have spec decoding
# # see: https://github.com/vllm-project/vllm/pull/6485
# DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
#
# USER 2000
# ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]

0 comments on commit b71bef9

Please sign in to comment.