reduced the size of the nvidia containerfile
Signed-off-by: Brian <[email protected]>
bmahabirbu committed Nov 1, 2024
1 parent 61d0f3a commit ab17824
Showing 1 changed file with 42 additions and 52 deletions.
container-images/cuda/Containerfile: 94 changes (42 additions & 52 deletions)
@@ -1,4 +1,5 @@
-FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9
+# Base image with CUDA for compilation
+FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder
 
 # renovate: datasource=github-releases depName=huggingface/huggingface_hub extractVersion=^v(?<version>.*)
 ARG HUGGINGFACE_HUB_VERSION=0.26.2
@@ -8,64 +9,53 @@ ARG LLAMA_CPP_SHA=3f1ae2e32cde00c39b96be6d01c2997c29bae555
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=4e10afb5a94469c605aae4eceb4021fb0e68c8f5
 
-# vulkan-headers vulkan-loader-devel vulkan-tools glslc glslang python3-pip mesa-libOpenCL-$MESA_VER.aarch64
-RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
-    crb enable && \
-    dnf install -y epel-release && \
-    dnf --enablerepo=ubi-9-appstream-rpms install -y git procps-ng vim \
-        dnf-plugins-core python3-dnf-plugin-versionlock cmake gcc-c++ \
-        python3-pip && \
-    dnf clean all && \
-    rm -rf /var/cache/*dnf*
-
-RUN /usr/bin/python3 --version
-RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}"
-RUN pip install "omlmd==${OMLMD_VERSION}"
-
-# CUDA_DOCKER_ARCH =
-# Hopper GPUs (e.g., H100): Use 90
-# Ampere GPUs (e.g., RTX 30 Series, A100): Use 80
-# Turing GPUs (e.g., RTX 20 Series, GTX 16 Series): Use 75
-# Volta GPUs (e.g., V100): Use 70
-# Pascal GPUs (e.g., GTX 10 Series): Use 61
-# Maxwell GPUs (e.g., GTX 900 Series): Use 52
-# Kepler GPUs (e.g., GTX 600 and 700 Series): Use 35
-
 # Change to your gpu architecture (Optional)
 ARG CUDA_DOCKER_ARCH=default
 
-# Followed https://github.com/ggerganov/llama.cpp/blob/master/.devops/full-cuda.Dockerfile
-# for reference to build llama.cpp with cuda using cmake
+# Install dependencies only needed for building
+RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
+    dnf install -y git cmake gcc-c++ python3-pip && \
+    dnf clean all && rm -rf /var/cache/*dnf*
 
-RUN git clone https://github.com/ggerganov/llama.cpp && \
-    cd llama.cpp && \
+# Install Python packages
+RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
 
+# Build llama.cpp
+RUN git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && \
     git reset --hard ${LLAMA_CPP_SHA} && \
     cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc) && \
-    # Move llama-cli and llama-server to /usr/bin
-    mv build/bin/llama-cli /usr/bin/llama-cli && \
-    mv build/bin/llama-server /usr/bin/llama-server && \
-    # Move shared libraries to a standard library directory
-    mv build/ggml/src/libggml.so /usr/lib/libggml.so && \
-    mv build/src/libllama.so /usr/lib/libllama.so && \
-    # Update the dynamic linker cache
-    ldconfig && \
-    # Clean up
-    cd / && \
-    rm -rf llama.cpp
+    mv build/bin/llama-cli /usr/bin/ && mv build/bin/llama-server /usr/bin/ && \
+    mv build/ggml/src/libggml.so /usr/lib/ && mv build/src/libllama.so /usr/lib/ && \
+    cd / && rm -rf llama.cpp
 
-RUN git clone https://github.com/ggerganov/whisper.cpp.git && \
-    cd whisper.cpp && \
+# Build whisper.cpp
+RUN git clone https://github.com/ggerganov/whisper.cpp && cd whisper.cpp && \
     git reset --hard ${WHISPER_CPP_SHA} && \
     cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc) && \
-    # Move whisper binaries to /usr/bin
-    mv build/bin/main /usr/bin/whisper-main && \
-    mv build/bin/server /usr/bin/whisper-server && \
-    # Move any shared libraries to a standard library directory if needed
-    if [ -f build/lib/libwhisper.so ]; then mv build/lib/libwhisper.so /usr/lib/libwhisper.so; fi && \
-    # Update the dynamic linker cache if any shared libraries were moved
-    ldconfig || true && \
-    # Clean up
-    cd / && \
-    rm -rf whisper.cpp
+    mv build/bin/main /usr/bin/whisper-main && mv build/bin/server /usr/bin/whisper-server && \
+    if [ -f build/lib/libwhisper.so ]; then mv build/lib/libwhisper.so /usr/lib/; fi && \
+    cd / && rm -rf whisper.cpp
+
+# Final runtime image
+FROM docker.io/nvidia/cuda:12.6.2-runtime-ubi9
+
+# renovate: datasource=github-releases depName=huggingface/huggingface_hub extractVersion=^v(?<version>.*)
+ARG HUGGINGFACE_HUB_VERSION=0.26.2
+# renovate: datasource=github-releases depName=containers/omlmd extractVersion=^v(?<version>.*)
+ARG OMLMD_VERSION=0.1.6
+
+# Install minimal runtime dependencies
+RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
+    dnf install -y python3 python3-pip && dnf clean all && rm -rf /var/cache/*dnf*
+
+# Install Python packages in the runtime image
+RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
+
+# Copy only necessary files from the build stage
+COPY --from=builder /usr/bin/llama-cli /usr/bin/llama-server /usr/bin/
+COPY --from=builder /usr/bin/whisper-main /usr/bin/whisper-server /usr/bin/
+COPY --from=builder /usr/lib/libggml.so /usr/lib/libllama.so /usr/lib/
+
+# Update dynamic linker cache
+RUN ldconfig || true
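
A note on the CUDA_DOCKER_ARCH build argument: the values in the removed comment table are CUDA compute capabilities, and the ARG can still be overridden at build time instead of editing the file. A minimal sketch, assuming the image is built from the repository root with podman (the ramalama-cuda tag is illustrative; docker build accepts the same flags):

    # Target Turing GPUs (RTX 20 Series / GTX 16 Series), compute capability 75
    podman build --build-arg CUDA_DOCKER_ARCH=75 \
        -f container-images/cuda/Containerfile \
        -t ramalama-cuda .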
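To exercise the resulting runtime image on a GPU host, something like the following should work, assuming the NVIDIA container toolkit is installed and CDI device specs have been generated; the command after the image name is one of the binaries copied from the builder stage:

    # Run llama-cli from the runtime image with all GPUs exposed via CDI
    podman run --rm --device nvidia.com/gpu=all ramalama-cuda llama-cli --help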
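Because the final stage copies libggml.so and libllama.so into /usr/lib and then runs ldconfig, the copied binaries should resolve their shared libraries without any LD_LIBRARY_PATH setup. A quick check, assuming ldd is present in the UBI9 base image:

    # Confirm the dynamic linker finds the copied libraries inside the image
    podman run --rm ramalama-cuda ldd /usr/bin/llama-server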
