Skip to content

Commit

Permalink
Consistency changes
Browse files Browse the repository at this point in the history
Ensure the llama.cpp version is the same across containers. Remove some duplicate
actions, etc.

Signed-off-by: Eric Curtin <[email protected]>
  • Loading branch information
ericcurtin committed Nov 4, 2024
1 parent 5791050 commit 85ccafe
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 31 deletions.
46 changes: 22 additions & 24 deletions container-images/cuda/Containerfile
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
# Base image with CUDA for compilation
FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder

# renovate: datasource=github-releases depName=huggingface/huggingface_hub extractVersion=^v(?<version>.*)
ARG HUGGINGFACE_HUB_VERSION=0.26.2
# renovate: datasource=github-releases depName=containers/omlmd extractVersion=^v(?<version>.*)
ARG OMLMD_VERSION=0.1.6
ARG LLAMA_CPP_SHA=3f1ae2e32cde00c39b96be6d01c2997c29bae555
ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
# renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
ARG WHISPER_CPP_SHA=4e10afb5a94469c605aae4eceb4021fb0e68c8f5

ARG CUDA_DOCKER_ARCH=default

# Install dependencies only needed for building
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
dnf install -y git cmake gcc-c++ python3-pip && \
RUN dnf install -y git cmake gcc-c++ && \
dnf clean all && rm -rf /var/cache/*dnf*

# Install Python packages
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
# Set the temporary installation directory
ENV INSTALL_PREFIX=/tmp/install

# Build llama.cpp
RUN git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && \
git reset --hard ${LLAMA_CPP_SHA} && \
cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
mv build/bin/llama-cli /usr/bin/ && mv build/bin/llama-server /usr/bin/ && \
mv build/ggml/src/libggml.so /usr/lib/ && mv build/src/libllama.so /usr/lib/ && \
cmake --install build && \
cd / && rm -rf llama.cpp

# Build whisper.cpp
RUN git clone https://github.com/ggerganov/whisper.cpp && cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake -B build -DBUILD_SHARED_LIBS=NO -DGGML_CUDA=ON \
-DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
mv build/bin/main /usr/bin/whisper-main && mv build/bin/server /usr/bin/whisper-server && \
if [ -f build/lib/libwhisper.so ]; then mv build/lib/libwhisper.so /usr/lib/; fi && \
cmake --install build && \
mv build/bin/main ${INSTALL_PREFIX}/bin/whisper-main && \
mv build/bin/server ${INSTALL_PREFIX}/bin/whisper-server && \
cd / && rm -rf whisper.cpp

# Final runtime image
Expand All @@ -46,16 +46,14 @@ ARG HUGGINGFACE_HUB_VERSION=0.26.2
ARG OMLMD_VERSION=0.1.6

# Install minimal runtime dependencies
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
dnf install -y python3 python3-pip && dnf clean all && rm -rf /var/cache/*dnf*
RUN dnf install -y python3 python3-pip nvidia-driver-cuda-libs && \
dnf clean all && \
rm -rf /var/cache/*dnf*

# Install Python packages in the runtime image
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" \
"omlmd==${OMLMD_VERSION}"

# Copy only necessary files from the build stage
COPY --from=builder /usr/bin/llama-cli /usr/bin/llama-server /usr/bin/
COPY --from=builder /usr/bin/whisper-main /usr/bin/whisper-server /usr/bin/
COPY --from=builder /usr/lib/libggml.so /usr/lib/libllama.so /usr/lib/
# Copy the entire installation directory from the builder
COPY --from=builder /tmp/install /usr

# Update dynamic linker cache
RUN ldconfig || true
5 changes: 3 additions & 2 deletions container-images/ramalama/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ RUN git clone https://github.com/ggerganov/whisper.cpp.git && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_KOMPUTE=1 \
-DGGML_CCACHE=0 && \
cmake --build build --config Release -j $(nproc) && \
-DBUILD_SHARED_LIBS=NO -DGGML_CCACHE=0 && \
cmake --build build --config Release -j$(nproc) && \
cmake --install build && \
mv build/bin/main /usr/bin/whisper-main && \
mv build/bin/server /usr/bin/whisper-server && \
cd / && \
Expand Down
9 changes: 6 additions & 3 deletions container-images/rocm/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,12 @@ RUN dnf install -y rocm-dev hipblas-devel rocblas-devel && \
git clone https://github.com/ggerganov/whisper.cpp.git && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
make -j $(nproc) GGML_HIPBLAS=1 && \
mv main /usr/bin/whisper-main && \
mv server /usr/bin/whisper-server && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-DBUILD_SHARED_LIBS=NO -DGGML_HIPBLAS=1 && \
cmake --build build --config Release -j$(nproc) && \
cmake --install build && \
mv build/bin/main /usr/bin/whisper-main && \
mv build/bin/server /usr/bin/whisper-server && \
cd / && \
rm -rf /var/cache/*dnf* /opt/rocm-*/lib/llvm \
/opt/rocm-*/lib/rocblas/library/*gfx9* llama.cpp whisper.cpp
Expand Down
5 changes: 3 additions & 2 deletions container-images/vulkan/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ RUN git clone https://github.com/ggerganov/llama.cpp && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-DGGML_VULKAN=1 && \
cmake --build build --config Release -j $(nproc) && \
-DBUILD_SHARED_LIBS=NO -DGGML_VULKAN=1 && \
cmake --build build --config Release -j$(nproc) && \
cmake --install build && \
mv build/bin/main /usr/bin/whisper-main && \
mv build/bin/server /usr/bin/whisper-server && \
cd / && \
Expand Down
4 changes: 4 additions & 0 deletions test/ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ main() {
# verify pyproject.toml and setup.py have same version
grep "$(grep "^version =.*" pyproject.toml)" setup.py

# verify llama.cpp version matches
grep "$(grep "ARG LLAMA_CPP_SHA=" container-images/ramalama/Containerfile)" \
container-images/cuda/Containerfile

local os
os="$(uname -s)"
binfile=bin/ramalama
Expand Down

0 comments on commit 85ccafe

Please sign in to comment.