Consistency changes
Ensure the llama.cpp version is the same across containers. Remove some
duplicated actions, etc.

Signed-off-by: Eric Curtin <[email protected]>
ericcurtin committed Nov 5, 2024
1 parent 5791050 commit 210e6fe
Showing 7 changed files with 72 additions and 98 deletions.
54 changes: 16 additions & 38 deletions container-images/cuda/Containerfile
@@ -1,41 +1,21 @@
# Base image with CUDA for compilation
FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder

# renovate: datasource=github-releases depName=huggingface/huggingface_hub extractVersion=^v(?<version>.*)
ARG HUGGINGFACE_HUB_VERSION=0.26.2
# renovate: datasource=github-releases depName=containers/omlmd extractVersion=^v(?<version>.*)
ARG OMLMD_VERSION=0.1.6
ARG LLAMA_CPP_SHA=3f1ae2e32cde00c39b96be6d01c2997c29bae555
ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
# renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
ARG WHISPER_CPP_SHA=4e10afb5a94469c605aae4eceb4021fb0e68c8f5

ARG CUDA_DOCKER_ARCH=default

# Install dependencies only needed for building
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
dnf install -y git cmake gcc-c++ python3-pip && \
RUN dnf install -y git cmake gcc-c++ && \
dnf clean all && rm -rf /var/cache/*dnf*

# Install Python packages
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"

# Build llama.cpp
RUN git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && \
git reset --hard ${LLAMA_CPP_SHA} && \
cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
mv build/bin/llama-cli /usr/bin/ && mv build/bin/llama-server /usr/bin/ && \
mv build/ggml/src/libggml.so /usr/lib/ && mv build/src/libllama.so /usr/lib/ && \
cd / && rm -rf llama.cpp

# Build whisper.cpp
RUN git clone https://github.com/ggerganov/whisper.cpp && cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
mv build/bin/main /usr/bin/whisper-main && mv build/bin/server /usr/bin/whisper-server && \
if [ -f build/lib/libwhisper.so ]; then mv build/lib/libwhisper.so /usr/lib/; fi && \
cd / && rm -rf whisper.cpp
# Set the temporary installation directory
ENV INSTALL_PREFIX=/tmp/install

COPY ../scripts /scripts
RUN chmod +x /scripts/*.sh && \
/scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
"$INSTALL_PREFIX" "-DGGML_CUDA=1"

# Final runtime image
FROM docker.io/nvidia/cuda:12.6.2-runtime-ubi9
@@ -46,16 +26,14 @@ ARG HUGGINGFACE_HUB_VERSION=0.26.2
ARG OMLMD_VERSION=0.1.6

# Install minimal runtime dependencies
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
dnf install -y python3 python3-pip && dnf clean all && rm -rf /var/cache/*dnf*
RUN dnf install -y python3 python3-pip && \
dnf clean all && \
rm -rf /var/cache/*dnf*

# Install Python packages in the runtime image
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" \
"omlmd==${OMLMD_VERSION}"

# Copy only necessary files from the build stage
COPY --from=builder /usr/bin/llama-cli /usr/bin/llama-server /usr/bin/
COPY --from=builder /usr/bin/whisper-main /usr/bin/whisper-server /usr/bin/
COPY --from=builder /usr/lib/libggml.so /usr/lib/libllama.so /usr/lib/
# Copy the entire installation directory from the builder
COPY --from=builder /tmp/install /usr

# Update dynamic linker cache
RUN ldconfig || true
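
The CUDA image now builds both projects through the shared script into a staging
prefix and copies the whole tree into the runtime stage, replacing the previous
hand-picked binary and library copies. A minimal spot-check sketch (the image
tag is an assumption, not part of this commit):

    # Sketch: verify the staged tree landed in the runtime image as expected
    podman run --rm quay.io/ramalama/cuda ls /usr/bin/llama-server /usr/bin/whisper-server
    podman run --rm quay.io/ramalama/cuda sh -c 'ldconfig -p | grep -E "libllama|libggml"'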
24 changes: 4 additions & 20 deletions container-images/ramalama/Containerfile
@@ -39,26 +39,10 @@ RUN dnf install -y glslang && \
dnf clean all && \
rm -rf /var/cache/*dnf*

RUN git clone --recursive https://github.com/ggerganov/llama.cpp && \
cd llama.cpp && \
git reset --hard ${LLAMA_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_KOMPUTE=1 \
-DGGML_CCACHE=0 && \
cmake --build build --config Release -j $(nproc) && \
cmake --install build && \
cd / && \
rm -rf llama.cpp

RUN git clone https://github.com/ggerganov/whisper.cpp.git && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_KOMPUTE=1 \
-DGGML_CCACHE=0 && \
cmake --build build --config Release -j $(nproc) && \
mv build/bin/main /usr/bin/whisper-main && \
mv build/bin/server /usr/bin/whisper-server && \
cd / && \
rm -rf whisper.cpp
COPY ../scripts /scripts
RUN chmod +x /scripts/*.sh && \
/scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
"/usr" "-DGGML_KOMPUTE=1"

ENV WHISPER_CPP_SHA=${WHISPER_CPP_SHA}
ENV LLAMA_CPP_SHA=${LLAMA_CPP_SHA}
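
The trailing ENV lines are what let derived images reuse the pinned versions:
the vulkan Containerfile below references $LLAMA_CPP_SHA and $WHISPER_CPP_SHA
without declaring its own ARGs, inheriting them from this base. A quick sketch
to confirm they persist (published tag assumed):

    # Sketch: ENV values survive into child builds and containers
    podman run --rm quay.io/ramalama/ramalama:latest printenv LLAMA_CPP_SHA WHISPER_CPP_SHA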
25 changes: 7 additions & 18 deletions container-images/rocm/Containerfile
@@ -5,8 +5,8 @@ RUN /usr/bin/python3 --version
ARG ROCM_VERSION=6.2.2
ARG AMDGPU_VERSION=6.2.2

COPY amdgpu.repo /etc/yum.repos.d/
COPY rocm.repo /etc/yum.repos.d/
COPY rocm/amdgpu.repo /etc/yum.repos.d/
COPY rocm/rocm.repo /etc/yum.repos.d/

RUN dnf config-manager --add-repo \
https://mirror.stream.centos.org/9-stream/AppStream/$(uname -m)/os/
@@ -16,23 +16,12 @@ RUN curl --retry 8 --retry-all-errors -o \
cat /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Official
RUN rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Official

COPY scripts /scripts
RUN dnf install -y rocm-dev hipblas-devel rocblas-devel && \
dnf clean all && \
git clone https://github.com/ggerganov/llama.cpp && \
cd llama.cpp && \
git reset --hard ${LLAMA_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-DGGML_HIPBLAS=1 && \
cmake --build build --config Release -j $(nproc) && \
cmake --install build && \
cd / && \
git clone https://github.com/ggerganov/whisper.cpp.git && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
make -j $(nproc) GGML_HIPBLAS=1 && \
mv main /usr/bin/whisper-main && \
mv server /usr/bin/whisper-server && \
cd / && \
chmod +x /scripts/*.sh && \
/scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
"/usr" "-DGGML_HIPBLAS=1" && \
rm -rf /var/cache/*dnf* /opt/rocm-*/lib/llvm \
/opt/rocm-*/lib/rocblas/library/*gfx9* llama.cpp whisper.cpp
/opt/rocm-*/lib/rocblas/library/*gfx9*
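
The ROCm variant uses the same script-based build, then prunes LLVM and the
gfx9 rocBLAS kernel libraries to keep the image size down. A hedged sketch to
confirm the pruning took effect (image tag assumed):

    # Sketch: expect zero gfx9 rocBLAS kernel files in the final image
    podman run --rm quay.io/ramalama/rocm sh -c \
      'ls /opt/rocm-*/lib/rocblas/library/*gfx9* 2>/dev/null | wc -l'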

34 changes: 34 additions & 0 deletions container-images/scripts/build_llama_and_whisper.sh
@@ -0,0 +1,34 @@
#!/bin/bash
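# Positional arguments (as passed by the Containerfiles in this commit):
#   $1  llama.cpp commit SHA to build
#   $2  whisper.cpp commit SHA to build
#   $3  install prefix (/usr, or a staging dir such as /tmp/install)
#   $4  GGML backend cmake flag (e.g. -DGGML_CUDA=1, -DGGML_VULKAN=1)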

main() {
set -e

local llama_cpp_sha="$1"
local whisper_cpp_sha="$2"
local install_prefix="$3"
local ggml_flag="$4"
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
git reset --hard "$llama_cpp_sha"
cmake -B build -DGGML_CCACHE=0 $ggml_flag \
-DCMAKE_INSTALL_PREFIX="$install_prefix"
cmake --build build --config Release -j$(nproc)
cmake --install build
cd ..
rm -rf llama.cpp

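# whisper.cpp is linked statically (BUILD_SHARED_LIBS=NO), so only its
# renamed binaries need to ship alongside the llama.cpp libraries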
git clone https://github.com/ggerganov/whisper.cpp
cd whisper.cpp
git reset --hard "$whisper_cpp_sha"
cmake -B build -DGGML_CCACHE=0 $ggml_flag \
-DBUILD_SHARED_LIBS=NO -DCMAKE_INSTALL_PREFIX="$install_prefix"
cmake --build build --config Release -j$(nproc)
cmake --install build
mv build/bin/main "$install_prefix/bin/whisper-main"
mv build/bin/server "$install_prefix/bin/whisper-server"
cd ..
rm -rf whisper.cpp
}

main "$@"
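
Each Containerfile now calls this script with only the install prefix and the
backend flag varying; the invocations from the diffs above, collected for
comparison (the cuda builder stage sets INSTALL_PREFIX=/tmp/install):

    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" "/tmp/install" "-DGGML_CUDA=1"   # cuda (builder stage)
    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" "/usr" "-DGGML_KOMPUTE=1"        # ramalama
    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" "/usr" "-DGGML_HIPBLAS=1"        # rocm
    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" "/usr" "-DGGML_VULKAN=1"         # vulkan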

24 changes: 4 additions & 20 deletions container-images/vulkan/Containerfile
@@ -1,23 +1,7 @@
FROM quay.io/ramalama/ramalama:latest

RUN /usr/bin/python3 --version

RUN git clone https://github.com/ggerganov/llama.cpp && \
cd llama.cpp && \
git reset --hard ${LLAMA_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-DGGML_VULKAN=1 && \
cmake --build build --config Release -j $(nproc) && \
cmake --install build && \
cd / && \
git clone https://github.com/ggerganov/whisper.cpp.git && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-DGGML_VULKAN=1 && \
cmake --build build --config Release -j $(nproc) && \
mv build/bin/main /usr/bin/whisper-main && \
mv build/bin/server /usr/bin/whisper-server && \
cd / && \
rm -rf llama.cpp whisper.cpp
COPY ../scripts /scripts
RUN chmod +x /scripts/*.sh && \
/scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
"/usr" "-DGGML_VULKAN=1"

5 changes: 3 additions & 2 deletions container_build.sh
@@ -20,7 +20,8 @@ select_container_manager() {

add_build_platform() {
conman_build+=("build" "--platform" "$platform")
conman_build+=("-t" "quay.io/ramalama/$image_name" ".")
conman_build+=("-t" "quay.io/ramalama/$image_name")
conman_build+=("-f" "container-images/$image_name/Containerfile" ".")
}

rm_container_image() {
@@ -30,7 +31,7 @@ rm_container_image() {
}

build() {
cd "$1"
cd "container-images"
local image_name
image_name=$(echo "$1" | sed "s#container-images/##g")
local conman_build=("${conman[@]}")
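
The -f flag now points each build at that image's own Containerfile, with the
image name derived from the directory argument. A worked example of the sed in
build(), with a hypothetical argument:

    echo "container-images/cuda" | sed "s#container-images/##g"   # prints: cuda
    # so add_build_platform() assembles, roughly:
    #   podman build --platform <platform> -t quay.io/ramalama/cuda \
    #     -f container-images/cuda/Containerfile .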
4 changes: 4 additions & 0 deletions test/ci.sh
@@ -29,6 +29,10 @@ main() {
# verify pyproject.toml and setup.py have same version
grep "$(grep "^version =.*" pyproject.toml)" setup.py

# verify llama.cpp version matches
grep "$(grep "ARG LLAMA_CPP_SHA=" container-images/ramalama/Containerfile)" \
container-images/cuda/Containerfile

local os
os="$(uname -s)"
binfile=bin/ramalama
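
The new check pins the two Containerfiles together: the inner grep pulls the
full "ARG LLAMA_CPP_SHA=..." line out of the ramalama Containerfile, and the
outer grep demands that exact line in the cuda Containerfile, exiting non-zero
(and failing CI) on any drift. Expanded by hand with the SHA from this commit:

    grep "ARG LLAMA_CPP_SHA=" container-images/ramalama/Containerfile
    # -> ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
    grep "ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78" \
      container-images/cuda/Containerfile   # non-zero exit if the SHAs differ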
