Consistency changes
Ensure the llama.cpp version is the same across containers. Remove some
duplicated actions, etc.

Signed-off-by: Eric Curtin <[email protected]>
ericcurtin committed Nov 5, 2024
1 parent 5791050 commit 210e6fe
Showing 7 changed files with 72 additions and 98 deletions.
54 changes: 16 additions & 38 deletions container-images/cuda/Containerfile
@@ -1,41 +1,21 @@
# Base image with CUDA for compilation
FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder

# renovate: datasource=github-releases depName=huggingface/huggingface_hub extractVersion=^v(?<version>.*)
ARG HUGGINGFACE_HUB_VERSION=0.26.2
# renovate: datasource=github-releases depName=containers/omlmd extractVersion=^v(?<version>.*)
ARG OMLMD_VERSION=0.1.6
ARG LLAMA_CPP_SHA=3f1ae2e32cde00c39b96be6d01c2997c29bae555
ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
# renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
ARG WHISPER_CPP_SHA=4e10afb5a94469c605aae4eceb4021fb0e68c8f5

ARG CUDA_DOCKER_ARCH=default

# Install dependencies only needed for building
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
dnf install -y git cmake gcc-c++ python3-pip && \
RUN dnf install -y git cmake gcc-c++ && \
dnf clean all && rm -rf /var/cache/*dnf*

# Install Python packages
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"

# Build llama.cpp
RUN git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && \
git reset --hard ${LLAMA_CPP_SHA} && \
cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
mv build/bin/llama-cli /usr/bin/ && mv build/bin/llama-server /usr/bin/ && \
mv build/ggml/src/libggml.so /usr/lib/ && mv build/src/libllama.so /usr/lib/ && \
cd / && rm -rf llama.cpp

# Build whisper.cpp
RUN git clone https://github.com/ggerganov/whisper.cpp && cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
mv build/bin/main /usr/bin/whisper-main && mv build/bin/server /usr/bin/whisper-server && \
if [ -f build/lib/libwhisper.so ]; then mv build/lib/libwhisper.so /usr/lib/; fi && \
cd / && rm -rf whisper.cpp
# Set the temporary installation directory
ENV INSTALL_PREFIX=/tmp/install

COPY ../scripts /scripts
RUN chmod +x /scripts/*.sh && \
/scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
"$INSTALL_PREFIX" "-DGGML_CUDA=1"

# Final runtime image
FROM docker.io/nvidia/cuda:12.6.2-runtime-ubi9
@@ -46,16 +26,14 @@ ARG HUGGINGFACE_HUB_VERSION=0.26.2
ARG OMLMD_VERSION=0.1.6

# Install minimal runtime dependencies
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
dnf install -y python3 python3-pip && dnf clean all && rm -rf /var/cache/*dnf*
RUN dnf install -y python3 python3-pip && \
dnf clean all && \
rm -rf /var/cache/*dnf*

# Install Python packages in the runtime image
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" \
"omlmd==${OMLMD_VERSION}"

# Copy only necessary files from the build stage
COPY --from=builder /usr/bin/llama-cli /usr/bin/llama-server /usr/bin/
COPY --from=builder /usr/bin/whisper-main /usr/bin/whisper-server /usr/bin/
COPY --from=builder /usr/lib/libggml.so /usr/lib/libllama.so /usr/lib/
# Copy the entire installation directory from the builder
COPY --from=builder /tmp/install /usr

# Update dynamic linker cache
RUN ldconfig || true
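
The CUDA image now builds both projects through the shared script into a staging
prefix and copies the whole tree into the runtime stage, replacing the previous
hand-picked binary and library copies. A minimal spot-check sketch (the image
tag is an assumption, not part of this commit):

    # Sketch: verify the staged tree landed in the runtime image as expected
    podman run --rm quay.io/ramalama/cuda ls /usr/bin/llama-server /usr/bin/whisper-server
    podman run --rm quay.io/ramalama/cuda sh -c 'ldconfig -p | grep -E "libllama|libggml"'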
24 changes: 4 additions & 20 deletions container-images/ramalama/Containerfile
@@ -39,26 +39,10 @@ RUN dnf install -y glslang && \
dnf clean all && \
rm -rf /var/cache/*dnf*

RUN git clone --recursive https://github.com/ggerganov/llama.cpp && \
cd llama.cpp && \
git reset --hard ${LLAMA_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_KOMPUTE=1 \
-DGGML_CCACHE=0 && \
cmake --build build --config Release -j $(nproc) && \
cmake --install build && \
cd / && \
rm -rf llama.cpp

RUN git clone https://github.com/ggerganov/whisper.cpp.git && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_KOMPUTE=1 \
-DGGML_CCACHE=0 && \
cmake --build build --config Release -j $(nproc) && \
mv build/bin/main /usr/bin/whisper-main && \
mv build/bin/server /usr/bin/whisper-server && \
cd / && \
rm -rf whisper.cpp
COPY ../scripts /scripts
RUN chmod +x /scripts/*.sh && \
/scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
"/usr" "-DGGML_KOMPUTE=1"

ENV WHISPER_CPP_SHA=${WHISPER_CPP_SHA}
ENV LLAMA_CPP_SHA=${LLAMA_CPP_SHA}
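
The trailing ENV lines are what let derived images reuse the pinned versions:
the vulkan Containerfile below references $LLAMA_CPP_SHA and $WHISPER_CPP_SHA
without declaring its own ARGs, inheriting them from this base. A quick sketch
to confirm they persist (published tag assumed):

    # Sketch: ENV values survive into child builds and containers
    podman run --rm quay.io/ramalama/ramalama:latest printenv LLAMA_CPP_SHA WHISPER_CPP_SHA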
25 changes: 7 additions & 18 deletions container-images/rocm/Containerfile
@@ -5,8 +5,8 @@ RUN /usr/bin/python3 --version
ARG ROCM_VERSION=6.2.2
ARG AMDGPU_VERSION=6.2.2

COPY amdgpu.repo /etc/yum.repos.d/
COPY rocm.repo /etc/yum.repos.d/
COPY rocm/amdgpu.repo /etc/yum.repos.d/
COPY rocm/rocm.repo /etc/yum.repos.d/

RUN dnf config-manager --add-repo \
https://mirror.stream.centos.org/9-stream/AppStream/$(uname -m)/os/
@@ -16,23 +16,12 @@ RUN curl --retry 8 --retry-all-errors -o \
cat /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Official
RUN rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Official

COPY scripts /scripts
RUN dnf install -y rocm-dev hipblas-devel rocblas-devel && \
dnf clean all && \
git clone https://github.com/ggerganov/llama.cpp && \
cd llama.cpp && \
git reset --hard ${LLAMA_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-DGGML_HIPBLAS=1 && \
cmake --build build --config Release -j $(nproc) && \
cmake --install build && \
cd / && \
git clone https://github.com/ggerganov/whisper.cpp.git && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
make -j $(nproc) GGML_HIPBLAS=1 && \
mv main /usr/bin/whisper-main && \
mv server /usr/bin/whisper-server && \
cd / && \
chmod +x /scripts/*.sh && \
/scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
"/usr" "-DGGML_HIPBLAS=1" && \
rm -rf /var/cache/*dnf* /opt/rocm-*/lib/llvm \
/opt/rocm-*/lib/rocblas/library/*gfx9* llama.cpp whisper.cpp
/opt/rocm-*/lib/rocblas/library/*gfx9*
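
The ROCm variant uses the same script-based build, then prunes LLVM and the
gfx9 rocBLAS kernel libraries to keep the image size down. A hedged sketch to
confirm the pruning took effect (image tag assumed):

    # Sketch: expect zero gfx9 rocBLAS kernel files in the final image
    podman run --rm quay.io/ramalama/rocm sh -c \
      'ls /opt/rocm-*/lib/rocblas/library/*gfx9* 2>/dev/null | wc -l'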

34 changes: 34 additions & 0 deletions container-images/scripts/build_llama_and_whisper.sh
@@ -0,0 +1,34 @@
#!/bin/bash
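# Positional arguments (as passed by the Containerfiles in this commit):
#   $1  llama.cpp commit SHA to build
#   $2  whisper.cpp commit SHA to build
#   $3  install prefix (/usr, or a staging dir such as /tmp/install)
#   $4  GGML backend cmake flag (e.g. -DGGML_CUDA=1, -DGGML_VULKAN=1)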

main() {
set -e

local llama_cpp_sha="$1"
local whisper_cpp_sha="$2"
local install_prefix="$3"
local ggml_flag="$4"
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
git reset --hard "$llama_cpp_sha"
cmake -B build -DGGML_CCACHE=0 $ggml_flag \
-DCMAKE_INSTALL_PREFIX="$install_prefix"
cmake --build build --config Release -j$(nproc)
cmake --install build
cd ..
rm -rf llama.cpp

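# whisper.cpp is linked statically (BUILD_SHARED_LIBS=NO), so only its
# renamed binaries need to ship alongside the llama.cpp libraries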
git clone https://github.com/ggerganov/whisper.cpp
cd whisper.cpp
git reset --hard "$whisper_cpp_sha"
cmake -B build -DGGML_CCACHE=0 $ggml_flag \
-DBUILD_SHARED_LIBS=NO -DCMAKE_INSTALL_PREFIX="$install_prefix"
cmake --build build --config Release -j$(nproc)
cmake --install build
mv build/bin/main "$install_prefix/bin/whisper-main"
mv build/bin/server "$install_prefix/bin/whisper-server"
cd ..
rm -rf whisper.cpp
}

main "$@"
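
Each Containerfile now calls this script with only the install prefix and the
backend flag varying; the invocations from the diffs above, collected for
comparison (the cuda builder stage sets INSTALL_PREFIX=/tmp/install):

    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" "/tmp/install" "-DGGML_CUDA=1"   # cuda (builder stage)
    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" "/usr" "-DGGML_KOMPUTE=1"        # ramalama
    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" "/usr" "-DGGML_HIPBLAS=1"        # rocm
    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" "/usr" "-DGGML_VULKAN=1"         # vulkan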

24 changes: 4 additions & 20 deletions container-images/vulkan/Containerfile
@@ -1,23 +1,7 @@
FROM quay.io/ramalama/ramalama:latest

RUN /usr/bin/python3 --version

RUN git clone https://github.com/ggerganov/llama.cpp && \
cd llama.cpp && \
git reset --hard ${LLAMA_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-DGGML_VULKAN=1 && \
cmake --build build --config Release -j $(nproc) && \
cmake --install build && \
cd / && \
git clone https://github.com/ggerganov/whisper.cpp.git && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-DGGML_VULKAN=1 && \
cmake --build build --config Release -j $(nproc) && \
mv build/bin/main /usr/bin/whisper-main && \
mv build/bin/server /usr/bin/whisper-server && \
cd / && \
rm -rf llama.cpp whisper.cpp
COPY ../scripts /scripts
RUN chmod +x /scripts/*.sh && \
/scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
"/usr" "-DGGML_VULKAN=1"

5 changes: 3 additions & 2 deletions container_build.sh
@@ -20,7 +20,8 @@ select_container_manager() {

add_build_platform() {
conman_build+=("build" "--platform" "$platform")
conman_build+=("-t" "quay.io/ramalama/$image_name" ".")
conman_build+=("-t" "quay.io/ramalama/$image_name")
conman_build+=("-f" "container-images/$image_name/Containerfile" ".")
}

rm_container_image() {
@@ -30,7 +31,7 @@ rm_container_image() {
}

build() {
cd "$1"
cd "container-images"
local image_name
image_name=$(echo "$1" | sed "s#container-images/##g")
local conman_build=("${conman[@]}")
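
The -f flag now points each build at that image's own Containerfile, with the
image name derived from the directory argument. A worked example of the sed in
build(), with a hypothetical argument:

    echo "container-images/cuda" | sed "s#container-images/##g"   # prints: cuda
    # so add_build_platform() assembles, roughly:
    #   podman build --platform <platform> -t quay.io/ramalama/cuda \
    #     -f container-images/cuda/Containerfile .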
4 changes: 4 additions & 0 deletions test/ci.sh
@@ -29,6 +29,10 @@ main() {
# verify pyproject.toml and setup.py have same version
grep "$(grep "^version =.*" pyproject.toml)" setup.py

# verify llama.cpp version matches
grep "$(grep "ARG LLAMA_CPP_SHA=" container-images/ramalama/Containerfile)" \
container-images/cuda/Containerfile

local os
os="$(uname -s)"
binfile=bin/ramalama
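
The new check pins the two Containerfiles together: the inner grep pulls the
full "ARG LLAMA_CPP_SHA=..." line out of the ramalama Containerfile, and the
outer grep demands that exact line in the cuda Containerfile, exiting non-zero
(and failing CI) on any drift. Expanded by hand with the SHA from this commit:

    grep "ARG LLAMA_CPP_SHA=" container-images/ramalama/Containerfile
    # -> ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
    grep "ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78" \
      container-images/cuda/Containerfile   # non-zero exit if the SHAs differ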
