Skip to content

Commit

Permalink
Consistency changes
Browse files Browse the repository at this point in the history
Ensure the llama.cpp version is the same across containers. Remove some duplicate
actions, etc.

Signed-off-by: Eric Curtin <[email protected]>
  • Loading branch information
ericcurtin committed Nov 4, 2024
1 parent 5791050 commit 85ccafe
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 31 deletions.
46 changes: 22 additions & 24 deletions container-images/cuda/Containerfile
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
# Base image with CUDA for compilation
FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder

# renovate: datasource=github-releases depName=huggingface/huggingface_hub extractVersion=^v(?<version>.*)
ARG HUGGINGFACE_HUB_VERSION=0.26.2
# renovate: datasource=github-releases depName=containers/omlmd extractVersion=^v(?<version>.*)
ARG OMLMD_VERSION=0.1.6
ARG LLAMA_CPP_SHA=3f1ae2e32cde00c39b96be6d01c2997c29bae555
ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
# renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
ARG WHISPER_CPP_SHA=4e10afb5a94469c605aae4eceb4021fb0e68c8f5

ARG CUDA_DOCKER_ARCH=default

# Install dependencies only needed for building
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
dnf install -y git cmake gcc-c++ python3-pip && \
RUN dnf install -y git cmake gcc-c++ && \
dnf clean all && rm -rf /var/cache/*dnf*

# Install Python packages
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
# Set the temporary installation directory
ENV INSTALL_PREFIX=/tmp/install

# Build llama.cpp
RUN git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && \
git reset --hard ${LLAMA_CPP_SHA} && \
cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
mv build/bin/llama-cli /usr/bin/ && mv build/bin/llama-server /usr/bin/ && \
mv build/ggml/src/libggml.so /usr/lib/ && mv build/src/libllama.so /usr/lib/ && \
cmake --install build && \
cd / && rm -rf llama.cpp

# Build whisper.cpp
RUN git clone https://github.com/ggerganov/whisper.cpp && cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake -B build -DBUILD_SHARED_LIBS=NO -DGGML_CUDA=ON \
-DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
mv build/bin/main /usr/bin/whisper-main && mv build/bin/server /usr/bin/whisper-server && \
if [ -f build/lib/libwhisper.so ]; then mv build/lib/libwhisper.so /usr/lib/; fi && \
cmake --install build && \
mv build/bin/main ${INSTALL_PREFIX}/bin/whisper-main && \
mv build/bin/server ${INSTALL_PREFIX}/bin/whisper-server && \
cd / && rm -rf whisper.cpp

# Final runtime image
Expand All @@ -46,16 +46,14 @@ ARG HUGGINGFACE_HUB_VERSION=0.26.2
ARG OMLMD_VERSION=0.1.6

# Install minimal runtime dependencies
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
dnf install -y python3 python3-pip && dnf clean all && rm -rf /var/cache/*dnf*
RUN dnf install -y python3 python3-pip nvidia-driver-cuda-libs && \
dnf clean all && \
rm -rf /var/cache/*dnf*

# Install Python packages in the runtime image
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" \
"omlmd==${OMLMD_VERSION}"

# Copy only necessary files from the build stage
COPY --from=builder /usr/bin/llama-cli /usr/bin/llama-server /usr/bin/
COPY --from=builder /usr/bin/whisper-main /usr/bin/whisper-server /usr/bin/
COPY --from=builder /usr/lib/libggml.so /usr/lib/libllama.so /usr/lib/
# Copy the entire installation directory from the builder
COPY --from=builder /tmp/install /usr

# Update dynamic linker cache
RUN ldconfig || true
5 changes: 3 additions & 2 deletions container-images/ramalama/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ RUN git clone https://github.com/ggerganov/whisper.cpp.git && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_KOMPUTE=1 \
-DGGML_CCACHE=0 && \
cmake --build build --config Release -j $(nproc) && \
-DBUILD_SHARED_LIBS=NO -DGGML_CCACHE=0 && \
cmake --build build --config Release -j$(nproc) && \
cmake --install build && \
mv build/bin/main /usr/bin/whisper-main && \
mv build/bin/server /usr/bin/whisper-server && \
cd / && \
Expand Down
9 changes: 6 additions & 3 deletions container-images/rocm/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,12 @@ RUN dnf install -y rocm-dev hipblas-devel rocblas-devel && \
git clone https://github.com/ggerganov/whisper.cpp.git && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
make -j $(nproc) GGML_HIPBLAS=1 && \
mv main /usr/bin/whisper-main && \
mv server /usr/bin/whisper-server && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-DBUILD_SHARED_LIBS=NO -DGGML_HIPBLAS=1 && \
cmake --build build --config Release -j$(nproc) && \
cmake --install build && \
mv build/bin/main /usr/bin/whisper-main && \
mv build/bin/server /usr/bin/whisper-server && \
cd / && \
rm -rf /var/cache/*dnf* /opt/rocm-*/lib/llvm \
/opt/rocm-*/lib/rocblas/library/*gfx9* llama.cpp whisper.cpp
Expand Down
5 changes: 3 additions & 2 deletions container-images/vulkan/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ RUN git clone https://github.com/ggerganov/llama.cpp && \
cd whisper.cpp && \
git reset --hard ${WHISPER_CPP_SHA} && \
cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-DGGML_VULKAN=1 && \
cmake --build build --config Release -j $(nproc) && \
-DBUILD_SHARED_LIBS=NO -DGGML_VULKAN=1 && \
cmake --build build --config Release -j$(nproc) && \
cmake --install build && \
mv build/bin/main /usr/bin/whisper-main && \
mv build/bin/server /usr/bin/whisper-server && \
cd / && \
Expand Down
4 changes: 4 additions & 0 deletions test/ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ main() {
# verify pyproject.toml and setup.py have same version
grep "$(grep "^version =.*" pyproject.toml)" setup.py

# verify llama.cpp version matches
grep "$(grep "ARG LLAMA_CPP_SHA=" container-images/ramalama/Containerfile)" \
container-images/cuda/Containerfile

local os
os="$(uname -s)"
binfile=bin/ramalama
Expand Down

0 comments on commit 85ccafe

Please sign in to comment.