diff --git a/container-images/cuda/Containerfile b/container-images/cuda/Containerfile
index 244093f2..fc84016e 100644
--- a/container-images/cuda/Containerfile
+++ b/container-images/cuda/Containerfile
@@ -1,41 +1,21 @@
 # Base image with CUDA for compilation
 FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder
 
-# renovate: datasource=github-releases depName=huggingface/huggingface_hub extractVersion=^v(?<version>.*)
-ARG HUGGINGFACE_HUB_VERSION=0.26.2
-# renovate: datasource=github-releases depName=containers/omlmd extractVersion=^v(?<version>.*)
-ARG OMLMD_VERSION=0.1.6
-ARG LLAMA_CPP_SHA=3f1ae2e32cde00c39b96be6d01c2997c29bae555
+ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=4e10afb5a94469c605aae4eceb4021fb0e68c8f5
-ARG CUDA_DOCKER_ARCH=default
-
 # Install dependencies only needed for building
-RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
-    dnf install -y git cmake gcc-c++ python3-pip && \
+RUN dnf install -y git cmake gcc-c++ && \
     dnf clean all && rm -rf /var/cache/*dnf*
 
-# Install Python packages
-RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
-
-# Build llama.cpp
-RUN git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && \
-    git reset --hard ${LLAMA_CPP_SHA} && \
-    cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release -j$(nproc) && \
-    mv build/bin/llama-cli /usr/bin/ && mv build/bin/llama-server /usr/bin/ && \
-    mv build/ggml/src/libggml.so /usr/lib/ && mv build/src/libllama.so /usr/lib/ && \
-    cd / && rm -rf llama.cpp
-
-# Build whisper.cpp
-RUN git clone https://github.com/ggerganov/whisper.cpp && cd whisper.cpp && \
-    git reset --hard ${WHISPER_CPP_SHA} && \
-    cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release -j$(nproc) && \
-    mv build/bin/main /usr/bin/whisper-main && mv build/bin/server /usr/bin/whisper-server && \
-    if [ -f build/lib/libwhisper.so ]; then mv build/lib/libwhisper.so /usr/lib/; fi && \
-    cd / && rm -rf whisper.cpp
+# Set the temporary installation directory
+ENV INSTALL_PREFIX=/tmp/install
+
+COPY ../scripts /scripts
+RUN chmod +x /scripts/*.sh && \
+    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
+    "$INSTALL_PREFIX" "-DGGML_CUDA=1"
 
 # Final runtime image
 FROM docker.io/nvidia/cuda:12.6.2-runtime-ubi9
@@ -46,16 +26,14 @@ ARG HUGGINGFACE_HUB_VERSION=0.26.2
 ARG OMLMD_VERSION=0.1.6
 
 # Install minimal runtime dependencies
-RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
-    dnf install -y python3 python3-pip && dnf clean all && rm -rf /var/cache/*dnf*
+RUN dnf install -y python3 python3-pip && \
+    dnf clean all && \
+    rm -rf /var/cache/*dnf*
 
 # Install Python packages in the runtime image
-RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
+RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" \
+    "omlmd==${OMLMD_VERSION}"
 
-# Copy only necessary files from the build stage
-COPY --from=builder /usr/bin/llama-cli /usr/bin/llama-server /usr/bin/
-COPY --from=builder /usr/bin/whisper-main /usr/bin/whisper-server /usr/bin/
-COPY --from=builder /usr/lib/libggml.so /usr/lib/libllama.so /usr/lib/
+# Copy the entire installation directory from the builder
+COPY --from=builder /tmp/install /usr
 
-# Update dynamic linker cache
-RUN ldconfig || true
\ No newline at end of file
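The CUDA image now does a staged install: the builder compiles into INSTALL_PREFIX=/tmp/install, and the runtime stage copies that whole tree onto /usr, replacing the previous per-file COPY list. A minimal smoke test of the resulting image might look like the following; the quay.io/ramalama/cuda tag is illustrative, and it assumes ls and ldd are present in the runtime base image:

    # Confirm the staged binaries landed in /usr and that their shared
    # libraries resolve against the CUDA runtime base image.
    podman run --rm quay.io/ramalama/cuda ls /usr/bin/llama-server /usr/bin/whisper-server
    podman run --rm quay.io/ramalama/cuda ldd /usr/bin/llama-server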
diff --git a/container-images/ramalama/Containerfile b/container-images/ramalama/Containerfile
index 3369dd8b..3425679d 100644
--- a/container-images/ramalama/Containerfile
+++ b/container-images/ramalama/Containerfile
@@ -39,26 +39,10 @@ RUN dnf install -y glslang && \
     dnf clean all && \
     rm -rf /var/cache/*dnf*
 
-RUN git clone --recursive https://github.com/ggerganov/llama.cpp && \
-    cd llama.cpp && \
-    git reset --hard ${LLAMA_CPP_SHA} && \
-    cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_KOMPUTE=1 \
-        -DGGML_CCACHE=0 && \
-    cmake --build build --config Release -j $(nproc) && \
-    cmake --install build && \
-    cd / && \
-    rm -rf llama.cpp
-
-RUN git clone https://github.com/ggerganov/whisper.cpp.git && \
-    cd whisper.cpp && \
-    git reset --hard ${WHISPER_CPP_SHA} && \
-    cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_KOMPUTE=1 \
-        -DGGML_CCACHE=0 && \
-    cmake --build build --config Release -j $(nproc) && \
-    mv build/bin/main /usr/bin/whisper-main && \
-    mv build/bin/server /usr/bin/whisper-server && \
-    cd / && \
-    rm -rf whisper.cpp
+COPY ../scripts /scripts
+RUN chmod +x /scripts/*.sh && \
+    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
+    "/usr" "-DGGML_KOMPUTE=1"
 
 ENV WHISPER_CPP_SHA=${WHISPER_CPP_SHA}
 ENV LLAMA_CPP_SHA=${LLAMA_CPP_SHA}
diff --git a/container-images/rocm/Containerfile b/container-images/rocm/Containerfile
index 8cd3e1bf..b32f6be2 100644
--- a/container-images/rocm/Containerfile
+++ b/container-images/rocm/Containerfile
@@ -5,8 +5,8 @@ RUN /usr/bin/python3 --version
 ARG ROCM_VERSION=6.2.2
 ARG AMDGPU_VERSION=6.2.2
 
-COPY amdgpu.repo /etc/yum.repos.d/
-COPY rocm.repo /etc/yum.repos.d/
+COPY rocm/amdgpu.repo /etc/yum.repos.d/
+COPY rocm/rocm.repo /etc/yum.repos.d/
 
 RUN dnf config-manager --add-repo \
     https://mirror.stream.centos.org/9-stream/AppStream/$(uname -m)/os/
@@ -16,23 +16,12 @@ RUN curl --retry 8 --retry-all-errors -o \
     cat /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Official
 RUN rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Official
 
+COPY scripts /scripts
 RUN dnf install -y rocm-dev hipblas-devel rocblas-devel && \
     dnf clean all && \
-    git clone https://github.com/ggerganov/llama.cpp && \
-    cd llama.cpp && \
-    git reset --hard ${LLAMA_CPP_SHA} && \
-    cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-        -DGGML_HIPBLAS=1 && \
-    cmake --build build --config Release -j $(nproc) && \
-    cmake --install build && \
-    cd / && \
-    git clone https://github.com/ggerganov/whisper.cpp.git && \
-    cd whisper.cpp && \
-    git reset --hard ${WHISPER_CPP_SHA} && \
-    make -j $(nproc) GGML_HIPBLAS=1 && \
-    mv main /usr/bin/whisper-main && \
-    mv server /usr/bin/whisper-server && \
-    cd / && \
+    chmod +x /scripts/*.sh && \
+    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
+    "/usr" "-DGGML_HIPBLAS=1" && \
     rm -rf /var/cache/*dnf* /opt/rocm-*/lib/llvm \
-        /opt/rocm-*/lib/rocblas/library/*gfx9* llama.cpp whisper.cpp
+        /opt/rocm-*/lib/rocblas/library/*gfx9*
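The COPY sources above (rocm/amdgpu.repo, scripts) are now spelled relative to container-images/, which the updated container_build.sh passes as the build context (see below). An illustrative manual build from the repository root, assuming podman and the new layout, would be:

    # Build context is container-images/, so the Containerfile's
    # "COPY rocm/amdgpu.repo ..." resolves to container-images/rocm/amdgpu.repo.
    podman build -t quay.io/ramalama/rocm \
        -f container-images/rocm/Containerfile container-images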
diff --git a/container-images/scripts/build_llama_and_whisper.sh b/container-images/scripts/build_llama_and_whisper.sh
new file mode 100644
index 00000000..deb38d19
--- /dev/null
+++ b/container-images/scripts/build_llama_and_whisper.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+main() {
+  set -e
+
+  local llama_cpp_sha="$1"
+  local whisper_cpp_sha="$2"
+  local install_prefix="$3"
+  local ggml_flag="$4"
+  git clone https://github.com/ggerganov/llama.cpp
+  cd llama.cpp
+  git reset --hard "$llama_cpp_sha"
+  cmake -B build -DGGML_CCACHE=0 $ggml_flag \
+      -DCMAKE_INSTALL_PREFIX="$install_prefix"
+  cmake --build build --config Release -j$(nproc)
+  cmake --install build
+  cd ..
+  rm -rf llama.cpp
+
+  git clone https://github.com/ggerganov/whisper.cpp
+  cd whisper.cpp
+  git reset --hard "$whisper_cpp_sha"
+  cmake -B build -DGGML_CCACHE=0 $ggml_flag \
+      -DBUILD_SHARED_LIBS=NO -DCMAKE_INSTALL_PREFIX="$install_prefix"
+  cmake --build build --config Release -j$(nproc)
+  cmake --install build
+  mv build/bin/main "$install_prefix/bin/whisper-main"
+  mv build/bin/server "$install_prefix/bin/whisper-server"
+  cd ..
+  rm -rf whisper.cpp
+}
+
+main "$@"
+
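Each Containerfile now reduces to one call into this script, varying only the install prefix and the GGML backend flag. For illustration, the Vulkan image's build step is equivalent to running the script directly as below; the SHA values come from the Containerfile ARGs and are placeholders here:

    # Direct invocation, with the backend flag passed as a single argument.
    ./container-images/scripts/build_llama_and_whisper.sh \
        "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" /usr "-DGGML_VULKAN=1"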
diff --git a/container-images/vulkan/Containerfile b/container-images/vulkan/Containerfile
index 96719007..5d9eaacc 100644
--- a/container-images/vulkan/Containerfile
+++ b/container-images/vulkan/Containerfile
@@ -1,23 +1,7 @@
 FROM quay.io/ramalama/ramalama:latest
 
-RUN /usr/bin/python3 --version
-
-RUN git clone https://github.com/ggerganov/llama.cpp && \
-    cd llama.cpp && \
-    git reset --hard ${LLAMA_CPP_SHA} && \
-    cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-        -DGGML_VULKAN=1 && \
-    cmake --build build --config Release -j $(nproc) && \
-    cmake --install build && \
-    cd / && \
-    git clone https://github.com/ggerganov/whisper.cpp.git && \
-    cd whisper.cpp && \
-    git reset --hard ${WHISPER_CPP_SHA} && \
-    cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-        -DGGML_VULKAN=1 && \
-    cmake --build build --config Release -j $(nproc) && \
-    mv build/bin/main /usr/bin/whisper-main && \
-    mv build/bin/server /usr/bin/whisper-server && \
-    cd / && \
-    rm -rf llama.cpp whisper.cpp
+COPY ../scripts /scripts
+RUN chmod +x /scripts/*.sh && \
+    /scripts/build_llama_and_whisper.sh "$LLAMA_CPP_SHA" "$WHISPER_CPP_SHA" \
+    "/usr" "-DGGML_VULKAN=1"
 
diff --git a/container_build.sh b/container_build.sh
index fcf1e341..d92b9573 100755
--- a/container_build.sh
+++ b/container_build.sh
@@ -20,7 +20,8 @@ select_container_manager() {
 
 add_build_platform() {
   conman_build+=("build" "--platform" "$platform")
-  conman_build+=("-t" "quay.io/ramalama/$image_name" ".")
+  conman_build+=("-t" "quay.io/ramalama/$image_name")
+  conman_build+=("-f" "container-images/$image_name/Containerfile" ".")
 }
 
 rm_container_image() {
@@ -30,7 +31,7 @@ rm_container_image() {
 }
 
 build() {
-  cd "$1"
+  cd "container-images"
   local image_name
   image_name=$(echo "$1" | sed "s#container-images/##g")
   local conman_build=("${conman[@]}")
diff --git a/test/ci.sh b/test/ci.sh
index 10b217b1..6fe171f7 100755
--- a/test/ci.sh
+++ b/test/ci.sh
@@ -29,6 +29,10 @@ main() {
   # verify pyproject.toml and setup.py have same version
   grep "$(grep "^version =.*" pyproject.toml)" setup.py
 
+  # verify llama.cpp version matches
+  grep "$(grep "ARG LLAMA_CPP_SHA=" container-images/ramalama/Containerfile)" \
+    container-images/cuda/Containerfile
+
   local os
   os="$(uname -s)"
   binfile=bin/ramalama
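The new ci.sh check works by capturing the full ARG LLAMA_CPP_SHA= line from the ramalama Containerfile and grepping for that exact line in the cuda Containerfile; if the two pins ever diverge, the outer grep finds no match and exits non-zero, failing CI. A minimal standalone sketch of the same idea:

    # Sketch: both Containerfiles must pin the identical llama.cpp SHA.
    pin=$(grep "ARG LLAMA_CPP_SHA=" container-images/ramalama/Containerfile)
    grep -q "$pin" container-images/cuda/Containerfile || {
      echo "llama.cpp SHA mismatch between ramalama and cuda images" >&2
      exit 1
    }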