diff --git a/.devops/full.Dockerfile b/.devops/cpu.Dockerfile similarity index 58% rename from .devops/full.Dockerfile rename to .devops/cpu.Dockerfile index d93c0be6a70c0..a31238c4cac28 100644 --- a/.devops/full.Dockerfile +++ b/.devops/cpu.Dockerfile @@ -14,7 +14,7 @@ RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VA mkdir -p /app/lib && \ find build -name "*.so" -exec cp {} /app/lib/ \; -FROM ubuntu:$UBUNTU_VERSION as runtime +FROM ubuntu:$UBUNTU_VERSION as full WORKDIR /app @@ -36,3 +36,37 @@ COPY --from=build /app/gguf-py /app/gguf-py ENV LC_ALL=C.utf8 ENTRYPOINT ["/app/tools.sh"] + + +FROM ubuntu:$UBUNTU_VERSION AS light + +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y libgomp1 + +COPY --from=build /app/build/bin/llama-cli /app/ +COPY --from=build /app/lib/ /app/ + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/app/llama-cli" ] + + +FROM ubuntu:$UBUNTU_VERSION AS server + +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev libgomp1 curl + +COPY --from=build /app/build/bin/llama-server /app/ +COPY --from=build /app/lib/ /app/ + +ENV LC_ALL=C.utf8 +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/cuda.Dockerfile b/.devops/cuda.Dockerfile new file mode 100644 index 0000000000000..8899e2a3911d8 --- /dev/null +++ b/.devops/cuda.Dockerfile @@ -0,0 +1,90 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG CUDA_VERSION=12.6.0 +# Target the CUDA build image +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} + +ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} AS build + +# CUDA architecture to build for (defaults to all supported archs) +ARG CUDA_DOCKER_ARCH=default + +RUN apt-get update && \ + apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 + +WORKDIR /app + +COPY . . + +RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ + export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ + fi && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake --build build --config Release -j$(nproc) && \ + cp build/bin/* . + +RUN mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; + + +FROM ${BASE_CUDA_RUN_CONTAINER} AS full +COPY --from=build /app /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + python3 \ + python3-pip \ + git \ + libgomp1 \ + && pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +ENTRYPOINT ["/app/.devops/tools.sh"] + +FROM ${BASE_CUDA_RUN_CONTAINER} AS light + +RUN apt-get update \ + && apt-get install -y libgomp1 \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/build/bin/llama-cli /app/ + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +FROM ${BASE_CUDA_RUN_CONTAINER} AS server + +RUN apt-get update \ + && apt-get install -y libgomp1 curl \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/build/bin/llama-server /app/ + +WORKDIR /app + +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/full-cuda.Dockerfile b/.devops/full-cuda.Dockerfile deleted file mode 100644 index 05bff1bdf6a22..0000000000000 --- a/.devops/full-cuda.Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -ARG UBUNTU_VERSION=22.04 -# This needs to generally match the container host's environment. -ARG CUDA_VERSION=12.6.0 -# Target the CUDA build image -ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} - -FROM ${BASE_CUDA_DEV_CONTAINER} AS build - -# CUDA architecture to build for (defaults to all supported archs) -ARG CUDA_DOCKER_ARCH=default - -RUN apt-get update && \ - apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 - -COPY requirements.txt requirements.txt -COPY requirements requirements - -RUN pip install --upgrade pip setuptools wheel \ - && pip install -r requirements.txt - -WORKDIR /app - -COPY . . - -# Use the default CUDA archs if not specified -RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ - export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ - fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release -j$(nproc) && \ - cp build/bin/* . - -ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/.devops/full-musa.Dockerfile b/.devops/full-musa.Dockerfile deleted file mode 100644 index 3193fea1e9ae5..0000000000000 --- a/.devops/full-musa.Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -ARG UBUNTU_VERSION=22.04 -# This needs to generally match the container host's environment. -ARG MUSA_VERSION=rc3.1.0 -# Target the MUSA build image -ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} - -FROM ${BASE_MUSA_DEV_CONTAINER} AS build - -# MUSA architecture to build for (defaults to all supported archs) -ARG MUSA_DOCKER_ARCH=default - -RUN apt-get update && \ - apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 - -COPY requirements.txt requirements.txt -COPY requirements requirements - -RUN pip install --upgrade pip setuptools wheel \ - && pip install -r requirements.txt - -WORKDIR /app - -COPY . . - -# Use the default MUSA archs if not specified -RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \ - export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \ - fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release -j$(nproc) && \ - cp build/bin/* . - -ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/.devops/full-rocm.Dockerfile b/.devops/full-rocm.Dockerfile deleted file mode 100644 index df496bcd2b7ee..0000000000000 --- a/.devops/full-rocm.Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -ARG UBUNTU_VERSION=22.04 - -# This needs to generally match the container host's environment. -ARG ROCM_VERSION=5.6 - -# Target the CUDA build image -ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete - -FROM ${BASE_ROCM_DEV_CONTAINER} AS build - -# Unless otherwise specified, we make a fat build. -# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 -# This is mostly tied to rocBLAS supported archs. -ARG ROCM_DOCKER_ARCH="\ - gfx803 \ - gfx900 \ - gfx906 \ - gfx908 \ - gfx90a \ - gfx1010 \ - gfx1030 \ - gfx1100 \ - gfx1101 \ - gfx1102" - -COPY requirements.txt requirements.txt -COPY requirements requirements - -RUN pip install --upgrade pip setuptools wheel \ - && pip install -r requirements.txt - -WORKDIR /app - -COPY . . - -# Set nvcc architecture -ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} -# Enable ROCm -ENV GGML_HIPBLAS=1 -ENV CC=/opt/rocm/llvm/bin/clang -ENV CXX=/opt/rocm/llvm/bin/clang++ - -# Enable cURL -ENV LLAMA_CURL=1 -RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev - -RUN make -j$(nproc) - -ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/.devops/intel.Dockerfile b/.devops/intel.Dockerfile new file mode 100644 index 0000000000000..455b5237e4990 --- /dev/null +++ b/.devops/intel.Dockerfile @@ -0,0 +1,86 @@ +ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04 + +## Build Image + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build + +ARG GGML_SYCL_F16=OFF +RUN apt-get update && \ + apt-get install -y git libcurl4-openssl-dev + +WORKDIR /app + +COPY . . + +RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ + echo "GGML_SYCL_F16 is set" && \ + export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ + fi && \ + echo "Building with dynamic libs" && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \ + cmake --build build --config Release -j$(nproc) && \ + cp build/bin/* . + +RUN mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; + +FROM intel/oneapi-basekit:$ONEAPI_VERSION as full +COPY --from=build /app /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + python3 \ + python3-pip \ + git \ + libgomp1 \ + && pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +ENTRYPOINT ["/app/.devops/tools.sh"] + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS light + +RUN apt-get update \ + && apt-get install -y libgomp1 \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/build/bin/llama-cli /app/ + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS server + +RUN apt-get update \ + && apt-get install -y libgomp1 curl \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/build/bin/llama-server /app/ + +WORKDIR /app + +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/llama-cli-cuda.Dockerfile b/.devops/llama-cli-cuda.Dockerfile deleted file mode 100644 index 7796891d5b53c..0000000000000 --- a/.devops/llama-cli-cuda.Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -ARG UBUNTU_VERSION=22.04 -# This needs to generally match the container host's environment. -ARG CUDA_VERSION=12.6.0 -# Target the CUDA build image -ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} -# Target the CUDA runtime image -ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} - -FROM ${BASE_CUDA_DEV_CONTAINER} AS build - -# CUDA architecture to build for (defaults to all supported archs) -ARG CUDA_DOCKER_ARCH=default - -RUN apt-get update && \ - apt-get install -y build-essential git cmake - -WORKDIR /app - -COPY . . - -# Use the default CUDA archs if not specified -RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ - export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ - fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release --target llama-cli -j$(nproc) && \ - mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib \; - -FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime - -RUN apt-get update && \ - apt-get install -y libgomp1 - -COPY --from=build /app/lib/ / -COPY --from=build /app/build/bin/llama-cli / - -ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cli-intel.Dockerfile b/.devops/llama-cli-intel.Dockerfile deleted file mode 100644 index 0706f732a98f0..0000000000000 --- a/.devops/llama-cli-intel.Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04 - -FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build - -ARG GGML_SYCL_F16=OFF -RUN apt-get update && \ - apt-get install -y git - -WORKDIR /app - -COPY . . - -RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ - echo "GGML_SYCL_F16 is set" && \ - export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ - fi && \ - echo "Building with static libs" && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \ - ${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \ - cmake --build build --config Release --target llama-cli - -FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime - -COPY --from=build /app/build/bin/llama-cli /llama-cli - -ENV LC_ALL=C.utf8 - -ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cli-musa.Dockerfile b/.devops/llama-cli-musa.Dockerfile deleted file mode 100644 index e7c75af20e265..0000000000000 --- a/.devops/llama-cli-musa.Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -ARG UBUNTU_VERSION=22.04 -# This needs to generally match the container host's environment. -ARG MUSA_VERSION=rc3.1.0 -# Target the MUSA build image -ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} -# Target the MUSA runtime image -ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} - -FROM ${BASE_MUSA_DEV_CONTAINER} AS build - -# MUSA architecture to build for (defaults to all supported archs) -ARG MUSA_DOCKER_ARCH=default - -RUN apt-get update && \ - apt-get install -y build-essential git cmake - -WORKDIR /app - -COPY . . - -# Use the default MUSA archs if not specified -RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \ - export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \ - fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release --target llama-cli -j$(nproc) && \ - mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib \; - -FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime - -RUN apt-get update && \ - apt-get install -y libgomp1 - -COPY --from=build /app/lib/ / -COPY --from=build /app/build/bin/llama-cli /llama-cli - -ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cli-rocm.Dockerfile b/.devops/llama-cli-rocm.Dockerfile deleted file mode 100644 index e60c747bdbf11..0000000000000 --- a/.devops/llama-cli-rocm.Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -ARG UBUNTU_VERSION=22.04 - -# This needs to generally match the container host's environment. -ARG ROCM_VERSION=5.6 - -# Target the CUDA build image -ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete - -FROM ${BASE_ROCM_DEV_CONTAINER} AS build - -# Unless otherwise specified, we make a fat build. -# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 -# This is mostly tied to rocBLAS supported archs. -ARG ROCM_DOCKER_ARCH="\ - gfx803 \ - gfx900 \ - gfx906 \ - gfx908 \ - gfx90a \ - gfx1010 \ - gfx1030 \ - gfx1100 \ - gfx1101 \ - gfx1102" - -COPY requirements.txt requirements.txt -COPY requirements requirements - -RUN pip install --upgrade pip setuptools wheel \ - && pip install -r requirements.txt - -WORKDIR /app - -COPY . . - -# Set nvcc architecture -ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} -# Enable ROCm -ENV GGML_HIPBLAS=1 -ENV CC=/opt/rocm/llvm/bin/clang -ENV CXX=/opt/rocm/llvm/bin/clang++ - -RUN make -j$(nproc) llama-cli - -ENTRYPOINT [ "/app/llama-cli" ] diff --git a/.devops/llama-cli-vulkan.Dockerfile b/.devops/llama-cli-vulkan.Dockerfile deleted file mode 100644 index 92a6e04793491..0000000000000 --- a/.devops/llama-cli-vulkan.Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -ARG UBUNTU_VERSION=jammy - -FROM ubuntu:$UBUNTU_VERSION AS build - -# Install build tools -RUN apt update && apt install -y git build-essential cmake wget libgomp1 - -# Install Vulkan SDK -RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ - wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ - apt update -y && \ - apt-get install -y vulkan-sdk - -# Build it -WORKDIR /app -COPY . . -RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 && \ - cmake --build build --config Release --target llama-cli - -# Clean up -WORKDIR / -RUN cp /app/build/bin/llama-cli /llama-cli && \ - rm -rf /app - -ENV LC_ALL=C.utf8 - -ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cli.Dockerfile b/.devops/llama-cli.Dockerfile deleted file mode 100644 index be234d55dce5c..0000000000000 --- a/.devops/llama-cli.Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -ARG UBUNTU_VERSION=22.04 - -FROM ubuntu:$UBUNTU_VERSION AS build - -RUN apt-get update && \ - apt-get install -y build-essential git cmake libcurl4-openssl-dev - -WORKDIR /app - -COPY . . - -RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \ - cmake --build build -j $(nproc) && \ - mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib/ \; - -FROM ubuntu:$UBUNTU_VERSION AS runtime - -WORKDIR /app - -RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev libgomp1 curl - -COPY --from=build /app/build/bin/llama-cli /app/ -COPY --from=build /app/lib/ /app/ - -ENV LC_ALL=C.utf8 - -ENTRYPOINT [ "/app/llama-cli" ] diff --git a/.devops/llama-server-cuda.Dockerfile b/.devops/llama-server-cuda.Dockerfile deleted file mode 100644 index bf8a198f99f73..0000000000000 --- a/.devops/llama-server-cuda.Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -ARG UBUNTU_VERSION=22.04 -# This needs to generally match the container host's environment. -ARG CUDA_VERSION=12.6.0 -# Target the CUDA build image -ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} -# Target the CUDA runtime image -ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} - -FROM ${BASE_CUDA_DEV_CONTAINER} AS build - -# CUDA architecture to build for (defaults to all supported archs) -ARG CUDA_DOCKER_ARCH=default - -RUN apt-get update && \ - apt-get install -y build-essential git cmake libcurl4-openssl-dev - -WORKDIR /app - -COPY . . - -# Use the default CUDA archs if not specified -RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ - export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ - fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release --target llama-server -j$(nproc) && \ - mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib \; - -FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime - -RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev libgomp1 curl - -COPY --from=build /app/lib/ / -COPY --from=build /app/build/bin/llama-server /llama-server - -# Must be set to 0.0.0.0 so it can listen to requests from host machine -ENV LLAMA_ARG_HOST=0.0.0.0 - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server-intel.Dockerfile b/.devops/llama-server-intel.Dockerfile deleted file mode 100644 index b503b8cfe1084..0000000000000 --- a/.devops/llama-server-intel.Dockerfile +++ /dev/null @@ -1,34 +0,0 @@ -ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04 - -FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build - -ARG GGML_SYCL_F16=OFF -RUN apt-get update && \ - apt-get install -y git libcurl4-openssl-dev - -WORKDIR /app - -COPY . . - -RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ - echo "GGML_SYCL_F16 is set" && \ - export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ - fi && \ - echo "Building with dynamic libs" && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \ - cmake --build build --config Release --target llama-server - -FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime - -RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev curl - -COPY --from=build /app/build/bin/llama-server /llama-server - -ENV LC_ALL=C.utf8 -# Must be set to 0.0.0.0 so it can listen to requests from host machine -ENV LLAMA_ARG_HOST=0.0.0.0 - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server-musa.Dockerfile b/.devops/llama-server-musa.Dockerfile deleted file mode 100644 index cebe51d42fa95..0000000000000 --- a/.devops/llama-server-musa.Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -ARG UBUNTU_VERSION=22.04 -# This needs to generally match the container host's environment. -ARG MUSA_VERSION=rc3.1.0 -# Target the MUSA build image -ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} -# Target the MUSA runtime image -ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} - -FROM ${BASE_MUSA_DEV_CONTAINER} AS build - -# MUSA architecture to build for (defaults to all supported archs) -ARG MUSA_DOCKER_ARCH=default - -RUN apt-get update && \ - apt-get install -y build-essential git cmake libcurl4-openssl-dev - -WORKDIR /app - -COPY . . - -# Use the default MUSA archs if not specified -RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \ - export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \ - fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release --target llama-server -j$(nproc) && \ - mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib \; - -FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime - -RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev libgomp1 curl - -COPY --from=build /app/lib/ / -COPY --from=build /app/build/bin/llama-server /llama-server - -# Must be set to 0.0.0.0 so it can listen to requests from host machine -ENV LLAMA_ARG_HOST=0.0.0.0 - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server-rocm.Dockerfile b/.devops/llama-server-rocm.Dockerfile deleted file mode 100644 index 8553af75b61fc..0000000000000 --- a/.devops/llama-server-rocm.Dockerfile +++ /dev/null @@ -1,54 +0,0 @@ -ARG UBUNTU_VERSION=22.04 - -# This needs to generally match the container host's environment. -ARG ROCM_VERSION=5.6 - -# Target the CUDA build image -ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete - -FROM ${BASE_ROCM_DEV_CONTAINER} AS build - -# Unless otherwise specified, we make a fat build. -# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 -# This is mostly tied to rocBLAS supported archs. -ARG ROCM_DOCKER_ARCH="\ - gfx803 \ - gfx900 \ - gfx906 \ - gfx908 \ - gfx90a \ - gfx1010 \ - gfx1030 \ - gfx1100 \ - gfx1101 \ - gfx1102" - -COPY requirements.txt requirements.txt -COPY requirements requirements - -RUN pip install --upgrade pip setuptools wheel \ - && pip install -r requirements.txt - -WORKDIR /app - -COPY . . - -# Set nvcc architecture -ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} -# Enable ROCm -ENV GGML_HIPBLAS=1 -ENV CC=/opt/rocm/llvm/bin/clang -ENV CXX=/opt/rocm/llvm/bin/clang++ -# Must be set to 0.0.0.0 so it can listen to requests from host machine -ENV LLAMA_ARG_HOST=0.0.0.0 - -# Enable cURL -ENV LLAMA_CURL=1 -RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev curl - -RUN make -j$(nproc) llama-server - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/llama-server-vulkan.Dockerfile b/.devops/llama-server-vulkan.Dockerfile deleted file mode 100644 index 6aa7867791a38..0000000000000 --- a/.devops/llama-server-vulkan.Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -ARG UBUNTU_VERSION=jammy - -FROM ubuntu:$UBUNTU_VERSION AS build - -# Install build tools -RUN apt update && apt install -y git build-essential cmake wget - -# Install Vulkan SDK and cURL -RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ - wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ - apt update -y && \ - apt-get install -y vulkan-sdk libcurl4-openssl-dev curl - -# Build it -WORKDIR /app -COPY . . -RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \ - cmake --build build --config Release --target llama-server - -# Clean up -WORKDIR / -RUN cp /app/build/bin/llama-server /llama-server && \ - rm -rf /app - -ENV LC_ALL=C.utf8 -# Must be set to 0.0.0.0 so it can listen to requests from host machine -ENV LLAMA_ARG_HOST=0.0.0.0 - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server.Dockerfile b/.devops/llama-server.Dockerfile deleted file mode 100644 index 72ccde2feaeb9..0000000000000 --- a/.devops/llama-server.Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -ARG UBUNTU_VERSION=22.04 - -FROM ubuntu:$UBUNTU_VERSION AS build - -RUN apt-get update && \ - apt-get install -y build-essential git cmake libcurl4-openssl-dev - -WORKDIR /app - -COPY . . - -RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \ - cmake --build build -j $(nproc) && \ - mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib/ \; - -FROM ubuntu:$UBUNTU_VERSION AS runtime - -WORKDIR /app - -RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev libgomp1 curl - -COPY --from=build /app/build/bin/llama-server /app/ -COPY --from=build /app/lib/ /app/ - -ENV LC_ALL=C.utf8 -# Must be set to 0.0.0.0 so it can listen to requests from host machine -ENV LLAMA_ARG_HOST=0.0.0.0 - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/musa.Dockerfile b/.devops/musa.Dockerfile new file mode 100644 index 0000000000000..f1dde9f39ac39 --- /dev/null +++ b/.devops/musa.Dockerfile @@ -0,0 +1,105 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG MUSA_VERSION=rc3.1.0 +# Target the MUSA build image +ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} + +ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_MUSA_DEV_CONTAINER} AS build + +# MUSA architecture to build for (defaults to all supported archs) +ARG MUSA_DOCKER_ARCH=default + +RUN apt-get update && \ + apt-get install -y \ + build-essential \ + cmake \ + python3 \ + python3-pip \ + git \ + libcurl4-openssl-dev \ + libgomp1 + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +# Use the default MUSA archs if not specified +RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \ + export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \ + fi && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake --build build --config Release -j$(nproc) && \ + cp build/bin/* . + +RUN mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; + + +FROM ${BASE_MUSA_RUN_CONTAINER} AS full + +COPY --from=build /app /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + python3 \ + python3-pip \ + git \ + libgomp1 \ + && pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +ENTRYPOINT ["/app/.devops/tools.sh"] + +FROM ${BASE_MUSA_RUN_CONTAINER} AS light + +RUN apt-get update \ + && apt-get install -y libgomp1 \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/build/bin/llama-cli /app/ + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +FROM ${BASE_MUSA_RUN_CONTAINER} AS server + +RUN apt-get update \ + && apt-get install -y libgomp1 curl \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/build/bin/llama-server /app/ + +WORKDIR /app + +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/rocm.Dockerfile b/.devops/rocm.Dockerfile new file mode 100644 index 0000000000000..5c1ee6029de78 --- /dev/null +++ b/.devops/rocm.Dockerfile @@ -0,0 +1,113 @@ +ARG UBUNTU_VERSION=24.04 + +# This needs to generally match the container host's environment. +ARG ROCM_VERSION=6.3 +ARG AMDGPU_VERSION=6.3 + +# Target the CUDA build image +ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete + +### Build image +FROM ${BASE_ROCM_DEV_CONTAINER} AS build + +# Unless otherwise specified, we make a fat build. +# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 +# This is mostly tied to rocBLAS supported archs. +# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported +# gfx906 is deprecated +#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html + +ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102' + +# Set nvcc architectured +ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} +# Enable ROCm +# ENV CC=/opt/rocm/llvm/bin/clang +# ENV CXX=/opt/rocm/llvm/bin/clang++ + +RUN apt-get update \ + && apt-get install -y \ + build-essential \ + cmake \ + git \ + libcurl4-openssl-dev \ + curl \ + libgomp1 + +WORKDIR /app + +COPY . . + +RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \ + cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \ + && cmake --build build --config Release -j$(nproc) && \ + cp build/bin/* . && \ + mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; + + +FROM ${BASE_ROCM_DEV_CONTAINER} AS full-rocm + +COPY --from=build /app /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + git \ + curl \ + libgomp1 \ + python3-pip \ + python3 \ + python3-wheel\ + && pip install --break-system-packages --upgrade setuptools \ + && pip install --break-system-packages -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +ENTRYPOINT ["/app/.devops/tools.sh"] + + +### Light-ROCm, CLI only +FROM ${BASE_ROCM_DEV_CONTAINER} AS light-rocm + +RUN apt-get update \ + && apt-get install -y libgomp1 \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/llama-cli /app/ + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +### Server-ROCm, Server only +FROM ${BASE_ROCM_DEV_CONTAINER} AS server-rocm + +ENV LLAMA_ARG_HOST=0.0.0.0 + +RUN apt-get update \ + && apt-get install -y libgomp1 curl \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/llama-server /app/ + +WORKDIR /app + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] + diff --git a/.devops/vulkan.Dockerfile b/.devops/vulkan.Dockerfile new file mode 100644 index 0000000000000..8a3e5d549b52d --- /dev/null +++ b/.devops/vulkan.Dockerfile @@ -0,0 +1,87 @@ +ARG UBUNTU_VERSION=jammy + +FROM ubuntu:$UBUNTU_VERSION AS build + +# Install build tools +RUN apt update && apt install -y git build-essential cmake wget + +# Install Vulkan SDK and cURL +RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ + wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ + apt update -y && \ + apt-get install -y vulkan-sdk libcurl4-openssl-dev curl + +# Build it +WORKDIR /app + +COPY . . + +RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \ + cmake --build build --config Release -j$(nproc) + +RUN mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; + + +FROM ubuntu:$UBUNTU_VERSION AS full + +COPY --from=build /app /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + python3 \ + python3-pip \ + git \ + libgomp1 \ + && pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +ENTRYPOINT ["/app/.devops/tools.sh"] + + +FROM ubuntu:$UBUNTU_VERSION AS light + +RUN apt-get update \ + && apt-get install -y libgomp1 \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/build/bin/llama-cli /app/ + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + + +FROM ubuntu:$UBUNTU_VERSION AS server + +RUN apt-get update \ + && apt-get install -y libgomp1 curl \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/build/bin/llama-server /app/ + +WORKDIR /app + +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index bc2e5020de25c..edd5ea1ef1cfb 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -34,21 +34,14 @@ jobs: strategy: matrix: config: - - { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" } - - { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" } - - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" } - - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" } - - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" } - - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" } - - { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" } - - { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" } - - { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" } + # Multi-stage build + - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false} + - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} + - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} + - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} + - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete - #- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } - #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } - #- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } - - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" } - - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" } + #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true } steps: - name: Check out the repo uses: actions/checkout@v4 @@ -56,10 +49,10 @@ jobs: fetch-depth: 0 # preserve git history, so we can determine the build number - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Log in to Docker Hub uses: docker/login-action@v2 @@ -86,18 +79,40 @@ jobs: fi # list all tags possible - TAGS="" - TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}," - TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}-${TAG_POSTFIX}" - - echo "output_tags=$TAGS" >> $GITHUB_OUTPUT - echo "output_tags=$TAGS" # print out for debugging + if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then + FULLTAGS="" + FULLTAGS="${FULLTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:full," + FULLTAGS="${FULLTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:full-${TAG_POSTFIX}" + LIGHTTAGS="" + LIGHTTAGS="${LIGHTTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:light," + LIGHTTAGS="${LIGHTTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:light-${TAG_POSTFIX}" + SERVERTAGS="" + SERVERTAGS="${SERVERTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:server," + SERVERTAGS="${SERVERTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:server-${TAG_POSTFIX}" + else + FULLTAGS="" + FULLTAGS="${FULLTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:full-${{ matrix.config.tag }}," + FULLTAGS="${FULLTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:full-${{ matrix.config.tag }}-${TAG_POSTFIX}" + LIGHTTAGS="" + LIGHTTAGS="${LIGHTTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:light-${{ matrix.config.tag }}," + LIGHTTAGS="${LIGHTTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:light-${{ matrix.config.tag }}-${TAG_POSTFIX}" + SERVERTAGS="" + SERVERTAGS="${SERVERTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:server-${{ matrix.config.tag }}," + SERVERTAGS="${SERVERTAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:server-${{ matrix.config.tag }}-${TAG_POSTFIX}" + fi + echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT + echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT + echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT + echo "full_output_tags=$FULLTAGS" # print out for debugging + echo "light_output_tags=$LIGHTTAGS" # print out for debugging + echo "server_output_tags=$SERVERTAGS" # print out for debugging env: GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }} GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example - name: Free Disk Space (Ubuntu) + if: ${{ matrix.config.free_disk_space == true }} uses: jlumbroso/free-disk-space@main with: # this might remove tools that are actually needed, @@ -113,13 +128,41 @@ jobs: docker-images: true swap-storage: true - - name: Build and push Docker image (tagged + versioned) - if: ${{ github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} + - name: Build and push full Docker image (tagged + versioned) + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }} + uses: docker/build-push-action@v6 + with: + context: . + push: true + platforms: ${{ matrix.config.platforms }} + # tag list is generated from step above + tags: ${{ steps.tag.outputs.full_output_tags }} + file: ${{ matrix.config.dockerfile }} + target: full + cache-to: type=inline,mode=max + + - name: Build and push light Docker image (tagged + versioned) + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }} + uses: docker/build-push-action@v6 + with: + context: . + push: true + platforms: ${{ matrix.config.platforms }} + # tag list is generated from step above + tags: ${{ steps.tag.outputs.light_output_tags }} + file: ${{ matrix.config.dockerfile }} + target: light + cache-to: type=inline,mode=max + + - name: Build and push server Docker image (tagged + versioned) + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }} uses: docker/build-push-action@v6 with: context: . push: true platforms: ${{ matrix.config.platforms }} # tag list is generated from step above - tags: ${{ steps.tag.outputs.output_tags }} + tags: ${{ steps.tag.outputs.server_output_tags }} file: ${{ matrix.config.dockerfile }} + target: server + cache-to: type=inline,mode=max