Skip to content

Commit

Permalink
Merge branch 'master' of github.com:ggerganov/llama.cpp into convert-…
Browse files Browse the repository at this point in the history
…bf16-fix
  • Loading branch information
CISC committed Aug 1, 2024
2 parents 675a741 + c8a0090 commit 2b74648
Show file tree
Hide file tree
Showing 341 changed files with 44,059 additions and 162,712 deletions.
2 changes: 1 addition & 1 deletion .devops/full-cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
Expand Down
2 changes: 1 addition & 1 deletion .devops/full-rocm.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
Expand Down
2 changes: 1 addition & 1 deletion .devops/full.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
Expand Down
4 changes: 2 additions & 2 deletions .devops/llama-cli-cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VER
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
Expand All @@ -25,7 +25,7 @@ ENV GGML_CUDA=1

RUN make -j$(nproc) llama-cli

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
apt-get install -y libgomp1
Expand Down
8 changes: 5 additions & 3 deletions .devops/llama-cli-intel.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
Expand All @@ -14,10 +14,12 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
echo "GGML_SYCL_F16 is set" && \
export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
fi && \
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
echo "Building with static libs" && \
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
cmake --build build --config Release --target llama-cli

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

COPY --from=build /app/build/bin/llama-cli /llama-cli

Expand Down
2 changes: 1 addition & 1 deletion .devops/llama-cli-rocm.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
Expand Down
2 changes: 1 addition & 1 deletion .devops/llama-cli-vulkan.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build
FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget libgomp1
Expand Down
4 changes: 2 additions & 2 deletions .devops/llama-cli.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
apt-get install -y build-essential git
Expand All @@ -11,7 +11,7 @@ COPY . .

RUN make -j$(nproc) llama-cli

FROM ubuntu:$UBUNTU_VERSION as runtime
FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
apt-get install -y libgomp1
Expand Down
4 changes: 2 additions & 2 deletions .devops/llama-server-cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VER
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
Expand All @@ -27,7 +27,7 @@ ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev libgomp1 curl
Expand Down
5 changes: 3 additions & 2 deletions .devops/llama-server-intel.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
Expand All @@ -14,10 +14,11 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
echo "GGML_SYCL_F16 is set" && \
export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
fi && \
echo "Building with dynamic libs" && \
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
cmake --build build --config Release --target llama-server

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev curl
Expand Down
2 changes: 1 addition & 1 deletion .devops/llama-server-rocm.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
Expand Down
2 changes: 1 addition & 1 deletion .devops/llama-server-vulkan.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build
FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget
Expand Down
4 changes: 2 additions & 2 deletions .devops/llama-server.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
apt-get install -y build-essential git libcurl4-openssl-dev curl
Expand All @@ -13,7 +13,7 @@ ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ubuntu:$UBUNTU_VERSION as runtime
FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev libgomp1
Expand Down
1 change: 0 additions & 1 deletion .devops/nix/apps.nix
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
"llama-embedding"
"llama-server"
"llama-quantize"
"llama-train-text-from-scratch"
];
mkApp = name: {
type = "app";
Expand Down
48 changes: 28 additions & 20 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@
rocmPackages,
vulkan-headers,
vulkan-loader,
clblast,
curl,
shaderc,
useBlas ? builtins.all (x: !x) [
useCuda
useMetalKit
useOpenCL
useRocm
useVulkan
] && blas.meta.available,
useCuda ? config.cudaSupport,
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
useMpi ? false, # Increases the runtime closure size by ~700M
useOpenCL ? false,
useRocm ? config.rocmSupport,
enableCurl ? true,
useVulkan ? false,
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

Expand All @@ -56,7 +56,6 @@ let
++ lib.optionals useCuda [ "CUDA" ]
++ lib.optionals useMetalKit [ "MetalKit" ]
++ lib.optionals useMpi [ "MPI" ]
++ lib.optionals useOpenCL [ "OpenCL" ]
++ lib.optionals useRocm [ "ROCm" ]
++ lib.optionals useVulkan [ "Vulkan" ];

Expand Down Expand Up @@ -91,6 +90,22 @@ let
ps.tiktoken
ps.torchWithoutCuda
ps.transformers

# server bench
ps.matplotlib

# server tests
ps.openai
ps.behave
ps.prometheus-client

# for examples/pydantic-models-to-grammar-examples.py
ps.docstring-parser
ps.pydantic

# for scripts/compare-llama-bench.py
ps.gitpython
ps.tabulate
]
);

Expand All @@ -111,16 +126,9 @@ let
++ optionals useMetalKit [ MetalKit ];

cudaBuildInputs = with cudaPackages; [
cuda_cccl.dev # <nv/target>

# A temporary hack for reducing the closure size, remove once cudaPackages
# have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
cuda_cudart.dev
cuda_cudart.lib
cuda_cudart.static
libcublas.dev
libcublas.lib
libcublas.static
cuda_cudart
cuda_cccl # <nv/target>
libcublas
];

rocmBuildInputs = with rocmPackages; [
Expand All @@ -132,6 +140,7 @@ let
vulkanBuildInputs = [
vulkan-headers
vulkan-loader
shaderc
];
in

Expand Down Expand Up @@ -198,19 +207,19 @@ effectiveStdenv.mkDerivation (
optionals effectiveStdenv.isDarwin darwinBuildInputs
++ optionals useCuda cudaBuildInputs
++ optionals useMpi [ mpi ]
++ optionals useOpenCL [ clblast ]
++ optionals useRocm rocmBuildInputs
++ optionals useBlas [ blas ]
++ optionals useVulkan vulkanBuildInputs;
++ optionals useVulkan vulkanBuildInputs
++ optionals enableCurl [ curl ];

cmakeFlags =
[
(cmakeBool "LLAMA_BUILD_SERVER" true)
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
(cmakeBool "LLAMA_CURL" enableCurl)
(cmakeBool "GGML_NATIVE" false)
(cmakeBool "GGML_BLAS" useBlas)
(cmakeBool "GGML_CLBLAST" useOpenCL)
(cmakeBool "GGML_CUDA" useCuda)
(cmakeBool "GGML_HIPBLAS" useRocm)
(cmakeBool "GGML_METAL" useMetalKit)
Expand Down Expand Up @@ -254,7 +263,6 @@ effectiveStdenv.mkDerivation (
useCuda
useMetalKit
useMpi
useOpenCL
useRocm
useVulkan
;
Expand All @@ -281,7 +289,7 @@ effectiveStdenv.mkDerivation (
# Configurations we don't want even the CI to evaluate. Results in the
# "unsupported platform" messages. This is mostly a no-op, because
# cudaPackages would've refused to evaluate anyway.
badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
badPlatforms = optionals useCuda lib.platforms.darwin;

# Configurations that are known to result in build failures. Can be
# overridden by importing Nixpkgs with `allowBroken = true`.
Expand Down
6 changes: 1 addition & 5 deletions .devops/tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,11 @@ arg1="$1"
shift

if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
python3 ./convert-hf-to-gguf.py "$@"
python3 ./convert_hf_to_gguf.py "$@"
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
./llama-cli "$@"
elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
./llama-finetune "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
echo "Converting PTH to GGML..."
for i in `ls $1/$2/ggml-model-f16.bin*`; do
Expand All @@ -36,8 +34,6 @@ else
echo " ex: --outtype f16 \"/models/7B/\" "
echo " --quantize (-q): Optimize with quantization process ggml"
echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
echo " --finetune (-f): Run finetune command to create a lora finetune of the model"
echo " See documentation for finetune for command-line parameters"
echo " --all-in-one (-a): Execute --convert & --quantize"
echo " ex: \"/models/\" 7B"
echo " --server (-s): Run a model on the server"
Expand Down
2 changes: 0 additions & 2 deletions .github/ISSUE_TEMPLATE/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,3 @@ contact_links:
- name: Want to contribute?
url: https://github.com/ggerganov/llama.cpp/wiki/contribute
about: Head to the contribution guide page of the wiki for areas you can help with


4 changes: 3 additions & 1 deletion .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ SYCL:
- any-glob-to-any-file:
- ggml/include/ggml-sycl.h
- ggml/src/ggml-sycl.cpp
- README-sycl.md
- ggml/src/ggml-sycl/**
- docs/backend/SYCL.md
- examples/sycl/**
Nvidia GPU:
- changed-files:
- any-glob-to-any-file:
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -355,8 +355,10 @@ jobs:
- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential libvulkan-dev
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo apt-get update -y
sudo apt-get install -y build-essential vulkan-sdk
- name: Build
id: cmake_build
Expand Down Expand Up @@ -858,6 +860,7 @@ jobs:
mkdir build
cd build
cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) -t ggml
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
- name: Determine tag name
Expand Down
Loading

0 comments on commit 2b74648

Please sign in to comment.