Merge remote-tracking branch 'origin/master' into json-type
ochafik committed Jun 22, 2024
2 parents 4de4dc0 + b5a5f34 commit 9c2cc11
Showing 144 changed files with 50,792 additions and 43,174 deletions.
4 changes: 4 additions & 0 deletions .editorconfig
@@ -26,3 +26,7 @@ indent_size = 2
 
 [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
 indent_style = tab
+
+[examples/cvector-generator/*.txt]
+trim_trailing_whitespace = unset
+insert_final_newline = unset
1 change: 0 additions & 1 deletion .github/labeler.yml
@@ -42,7 +42,6 @@ build:
       - cmake/**
       - CMakeLists.txt
       - CMakePresets.json
-      - codecov.yml
 examples:
   - changed-files:
       - any-glob-to-any-file: examples/**
12 changes: 7 additions & 5 deletions .github/pull_request_template.md
@@ -1,5 +1,7 @@
-- Self Reported Review Complexity:
-  - [ ] Review Complexity : Low
-  - [ ] Review Complexity : Medium
-  - [ ] Review Complexity : High
-- [ ] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
+
+
+- [x] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
+- Self-reported review complexity:
+  - [ ] Low
+  - [ ] Medium
+  - [ ] High
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -84,7 +84,7 @@ jobs:
           name: llama-bin-macos-arm64.zip

   macOS-latest-cmake-x64:
-    runs-on: macos-latest
+    runs-on: macos-12

     steps:
       - name: Clone
40 changes: 0 additions & 40 deletions .github/workflows/code-coverage.yml

This file was deleted.

14 changes: 14 additions & 0 deletions .github/workflows/server.yml
@@ -87,8 +87,22 @@ jobs:
             exit 1
           fi

+      - name: Build (no OpenMP)
+        id: cmake_build_no_openmp
+        if: ${{ matrix.sanitizer == 'THREAD' }}
+        run: |
+          cmake -B build \
+            -DLLAMA_NATIVE=OFF \
+            -DLLAMA_BUILD_SERVER=ON \
+            -DLLAMA_CURL=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+            -DLLAMA_OPENMP=OFF ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
       - name: Build
         id: cmake_build
+        if: ${{ matrix.sanitizer != 'THREAD' }}
         run: |
           cmake -B build \
             -DLLAMA_NATIVE=OFF \
109 changes: 71 additions & 38 deletions .gitignore
@@ -1,90 +1,123 @@
-*.o
+# Extensions
+
 *.a
-*.so
-*.gguf
-*.gguf.json
+*.bat
 *.bin
-*.exe
 *.dll
-*.log
-*.gcov
-*.gcno
-*.gcda
 *.dot
-*.bat
-*.tmp
-*.metallib
+*.etag
+*.exe
+*.gcda
+*.gcno
+*.gcov
+*.gguf
+*.gguf.json
+*.lastModified
-.DS_Store
-.build/
+*.log
+*.metallib
+*.o
+*.so
+*.tmp
+
+# IDE / OS
+
 .cache/
 .ccls-cache/
 .direnv/
+.DS_Store
 .envrc
+.idea/
 .swiftpm
-.venv
-.clang-tidy
 .vs/
 .vscode/
-.idea/
+nppBackup
 
-ggml-metal-embed.metal
-
-lcov-report/
+# Coverage
+
 gcovr-report/
+lcov-report/
 
+# Build Artifacts
+
 tags
+.build/
 build*
 !build-info.cmake
 !build-info.cpp.in
 !build-info.sh
 !build.zig
-cmake-build-*
+/libllama.so
+/llama-*
 android-ndk-*
+arm_neon.h
+cmake-build-*
+CMakeSettings.json
+compile_commands.json
+ggml-metal-embed.metal
+llama-batched-swift
 out/
 tmp/
 
+# CI
+
+!.github/workflows/*.yml
+
+# Models
+
 models/*
 models-mnt
+!models/.editorconfig
+!models/ggml-vocab-*.gguf*
 
-/Pipfile
-/libllama.so
-/llama-*
-llama-batched-swift
-/common/build-info.cpp
-arm_neon.h
-compile_commands.json
-CMakeSettings.json
-
-__pycache__
-dist
+# Zig
 
 zig-out/
 zig-cache/
 
+# Logs
+
 ppl-*.txt
 qnt-*.txt
 perf-*.txt
 
+# Examples
+
 examples/jeopardy/results.txt
+examples/server/*.css.hpp
 examples/server/*.html.hpp
 examples/server/*.js.hpp
 examples/server/*.mjs.hpp
-examples/server/*.css.hpp
+!build_64.sh
+!examples/*.bat
+!examples/*/*.kts
+!examples/*/*/*.kts
+!examples/sycl/*.bat
+!examples/sycl/*.sh
 
+# Python
+
+__pycache__
+.venv
+/Pipfile
+dist
 poetry.lock
 poetry.toml
-nppBackup
 
+# Test binaries
-/tests/test-grammar-parser
-/tests/test-llama-grammar
+/tests/test-backend-ops
 /tests/test-double-float
 /tests/test-grad0
+/tests/test-grammar-parser
+/tests/test-llama-grammar
 /tests/test-opt
 /tests/test-quantize-fns
 /tests/test-quantize-perf
+/tests/test-rope
 /tests/test-sampling
 /tests/test-tokenizer-0
-/tests/test-tokenizer-1-spm
 /tests/test-tokenizer-1-bpe
-/tests/test-rope
-/tests/test-backend-ops
+/tests/test-tokenizer-1-spm
 
+# Scripts
 !/scripts/install-oneapi.bat
38 changes: 27 additions & 11 deletions CMakeLists.txt
@@ -39,8 +39,12 @@ endif()

 if (APPLE)
     set(LLAMA_METAL_DEFAULT ON)
+    set(LLAMA_BLAS_DEFAULT ON)
+    set(LLAMA_BLAS_VENDOR_DEFAULT "Apple")
 else()
     set(LLAMA_METAL_DEFAULT OFF)
+    set(LLAMA_BLAS_DEFAULT OFF)
+    set(LLAMA_BLAS_VENDOR_DEFAULT "Generic")
 endif()

 set(LLAMA_LLAMAFILE_DEFAULT ON)
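With these defaults, a plain configure on macOS now routes BLAS through Accelerate. As a standalone illustration (not part of this commit; the project name and message are placeholders), this is how CMake's stock FindBLAS module resolves the vendor that LLAMA_BLAS_VENDOR feeds into BLA_VENDOR:

    cmake_minimum_required(VERSION 3.14)
    project(blas_probe C)

    # "Apple" maps to the Accelerate framework in FindBLAS;
    # "Generic" would fall back to whatever libblas the system provides.
    set(BLA_VENDOR Apple)
    find_package(BLAS)

    if (BLAS_FOUND)
        message(STATUS "BLAS libraries: ${BLAS_LIBRARIES}")
    endif()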
@@ -91,9 +95,10 @@ endif()

 # 3rd party libs
 option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
-option(LLAMA_BLAS "llama: use BLAS" OFF)
+option(LLAMA_BLAS "llama: use BLAS" ${LLAMA_BLAS_DEFAULT})
+set(LLAMA_BLAS_VENDOR ${LLAMA_BLAS_VENDOR_DEFAULT} CACHE STRING
+    "llama: BLAS library vendor")
 option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
-set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
 option(LLAMA_CUDA "llama: use CUDA" OFF)
 option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
 option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
@@ -114,6 +119,7 @@ option(LLAMA_HIP_UMA "llama: use HIP unified memory arch
 option(LLAMA_VULKAN "llama: use Vulkan" OFF)
 option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
 option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
+option(LLAMA_VULKAN_MEMORY_DEBUG "llama: enable Vulkan memory debug output" OFF)
 option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF)
 option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF)
 option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
@@ -311,17 +317,17 @@ if (LLAMA_BLAS)
     if (LLAMA_STATIC)
         set(BLA_STATIC ON)
     endif()
-    if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
-        set(BLA_SIZEOF_INTEGER 8)
-    endif()
+    #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
+    #    set(BLA_SIZEOF_INTEGER 8)
+    #endif()

     set(BLA_VENDOR ${LLAMA_BLAS_VENDOR})
     find_package(BLAS)

     if (BLAS_FOUND)
         message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")

-        if ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
+        if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${LLAMA_BLAS_VENDOR} MATCHES "Apple"))
             # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
             # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
             find_package(PkgConfig REQUIRED)
@@ -374,12 +380,15 @@ if (LLAMA_BLAS)

         add_compile_options(${BLAS_LINKER_FLAGS})

-        add_compile_definitions(GGML_USE_OPENBLAS)
+        add_compile_definitions(GGML_USE_BLAS)

         if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
             add_compile_definitions(GGML_BLAS_USE_MKL)
         endif()

+        set(GGML_HEADERS_BLAS ggml-blas.h)
+        set(GGML_SOURCES_BLAS ggml-blas.cpp)
+
         set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
         set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
     else()
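The switch from GGML_USE_OPENBLAS to GGML_USE_BLAS matches the new vendor-agnostic ggml-blas backend. A minimal sketch of the pattern in play, assuming the usual surrounding project boilerplate (variable names are taken from this diff; the bare ggml.c source list is simplified):

    if (BLAS_FOUND)
        # C/C++ sources can now gate the backend on #ifdef GGML_USE_BLAS
        add_compile_definitions(GGML_USE_BLAS)
        set(GGML_HEADERS_BLAS ggml-blas.h)
        set(GGML_SOURCES_BLAS ggml-blas.cpp)
    endif()

    # When BLAS was not found, the variables are unset and expand to nothing,
    # so the backend simply drops out of the source list.
    add_library(ggml OBJECT ggml.c ${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS})

That expansion behavior is why the add_library(ggml OBJECT ...) hunk at the end of this file can list ${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS} unconditionally.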
@@ -526,6 +535,10 @@ if (LLAMA_VULKAN)
         add_compile_definitions(GGML_VULKAN_DEBUG)
     endif()

+    if (LLAMA_VULKAN_MEMORY_DEBUG)
+        add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG)
+    endif()
+
     if (LLAMA_VULKAN_VALIDATE)
         add_compile_definitions(GGML_VULKAN_VALIDATE)
     endif()
@@ -652,6 +665,7 @@ if (LLAMA_SYCL)
     #todo: AOT

     find_package(IntelSYCL REQUIRED)
+    find_package(MKL REQUIRED)

     message(STATUS "SYCL found")

@@ -666,21 +680,22 @@ if (LLAMA_SYCL)
     endif()

     add_compile_options(-I./) #include DPCT
-    add_compile_options(-I/${SYCL_INCLUDE_DIR})

     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
     if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
     endif()

     set(GGML_HEADERS_SYCL ggml-sycl.h)
-    set(GGML_SOURCES_SYCL ggml-sycl.cpp)
+    file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
+    list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")

     if (WIN32)
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
     else()
+        add_compile_options(-I/${SYCL_INCLUDE_DIR})
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
         if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
             set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
         elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
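On Windows, the hand-enumerated mkl_*_dll.lib entries give way to oneMKL's imported CMake targets. A hedged standalone sketch of consuming them (the executable and source names are placeholders; it assumes the oneAPI environment script has been run so that MKLConfig.cmake is on the package search path):

    cmake_minimum_required(VERSION 3.14)
    project(mkl_consumer CXX)

    # MKLConfig.cmake defines imported targets such as MKL::MKL and MKL::MKL_SYCL.
    find_package(MKL REQUIRED)

    add_executable(app main.cpp)
    # An imported target carries include directories, link flags, and transitive
    # dependencies, replacing the manually listed mkl_*_dll.lib libraries.
    target_link_libraries(app PRIVATE MKL::MKL)

Linking MKL::MKL_SYCL additionally pulls in the SYCL-enabled MKL libraries, which is what the diff pairs with IntelSYCL::SYCL_CXX.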
@@ -1258,6 +1273,7 @@ add_library(ggml OBJECT
             ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
             ${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
             ${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
+            ${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
             ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
             )

(Diff truncated: the remaining files of the 144 changed are not shown here.)