b2532 #102

Merged

Changes from all commits
64 commits
47cc7a7
Server: Handle n_keep parameter in the request (#6174)
jkarthic Mar 20, 2024
f8c4e74
llava : add a MobileVLM_V2-1.7B backup (#6152)
ZiangWu-77 Mar 20, 2024
d795988
Revert "llava : add a MobileVLM_V2-1.7B backup (#6152)"
ggerganov Mar 20, 2024
bc0baab
server : allow to override -ngl in tests (#6170)
ggerganov Mar 20, 2024
6b7e76d
gitignore : ignore curl-related files
ggerganov Mar 20, 2024
91f8ad1
Server: version bump for httplib and json (#6169)
ngxson Mar 20, 2024
ccf58aa
cuda : refactor to remove global resources (#6170)
slaren Mar 20, 2024
272935b
llava : add MobileVLM_V2 backup (#6175)
ZiangWu-77 Mar 20, 2024
f9c7ba3
llava : update MobileVLM-README.md (#6180)
ZiangWu-77 Mar 20, 2024
1c51f98
cuda : print the returned error when CUDA initialization fails (#6185)
slaren Mar 20, 2024
42e21c6
cuda : fix conflict with std::swap (#6186)
slaren Mar 21, 2024
c5b8595
Add nvidia and amd backends (#6157)
AidanBeltonS Mar 21, 2024
76aa30a
Add ability to use Q5_0, Q5_1, and IQ4_NL for quantized K cache (#6183)
ikawrakow Mar 21, 2024
5e43ba8
build : add mac pre-build binaries (#6182)
Vaibhavs10 Mar 21, 2024
1943c01
ci : fix indentation error (#6195)
Vaibhavs10 Mar 21, 2024
5b7b0ac
json-schema-to-grammar improvements (+ added to server) (#5978)
ochafik Mar 21, 2024
cfd3be7
ggml : same IQ4_NL quantization for CPU/CUDA/Metal (#6196)
ikawrakow Mar 21, 2024
03a8f8f
cuda : fix LLAMA_CUDA_F16 build (#6197)
slaren Mar 21, 2024
924ce1d
tests : disable system() calls (#6198)
ggerganov Mar 21, 2024
f372c49
Corrected typo to wrong file (#6199)
semidark Mar 21, 2024
d0a7123
cuda : disable host register by default (#6206)
slaren Mar 21, 2024
be07a03
server : update readme doc from `slot_id` to `id_slot` (#6213)
kaetemi Mar 21, 2024
fa046ea
Fix params underscore convert to dash. (#6203)
dranger003 Mar 22, 2024
59c17f0
add blog link (#6222)
NeoZhangJianyu Mar 22, 2024
95d576b
metal : pad n_ctx by 32 (#6177)
ggerganov Mar 22, 2024
b2075fd
ci : add CURL flag for the mac builds (#6214)
Vaibhavs10 Mar 22, 2024
b3e94f2
metal : proper assert for mat-mat memory alignment (#6225)
ggerganov Mar 22, 2024
68e210b
server : enable continuous batching by default (#6231)
ggerganov Mar 22, 2024
6b8bb3a
server : fix n_keep always showing as 0 in response (#6211)
kaetemi Mar 22, 2024
29ab270
readme : add RecurseChat to the list of UIs (#6219)
xyc Mar 22, 2024
2f0e81e
cuda : add LLAMA_CUDA_NO_PEER_COPY to workaround broken ROCm p2p copy…
slaren Mar 22, 2024
72114ed
json-schema-to-grammar : fix order of props + non-str const/enum (#6232)
ochafik Mar 22, 2024
f77a8ff
tests : conditional python & node json schema tests (#6207)
ochafik Mar 22, 2024
e80f06d
llama : correction of the attn.v.weight quantization for IQ3_XS (#6209)
Nexesenex Mar 22, 2024
80bd33b
common : add HF arg helpers (#6234)
ggerganov Mar 22, 2024
ee804f6
ci: apply concurrency limit for github workflows (#6243)
mscheong01 Mar 22, 2024
dba1af6
llama_model_loader: support multiple split/shard GGUFs (#6187)
phymbert Mar 22, 2024
1d0331c
quantize: options for output and token embedding tensors qtype (#6239)
ikawrakow Mar 22, 2024
92397d8
convert-llama2c-to-ggml : enable conversion of GQA models (#6237)
fraxy-v Mar 22, 2024
56a00f0
common : default --hf-file to --model (#6234)
ggerganov Mar 22, 2024
50ccaf5
lookup: complement data from context with general text statistics (#5…
JohannesGaessler Mar 23, 2024
1b26aeb
server: flush stdout after logging in both text and json layout (#6253)
phymbert Mar 23, 2024
21cad01
split: add gguf-split in the make build target (#6262)
phymbert Mar 23, 2024
476b025
llama : add grok-1 support (#6204)
arki05 Mar 23, 2024
1997577
server: docs: `--threads` and `--threads`, `--ubatch-size`, `--log-di…
phymbert Mar 23, 2024
f482bb2
common: llama_load_model_from_url split support (#6192)
phymbert Mar 23, 2024
9556217
gitignore : gguf-split
ggerganov Mar 23, 2024
94d1b3b
use _wfopen instead of fopen on Windows (#6248)
cebtenzzre Mar 23, 2024
d03224a
Support build win release for SYCL (#6241)
NeoZhangJianyu Mar 24, 2024
ddf6568
[SYCL] offload op (#6217)
airMeng Mar 24, 2024
586e7bc
sampling : deduplicated code for probability distribution access (#6240)
mscheong01 Mar 24, 2024
ea279d5
ci : close inactive issue, increase operations per run (#6270)
phymbert Mar 24, 2024
7aed0ff
Fixed lookup compilation issues on Windows (#6273)
JohannesGaessler Mar 24, 2024
a0e584d
imatrix : fix wname for mul_mat_id ops (#6271)
ggerganov Mar 24, 2024
a32b77c
Fix heap corruption from wmode out-of-bound writes on windows (#6272)
TheFlipbook Mar 24, 2024
7733f0c
ggml : support AVX512VNNI (#6280)
jart Mar 25, 2024
64e7b47
examples : add "retrieval" (#6193)
mscheong01 Mar 25, 2024
95ad616
[SYCL] fix SYCL backend build on windows is break by LOG() error (#6290)
NeoZhangJianyu Mar 25, 2024
ad3a050
Server: clean up OAI params parsing function (#6284)
ngxson Mar 25, 2024
ae1f211
cuda : refactor into multiple files (#6269)
slaren Mar 25, 2024
2f34b86
cuda : fix LLAMA_CUDA_F16 build (#6298)
slaren Mar 25, 2024
43139cc
flake.lock: Update (#6266)
ggerganov Mar 25, 2024
1f2fd4e
tests : include IQ2_XXS and IQ2_XS in test-quantize-fns (#6303)
ikawrakow Mar 25, 2024
b06c16e
nix: fix blas support (#6281)
ck3d Mar 25, 2024
1 change: 1 addition & 0 deletions .clang-tidy
@@ -12,6 +12,7 @@ Checks: >
-readability-implicit-bool-conversion,
-readability-magic-numbers,
-readability-uppercase-literal-suffix,
-readability-simplify-boolean-expr,
clang-analyzer-*,
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
performance-*,
6 changes: 3 additions & 3 deletions .devops/nix/package.nix
@@ -10,7 +10,7 @@
git,
python3,
mpi,
openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
blas,
cudaPackages,
darwin,
rocmPackages,
@@ -181,6 +181,7 @@ effectiveStdenv.mkDerivation (
++ optionals useMpi [ mpi ]
++ optionals useOpenCL [ clblast ]
++ optionals useRocm rocmBuildInputs
++ optionals useBlas [ blas ]
++ optionals useVulkan vulkanBuildInputs;

cmakeFlags =
@@ -216,8 +217,7 @@ effectiveStdenv.mkDerivation (
# Should likely use `rocmPackages.clr.gpuTargets`.
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
]
++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ];

# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
# if they haven't been added yet.
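The package.nix hunks above replace the hard-coded `openblas` input with the generic `blas` attribute (and drop the `LLAMA_BLAS_VENDOR=OpenBLAS` CMake flag), so consumers can choose a BLAS implementation at the nixpkgs level. A minimal sketch of how a consumer might pin OpenBLAS explicitly — the overlay itself is illustrative and not part of this PR:

```nix
# Hypothetical overlay: point the generic `blas` attribute at OpenBLAS so
# derivations built with `useBlas` (like this package) link against it.
final: prev: {
  blas = prev.blas.override { blasProvider = prev.openblas; };
}
```

Because the derivation now consumes `blas` rather than `openblas` directly, switching providers no longer requires patching the package itself.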
168 changes: 167 additions & 1 deletion .github/workflows/build.yml
@@ -15,14 +15,133 @@ on:
types: [opened, synchronize, reopened]
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
GGML_NLOOP: 3
GGML_N_THREADS: 1

jobs:
macOS-latest-cmake-arm64:
runs-on: macos-14

steps:
- name: Clone
id: checkout
uses: actions/checkout@v3

- name: Dependencies
id: depends
continue-on-error: true
run: |
brew update

- name: Build
id: cmake_build
run: |
sysctl -a
mkdir build
cd build
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900

- name: Determine tag name
id: tag
shell: bash
run: |
BUILD_NUMBER="$(git rev-list --count HEAD)"
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
else
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
fi

- name: Pack artifacts
id: pack_artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
run: |
cp LICENSE ./build/bin/
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*

- name: Upload artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
uses: actions/upload-artifact@v3
with:
path: |
llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip

macOS-latest-cmake-x64:
runs-on: macos-latest

steps:
- name: Clone
id: checkout
uses: actions/checkout@v3

- name: Dependencies
id: depends
continue-on-error: true
run: |
brew update

- name: Build
id: cmake_build
run: |
sysctl -a
mkdir build
cd build
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900

- name: Determine tag name
id: tag
shell: bash
run: |
BUILD_NUMBER="$(git rev-list --count HEAD)"
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
else
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
fi

- name: Pack artifacts
id: pack_artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
run: |
cp LICENSE ./build/bin/
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*

- name: Upload artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
uses: actions/upload-artifact@v3
with:
path: |
llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip

ubuntu-focal-make:
runs-on: ubuntu-20.04
env:
LLAMA_NODE_AVAILABLE: true
LLAMA_PYTHON_AVAILABLE: true

steps:
- name: Clone
@@ -35,6 +154,14 @@ jobs:
sudo apt-get update
sudo apt-get install build-essential gcc-8

- uses: actions/setup-node@v4
with:
node-version: "20"

- uses: actions/setup-python@v4
with:
python-version: "3.11"

- name: Build
id: make_build
env:
@@ -98,6 +225,17 @@ jobs:
cd build
ctest -L main --verbose --timeout 900

- name: Test llama2c conversion
id: llama2c_test
run: |
cd build
echo "Fetch tokenizer"
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
echo "Fetch llama2c model"
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
./bin/convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
./bin/main -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256

# ubuntu-latest-cmake-sanitizer:
# runs-on: ubuntu-latest
#
@@ -662,6 +800,7 @@ jobs:

windows-latest-cmake-sycl:
runs-on: windows-latest

defaults:
run:
shell: bash
@@ -670,7 +809,6 @@
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/62641e01-1e8d-4ace-91d6-ae03f7f8a71f/w_BaseKit_p_2024.0.0.49563_offline.exe
WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel


steps:
- name: Clone
id: checkout
@@ -685,6 +823,32 @@
id: cmake_build
run: examples/sycl/win-build-sycl.bat

- name: Determine tag name
id: tag
shell: bash
run: |
BUILD_NUMBER="$(git rev-list --count HEAD)"
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
else
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
fi

- name: Pack artifacts
id: pack_artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
run: |
7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip .\build\bin\*

- name: Upload artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
uses: actions/upload-artifact@v3
with:
path: |
llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip

ios-xcode-build:
runs-on: macos-latest

@@ -748,6 +912,8 @@ jobs:
- macOS-latest-cmake
- windows-latest-cmake
- windows-latest-cmake-cublas
- macOS-latest-cmake-arm64
- macOS-latest-cmake-x64

steps:
- name: Clone
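The `Determine tag name` step added to each new job in build.yml computes a release tag from the branch name and git metadata. The same logic can be sketched as a standalone shell function, with placeholder values standing in for the live `git rev-list` / `git rev-parse` output:

```shell
#!/usr/bin/env sh
# Sketch of the workflow's tag-name logic: builds on master are named
# "b<commit-count>"; branch builds are "<branch, slashes dashed>-b<count>-<hash>".
tag_name() {
  branch=$1
  build_number=$2   # in CI: $(git rev-list --count HEAD)
  short_hash=$3     # in CI: $(git rev-parse --short=7 HEAD)
  if [ "$branch" = "master" ]; then
    echo "b${build_number}"
  else
    safe_name=$(echo "$branch" | tr '/' '-')
    echo "${safe_name}-b${build_number}-${short_hash}"
  fi
}

tag_name master 2532 47cc7a7        # -> b2532
tag_name feature/curl 2532 47cc7a7  # -> feature-curl-b2532-47cc7a7
```

This naming is why the PR title is "b2532": it is the master-branch build number at the merge commit.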
2 changes: 1 addition & 1 deletion .github/workflows/close-issue.yml
@@ -19,5 +19,5 @@ jobs:
close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
days-before-pr-stale: -1
days-before-pr-close: -1
operations-per-run: 1000
operations-per-run: 10000
repo-token: ${{ secrets.GITHUB_TOKEN }}
4 changes: 4 additions & 0 deletions .github/workflows/code-coverage.yml
@@ -5,6 +5,10 @@ env:
GGML_NLOOP: 3
GGML_N_THREADS: 1

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
run:
runs-on: ubuntu-20.04
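Most of the workflow files touched below receive the identical stanza rolled out by the "ci: apply concurrency limit for github workflows" commit (#6243). The pattern keys each run on the workflow name plus the git ref, so a newer push to the same ref cancels the in-flight run instead of queuing behind it:

```yaml
# One concurrency group per workflow+ref; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
```

Applying it per-workflow (rather than once globally) keeps, say, a docker build from cancelling an unrelated lint run on the same branch.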
4 changes: 4 additions & 0 deletions .github/workflows/docker.yml
@@ -15,6 +15,10 @@ on:
branches:
- master

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
push_to_registry:
name: Push Docker image to Docker Hub
4 changes: 4 additions & 0 deletions .github/workflows/editorconfig.yml
@@ -14,6 +14,10 @@ on:
branches:
- master

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
editorconfig:
runs-on: ubuntu-latest
4 changes: 4 additions & 0 deletions .github/workflows/nix-ci-aarch64.yml
@@ -17,6 +17,10 @@ on:
types: [opened, synchronize, reopened]
paths: ['**/*.nix', 'flake.lock']

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
nix-build-aarch64:
runs-on: ubuntu-latest
4 changes: 4 additions & 0 deletions .github/workflows/nix-ci.yml
@@ -8,6 +8,10 @@ on:
pull_request:
types: [opened, synchronize, reopened]

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
nix-eval:
strategy:
4 changes: 4 additions & 0 deletions .github/workflows/python-check-requirements.yml
@@ -16,6 +16,10 @@ on:
- 'requirements.txt'
- 'requirements/*.txt'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
python-check-requirements:
runs-on: ubuntu-latest
4 changes: 4 additions & 0 deletions .github/workflows/python-lint.yml
@@ -2,6 +2,10 @@ name: flake8 Lint

on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
flake8-lint:
runs-on: ubuntu-latest
5 changes: 4 additions & 1 deletion .github/workflows/server.yml
@@ -18,6 +18,10 @@ on:
schedule:
- cron: '0 0 * * *'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
server:
runs-on: ubuntu-latest
@@ -31,7 +35,6 @@ jobs:
include:
- build_type: Release
sanitizer: ""
disabled_on_pr: true
fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

container:
4 changes: 4 additions & 0 deletions .github/workflows/zig-build.yml
@@ -6,6 +6,10 @@ on:
branches:
- master

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
build:
strategy: