diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 886d33d2d5640..f3326a5fbab82 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -552,44 +552,35 @@ jobs:
             -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
           cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
 
-  macOS-latest-swift:
-    runs-on: macos-latest
-
-    strategy:
-      matrix:
-        destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Dependencies
-        id: depends
-        continue-on-error: true
-        run: |
-          brew update
-
-      - name: Build llama.cpp with CMake
-        id: cmake_build
-        run: |
-          sysctl -a
-          mkdir build
-          cd build
-          cmake -G Xcode .. \
-            -DGGML_METAL_USE_BF16=ON \
-            -DGGML_METAL_EMBED_LIBRARY=ON \
-            -DLLAMA_BUILD_EXAMPLES=OFF \
-            -DLLAMA_BUILD_TESTS=OFF \
-            -DLLAMA_BUILD_SERVER=OFF \
-            -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
-          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
-          sudo cmake --install . --config Release
-
-      - name: xcodebuild for swift package
-        id: xcodebuild
-        run: |
-          xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
+# TODO: tmp disabled. see for possible re-enable:
+#       https://github.com/ggerganov/llama.cpp/pull/10525
+#  macOS-latest-swift:
+#    runs-on: macos-latest
+#
+#    strategy:
+#      matrix:
+#        destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
+#
+#    steps:
+#      - name: Clone
+#        id: checkout
+#        uses: actions/checkout@v4
+#
+#      - name: Dependencies
+#        id: depends
+#        continue-on-error: true
+#        run: |
+#          brew update
+#
+#      - name: xcodebuild for swift package
+#        id: xcodebuild
+#        run: |
+#          xcodebuild -scheme llama -destination "${{ matrix.destination }}"
+#
+#      - name: Build Swift Example
+#        id: make_build_swift_example
+#        run: |
+#          make swift
 
   windows-msys2:
     runs-on: windows-latest
@@ -1113,29 +1104,6 @@ jobs:
       - name: Checkout code
        uses: actions/checkout@v4
 
-      - name: Build
-        id: cmake_build
-        run: |
-          sysctl -a
-          mkdir build
-          cd build
-          cmake -G Xcode .. \
-            -DGGML_METAL_USE_BF16=ON \
-            -DGGML_METAL_EMBED_LIBRARY=ON \
-            -DLLAMA_BUILD_EXAMPLES=OFF \
-            -DLLAMA_BUILD_TESTS=OFF \
-            -DLLAMA_BUILD_SERVER=OFF \
-            -DCMAKE_SYSTEM_NAME=iOS \
-            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
-          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
-          sudo cmake --install . --config Release
-
-      - name: xcodebuild for swift package
-        id: xcodebuild
-        run: |
-          xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'
-
       - name: Build Xcode project
        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
 
@@ -1163,6 +1131,23 @@
 
         ./gradlew build --no-daemon
 
+# freeBSD-latest:
+#  runs-on: macos-12
+#  steps:
+#  - name: Clone
+#    uses: actions/checkout@v4
+#
+#  - name: Build
+#    uses: cross-platform-actions/action@v0.19.0
+#    with:
+#      operating_system: freebsd
+#      version: '13.2'
+#      hypervisor: 'qemu'
+#      run: |
+#        sudo pkg update
+#        sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
+#        gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
+
   release:
     if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
diff --git a/Package.swift b/Package.swift
index 01c996d242037..3afeb2f1930e4 100644
--- a/Package.swift
+++ b/Package.swift
@@ -2,6 +2,60 @@
 
 import PackageDescription
 
+var sources = [
+    "src/llama.cpp",
+    "src/llama-vocab.cpp",
+    "src/llama-grammar.cpp",
+    "src/llama-sampling.cpp",
+    "src/unicode.cpp",
+    "src/unicode-data.cpp",
+    "ggml/src/ggml.c",
+    "ggml/src/ggml-alloc.c",
+    "ggml/src/ggml-backend.cpp",
+    "ggml/src/ggml-backend-reg.cpp",
+    "ggml/src/ggml-cpu/ggml-cpu.c",
+    "ggml/src/ggml-cpu/ggml-cpu.cpp",
+    "ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp",
+    "ggml/src/ggml-cpu/ggml-cpu-hbm.cpp",
+    "ggml/src/ggml-cpu/ggml-cpu-quants.c",
+    "ggml/src/ggml-cpu/ggml-cpu-traits.cpp",
+    "ggml/src/ggml-threading.cpp",
+    "ggml/src/ggml-quants.c",
+]
+
+var resources: [Resource] = []
+var linkerSettings: [LinkerSetting] = []
+var cSettings: [CSetting] = [
+    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
+    .unsafeFlags(["-fno-objc-arc"]),
+    .headerSearchPath("ggml/src"),
+    .headerSearchPath("ggml/src/ggml-cpu"),
+    // NOTE: NEW_LAPACK will required iOS version 16.4+
+    // We should consider add this in the future when we drop support for iOS 14
+    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
+    // .define("ACCELERATE_NEW_LAPACK"),
+    // .define("ACCELERATE_LAPACK_ILP64")
+    .define("GGML_USE_CPU"),
+]
+
+
+#if canImport(Darwin)
+sources.append("ggml/src/ggml-common.h")
+sources.append("ggml/src/ggml-metal/ggml-metal.m")
+resources.append(.process("ggml/src/ggml-metal/ggml-metal.metal"))
+linkerSettings.append(.linkedFramework("Accelerate"))
+cSettings.append(
+    contentsOf: [
+        .define("GGML_USE_ACCELERATE"),
+        .define("GGML_USE_METAL"),
+    ]
+)
+#endif
+
+#if os(Linux)
+    cSettings.append(.define("_GNU_SOURCE"))
+#endif
+
 let package = Package(
     name: "llama",
     platforms: [
@@ -14,6 +68,26 @@ let package = Package(
         .library(name: "llama", targets: ["llama"]),
     ],
     targets: [
-        .systemLibrary(name: "llama", pkgConfig: "llama"),
-    ]
+        .target(
+            name: "llama",
+            path: ".",
+            exclude: [
+               "build",
+               "cmake",
+               "examples",
+               "scripts",
+               "models",
+               "tests",
+               "CMakeLists.txt",
+               "Makefile",
+               "ggml/src/ggml-metal-embed.metal"
+            ],
+            sources: sources,
+            resources: resources,
+            publicHeadersPath: "spm-headers",
+            cSettings: cSettings,
+            linkerSettings: linkerSettings
+        )
+    ],
+    cxxLanguageStandard: .cxx17
 )
diff --git a/Sources/llama/llama.h b/Sources/llama/llama.h
deleted file mode 100644
index 41725880ed8c0..0000000000000
--- a/Sources/llama/llama.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#pragma once
-
-#include <llama.h>
-
diff --git a/Sources/llama/module.modulemap b/Sources/llama/module.modulemap
deleted file mode 100644
index d010555b1cb65..0000000000000
--- a/Sources/llama/module.modulemap
+++ /dev/null
@@ -1,5 +0,0 @@
-module llama [system] {
-    header "llama.h"
-    link "llama"
-    export *
-}
diff --git a/cmake/llama.pc.in b/cmake/llama.pc.in
index 0b2b6bcfabfd1..326acbb6108fd 100644
--- a/cmake/llama.pc.in
+++ b/cmake/llama.pc.in
@@ -6,5 +6,5 @@ includedir=${prefix}/include
 Name: llama
 Description: Port of Facebook's LLaMA model in C/C++
 Version: @PROJECT_VERSION@
-Libs: -L${libdir} -lggml -lggml-base -lllama
+Libs: -L${libdir} -lllama
 Cflags: -I${includedir}
diff --git a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
index 998c673d5d31f..65cd4eb515c7f 100644
--- a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
+++ b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
@@ -210,20 +210,20 @@ actor LlamaContext {
 
         llama_kv_cache_clear(context)
 
-        let t_pp_start = DispatchTime.now().uptimeNanoseconds / 1000;
+        let t_pp_start = ggml_time_us()
 
         if llama_decode(context, batch) != 0 {
             print("llama_decode() failed during prompt")
         }
         llama_synchronize(context)
 
-        let t_pp_end = DispatchTime.now().uptimeNanoseconds / 1000;
+        let t_pp_end = ggml_time_us()
 
         // bench text generation
 
         llama_kv_cache_clear(context)
 
-        let t_tg_start = DispatchTime.now().uptimeNanoseconds / 1000;
+        let t_tg_start = ggml_time_us()
 
         for i in 0..