diff --git a/src/ggml-blas.h b/include/ggml-blas.h similarity index 100% rename from src/ggml-blas.h rename to include/ggml-blas.h diff --git a/src/ggml-cuda.h b/include/ggml-cuda.h similarity index 100% rename from src/ggml-cuda.h rename to include/ggml-cuda.h diff --git a/src/ggml-kompute.h b/include/ggml-kompute.h similarity index 100% rename from src/ggml-kompute.h rename to include/ggml-kompute.h diff --git a/src/ggml-metal.h b/include/ggml-metal.h similarity index 100% rename from src/ggml-metal.h rename to include/ggml-metal.h diff --git a/src/ggml-rpc.h b/include/ggml-rpc.h similarity index 100% rename from src/ggml-rpc.h rename to include/ggml-rpc.h diff --git a/src/ggml-sycl.h b/include/ggml-sycl.h similarity index 95% rename from src/ggml-sycl.h rename to include/ggml-sycl.h index 451938fc4..43ab1519c 100644 --- a/src/ggml-sycl.h +++ b/include/ggml-sycl.h @@ -8,7 +8,9 @@ #include "ggml.h" #include "ggml-backend.h" -#include "ggml-sycl/presets.hpp" + +#define GGML_SYCL_NAME "SYCL" +#define GGML_SYCL_MAX_DEVICES 48 #ifdef __cplusplus extern "C" { diff --git a/src/ggml-vulkan.h b/include/ggml-vulkan.h similarity index 100% rename from src/ggml-vulkan.h rename to include/ggml-vulkan.h diff --git a/scripts/sync-llama-am.sh b/scripts/sync-llama-am.sh index 70d46f185..ea00c7455 100755 --- a/scripts/sync-llama-am.sh +++ b/scripts/sync-llama-am.sh @@ -110,25 +110,26 @@ if [ -f $SRC_GGML/llama-src.patch ]; then # ggml/src/ggml-common.h -> src/ggml-common.h # ggml/src/ggml-cuda/* -> src/ggml-cuda/* # ggml/src/ggml-cuda.cu -> src/ggml-cuda.cu - # ggml/src/ggml-cuda.h -> src/ggml-cuda.h # ggml/src/ggml-impl.h -> src/ggml-impl.h # ggml/src/ggml-kompute.cpp -> src/ggml-kompute.cpp - # ggml/src/ggml-kompute.h -> src/ggml-kompute.h - # ggml/src/ggml-metal.h -> src/ggml-metal.h # ggml/src/ggml-metal.m -> src/ggml-metal.m # ggml/src/ggml-quants.c -> src/ggml-quants.c # ggml/src/ggml-quants.h -> src/ggml-quants.h # ggml/src/ggml-rpc.cpp -> src/ggml-rpc.cpp - # ggml/src/ggml-rpc.h -> src/ggml-rpc.h # ggml/src/ggml-sycl/* -> src/ggml-sycl/* # ggml/src/ggml-sycl.cpp -> src/ggml-sycl.cpp - # ggml/src/ggml-sycl.h -> src/ggml-sycl.h # ggml/src/ggml-vulkan.cpp -> src/ggml-vulkan.cpp - # ggml/src/ggml-vulkan.h -> src/ggml-vulkan.h # # ggml/include/ggml.h -> include/ggml.h # ggml/include/ggml-alloc.h -> include/ggml-alloc.h # ggml/include/ggml-backend.h -> include/ggml-backend.h + # ggml/include/ggml-blas.h -> include/ggml-blas.h + # ggml/include/ggml-cuda.h -> include/ggml-cuda.h + # ggml/include/ggml-kompute.h -> include/ggml-kompute.h + # ggml/include/ggml-metal.h -> include/ggml-metal.h + # ggml/include/ggml-rpc.h -> include/ggml-rpc.h + # ggml/include/ggml-sycl.h -> include/ggml-sycl.h + # ggml/include/ggml-vulkan.h -> include/ggml-vulkan.h # # tests/test-opt.cpp -> tests/test-opt.cpp # tests/test-grad0.cpp -> tests/test-grad0.cpp @@ -152,24 +153,25 @@ if [ -f $SRC_GGML/llama-src.patch ]; then -e 's/\/ggml\/src\/ggml-common\.h/\/src\/ggml-common.h/g' \ -e 's/\/ggml\/src\/ggml-cuda\//\/src\/ggml-cuda\//g' \ -e 's/\/ggml\/src\/ggml-cuda\.cu/\/src\/ggml-cuda.cu/g' \ - -e 's/\/ggml\/src\/ggml-cuda\.h/\/src\/ggml-cuda.h/g' \ -e 's/\/ggml\/src\/ggml-impl\.h/\/src\/ggml-impl.h/g' \ -e 's/\/ggml\/src\/ggml-kompute\.cpp/\/src\/ggml-kompute.cpp/g' \ - -e 's/\/ggml\/src\/ggml-kompute\.h/\/src\/ggml-kompute.h/g' \ - -e 's/\/ggml\/src\/ggml-metal\.h/\/src\/ggml-metal.h/g' \ -e 's/\/ggml\/src\/ggml-metal\.m/\/src\/ggml-metal.m/g' \ -e 's/\/ggml\/src\/ggml-quants\.c/\/src\/ggml-quants.c/g' \ -e 's/\/ggml\/src\/ggml-quants\.h/\/src\/ggml-quants.h/g' \ -e 's/\/ggml\/src\/ggml-rpc\.cpp/\/src\/ggml-rpc.cpp/g' \ - -e 's/\/ggml\/src\/ggml-rpc\.h/\/src\/ggml-rpc.h/g' \ -e 's/\/ggml\/src\/ggml-sycl\//\/src\/ggml-sycl\//g' \ -e 's/\/ggml\/src\/ggml-sycl\.cpp/\/src\/ggml-sycl.cpp/g' \ - -e 's/\/ggml\/src\/ggml-sycl\.h/\/src\/ggml-sycl.h/g' \ -e 's/\/ggml\/src\/ggml-vulkan\.cpp/\/src\/ggml-vulkan.cpp/g' \ - -e 's/\/ggml\/src\/ggml-vulkan\.h/\/src\/ggml-vulkan.h/g' \ -e 's/\/ggml\/include\/ggml\.h/\/include\/ggml.h/g' \ -e 's/\/ggml\/include\/ggml-alloc\.h/\/include\/ggml-alloc.h/g' \ -e 's/\/ggml\/include\/ggml-backend\.h/\/include\/ggml-backend.h/g' \ + -e 's/\/ggml\/include\/ggml-blas\.h/\/include\/ggml-blas.h/g' \ + -e 's/\/ggml\/include\/ggml-cuda\.h/\/include\/ggml-cuda.h/g' \ + -e 's/\/ggml\/include\/ggml-kompute\.h/\/include\/ggml-kompute.h/g' \ + -e 's/\/ggml\/include\/ggml-metal\.h/\/include\/ggml-metal.h/g' \ + -e 's/\/ggml\/include\/ggml-rpc\.h/\/include\/ggml-rpc.h/g' \ + -e 's/\/ggml\/include\/ggml-sycl\.h/\/include\/ggml-sycl.h/g' \ + -e 's/\/ggml\/include\/ggml-vulkan\.h/\/include\/ggml-vulkan.h/g' \ -e 's/\/tests\/test-opt\.cpp/\/tests\/test-opt.cpp/g' \ -e 's/\/tests\/test-grad0\.cpp/\/tests\/test-grad0.cpp/g' \ -e 's/\/tests\/test-quantize-fns\.cpp/\/tests\/test-quantize-fns.cpp/g' \ diff --git a/scripts/sync-llama.sh b/scripts/sync-llama.sh index 614093155..c4c17433b 100755 --- a/scripts/sync-llama.sh +++ b/scripts/sync-llama.sh @@ -13,26 +13,27 @@ cp -rpv ../llama.cpp/ggml/src/ggml-blas.h src/ggml-blas.h cp -rpv ../llama.cpp/ggml/src/ggml-common.h src/ggml-common.h cp -rpv ../llama.cpp/ggml/src/ggml-cuda/* src/ggml-cuda/ cp -rpv ../llama.cpp/ggml/src/ggml-cuda.cu src/ggml-cuda.cu -cp -rpv ../llama.cpp/ggml/src/ggml-cuda.h src/ggml-cuda.h cp -rpv ../llama.cpp/ggml/src/ggml-impl.h src/ggml-impl.h cp -rpv ../llama.cpp/ggml/src/ggml-kompute.cpp src/ggml-kompute.cpp -cp -rpv ../llama.cpp/ggml/src/ggml-kompute.h src/ggml-kompute.h -cp -rpv ../llama.cpp/ggml/src/ggml-metal.h src/ggml-metal.h cp -rpv ../llama.cpp/ggml/src/ggml-metal.m src/ggml-metal.m cp -rpv ../llama.cpp/ggml/src/ggml-metal.metal src/ggml-metal.metal cp -rpv ../llama.cpp/ggml/src/ggml-quants.c src/ggml-quants.c cp -rpv ../llama.cpp/ggml/src/ggml-quants.h src/ggml-quants.h cp -rpv ../llama.cpp/ggml/src/ggml-rpc.cpp src/ggml-rpc.cpp -cp -rpv ../llama.cpp/ggml/src/ggml-rpc.h src/ggml-rpc.h cp -rpv ../llama.cpp/ggml/src/ggml-sycl/* src/ggml-sycl/ cp -rpv ../llama.cpp/ggml/src/ggml-sycl.cpp src/ggml-sycl.cpp -cp -rpv ../llama.cpp/ggml/src/ggml-sycl.h src/ggml-sycl.h cp -rpv ../llama.cpp/ggml/src/ggml-vulkan.cpp src/ggml-vulkan.cpp -cp -rpv ../llama.cpp/ggml/src/ggml-vulkan.h src/ggml-vulkan.h cp -rpv ../llama.cpp/ggml/include/ggml.h include/ggml.h cp -rpv ../llama.cpp/ggml/include/ggml-alloc.h include/ggml-alloc.h cp -rpv ../llama.cpp/ggml/include/ggml-backend.h include/ggml-backend.h +cp -rpv ../llama.cpp/ggml/include/ggml-blas.h include/ggml-blas.h +cp -rpv ../llama.cpp/ggml/include/ggml-cuda.h include/ggml-cuda.h +cp -rpv ../llama.cpp/ggml/include/ggml-kompute.h include/ggml-kompute.h +cp -rpv ../llama.cpp/ggml/include/ggml-metal.h include/ggml-metal.h +cp -rpv ../llama.cpp/ggml/include/ggml-rpc.h include/ggml-rpc.h +cp -rpv ../llama.cpp/ggml/include/ggml-sycl.h include/ggml-sycl.h +cp -rpv ../llama.cpp/ggml/include/ggml-vulkan.h include/ggml-vulkan.h cp -rpv ../llama.cpp/tests/test-opt.cpp tests/test-opt.cpp cp -rpv ../llama.cpp/tests/test-grad0.cpp tests/test-grad0.cpp diff --git a/scripts/sync-whisper-am.sh b/scripts/sync-whisper-am.sh index d23c7c628..c46790d44 100755 --- a/scripts/sync-whisper-am.sh +++ b/scripts/sync-whisper-am.sh @@ -108,25 +108,26 @@ if [ -f $SRC_GGML/whisper-src.patch ]; then # ggml/src/ggml-common.h -> src/ggml-common.h # ggml/src/ggml-cuda/* -> src/ggml-cuda/* # ggml/src/ggml-cuda.cu -> src/ggml-cuda.cu - # ggml/src/ggml-cuda.h -> src/ggml-cuda.h # ggml/src/ggml-impl.h -> src/ggml-impl.h # ggml/src/ggml-kompute.cpp -> src/ggml-kompute.cpp - # ggml/src/ggml-kompute.h -> src/ggml-kompute.h - # ggml/src/ggml-metal.h -> src/ggml-metal.h # ggml/src/ggml-metal.m -> src/ggml-metal.m # ggml/src/ggml-quants.c -> src/ggml-quants.c # ggml/src/ggml-quants.h -> src/ggml-quants.h # ggml/src/ggml-rpc.cpp -> src/ggml-rpc.cpp - # ggml/src/ggml-rpc.h -> src/ggml-rpc.h # ggml/src/ggml-sycl/* -> src/ggml-sycl/* # ggml/src/ggml-sycl.cpp -> src/ggml-sycl.cpp - # ggml/src/ggml-sycl.h -> src/ggml-sycl.h # ggml/src/ggml-vulkan.cpp -> src/ggml-vulkan.cpp - # ggml/src/ggml-vulkan.h -> src/ggml-vulkan.h # # ggml/include/ggml.h -> include/ggml.h # ggml/include/ggml-alloc.h -> include/ggml-alloc.h # ggml/include/ggml-backend.h -> include/ggml-backend.h + # ggml/include/ggml-blas.h -> include/ggml-blas.h + # ggml/include/ggml-cuda.h -> include/ggml-cuda.h + # ggml/include/ggml-kompute.h -> include/ggml-kompute.h + # ggml/include/ggml-metal.h -> include/ggml-metal.h + # ggml/include/ggml-rpc.h -> include/ggml-rpc.h + # ggml/include/ggml-sycl.h -> include/ggml-sycl.h + # ggml/include/ggml-vulkan.h -> include/ggml-vulkan.h # # examples/common.h -> examples/common.h # examples/common.cpp -> examples/common.cpp @@ -149,24 +150,25 @@ if [ -f $SRC_GGML/whisper-src.patch ]; then -e 's/\/ggml\/src\/ggml-common\.h/\/src\/ggml-common.h/g' \ -e 's/\/ggml\/src\/ggml-cuda\//\/src\/ggml-cuda\//g' \ -e 's/\/ggml\/src\/ggml-cuda\.cu/\/src\/ggml-cuda.cu/g' \ - -e 's/\/ggml\/src\/ggml-cuda\.h/\/src\/ggml-cuda.h/g' \ -e 's/\/ggml\/src\/ggml-impl\.h/\/src\/ggml-impl.h/g' \ -e 's/\/ggml\/src\/ggml-kompute\.cpp/\/src\/ggml-kompute.cpp/g' \ - -e 's/\/ggml\/src\/ggml-kompute\.h/\/src\/ggml-kompute.h/g' \ - -e 's/\/ggml\/src\/ggml-metal\.h/\/src\/ggml-metal.h/g' \ -e 's/\/ggml\/src\/ggml-metal\.m/\/src\/ggml-metal.m/g' \ -e 's/\/ggml\/src\/ggml-quants\.c/\/src\/ggml-quants.c/g' \ -e 's/\/ggml\/src\/ggml-quants\.h/\/src\/ggml-quants.h/g' \ -e 's/\/ggml\/src\/ggml-rpc\.cpp/\/src\/ggml-rpc.cpp/g' \ - -e 's/\/ggml\/src\/ggml-rpc\.h/\/src\/ggml-rpc.h/g' \ -e 's/\/ggml\/src\/ggml-sycl\//\/src\/ggml-sycl\//g' \ -e 's/\/ggml\/src\/ggml-sycl\.cpp/\/src\/ggml-sycl.cpp/g' \ - -e 's/\/ggml\/src\/ggml-sycl\.h/\/src\/ggml-sycl.h/g' \ -e 's/\/ggml\/src\/ggml-vulkan\.cpp/\/src\/ggml-vulkan.cpp/g' \ - -e 's/\/ggml\/src\/ggml-vulkan\.h/\/src\/ggml-vulkan.h/g' \ -e 's/\/ggml\/include\/ggml\.h/\/include\/ggml.h/g' \ -e 's/\/ggml\/include\/ggml-alloc\.h/\/include\/ggml-alloc.h/g' \ -e 's/\/ggml\/include\/ggml-backend\.h/\/include\/ggml-backend.h/g' \ + -e 's/\/ggml\/include\/ggml-blas\.h/\/include\/ggml-blas.h/g' \ + -e 's/\/ggml\/include\/ggml-cuda\.h/\/include\/ggml-cuda.h/g' \ + -e 's/\/ggml\/include\/ggml-kompute\.h/\/include\/ggml-kompute.h/g' \ + -e 's/\/ggml\/include\/ggml-metal\.h/\/include\/ggml-metal.h/g' \ + -e 's/\/ggml\/include\/ggml-rpc\.h/\/include\/ggml-rpc.h/g' \ + -e 's/\/ggml\/include\/ggml-sycl\.h/\/include\/ggml-sycl.h/g' \ + -e 's/\/ggml\/include\/ggml-vulkan\.h/\/include\/ggml-vulkan.h/g' \ -e 's/\/examples\/common\.h/\/examples\/common.h/g' \ -e 's/\/examples\/common\.cpp/\/examples\/common.cpp/g' \ -e 's/\/examples\/common-ggml\.h/\/examples\/common-ggml.h/g' \ diff --git a/scripts/sync-whisper.sh b/scripts/sync-whisper.sh index 77a803924..b78e66ada 100755 --- a/scripts/sync-whisper.sh +++ b/scripts/sync-whisper.sh @@ -13,26 +13,27 @@ cp -rpv ../whisper.cpp/ggml/src/ggml-blas.h src/ggml-blas.h cp -rpv ../whisper.cpp/ggml/src/ggml-common.h src/ggml-common.h cp -rpv ../whisper.cpp/ggml/src/ggml-cuda/* src/ggml-cuda/ cp -rpv ../whisper.cpp/ggml/src/ggml-cuda.cu src/ggml-cuda.cu -cp -rpv ../whisper.cpp/ggml/src/ggml-cuda.h src/ggml-cuda.h cp -rpv ../whisper.cpp/ggml/src/ggml-impl.h src/ggml-impl.h cp -rpv ../whisper.cpp/ggml/src/ggml-kompute.cpp src/ggml-kompute.cpp -cp -rpv ../whisper.cpp/ggml/src/ggml-kompute.h src/ggml-kompute.h -cp -rpv ../whisper.cpp/ggml/src/ggml-metal.h src/ggml-metal.h cp -rpv ../whisper.cpp/ggml/src/ggml-metal.m src/ggml-metal.m cp -rpv ../whisper.cpp/ggml/src/ggml-metal.metal src/ggml-metal.metal cp -rpv ../whisper.cpp/ggml/src/ggml-quants.c src/ggml-quants.c cp -rpv ../whisper.cpp/ggml/src/ggml-quants.h src/ggml-quants.h cp -rpv ../whisper.cpp/ggml/src/ggml-rpc.cpp src/ggml-rpc.cpp -cp -rpv ../whisper.cpp/ggml/src/ggml-rpc.h src/ggml-rpc.h cp -rpv ../whisper.cpp/ggml/src/ggml-sycl/* src/ggml-sycl/ cp -rpv ../whisper.cpp/ggml/src/ggml-sycl.cpp src/ggml-sycl.cpp -cp -rpv ../whisper.cpp/ggml/src/ggml-sycl.h src/ggml-sycl.h cp -rpv ../whisper.cpp/ggml/src/ggml-vulkan.cpp src/ggml-vulkan.cpp -cp -rpv ../whisper.cpp/ggml/src/ggml-vulkan.h src/ggml-vulkan.h cp -rpv ../whisper.cpp/ggml/include/ggml.h include/ggml.h cp -rpv ../whisper.cpp/ggml/include/ggml-alloc.h include/ggml-alloc.h cp -rpv ../whisper.cpp/ggml/include/ggml-backend.h include/ggml-backend.h +cp -rpv ../whisper.cpp/ggml/include/ggml-blas.h include/ggml-blas.h +cp -rpv ../whisper.cpp/ggml/include/ggml-cuda.h include/ggml-cuda.h +cp -rpv ../whisper.cpp/ggml/include/ggml-kompute.h include/ggml-kompute.h +cp -rpv ../whisper.cpp/ggml/include/ggml-metal.h include/ggml-metal.h +cp -rpv ../whisper.cpp/ggml/include/ggml-rpc.h include/ggml-rpc.h +cp -rpv ../whisper.cpp/ggml/include/ggml-sycl.h include/ggml-sycl.h +cp -rpv ../whisper.cpp/ggml/include/ggml-vulkan.h include/ggml-vulkan.h cp -rpv ../whisper.cpp/examples/common.h examples/common.h cp -rpv ../whisper.cpp/examples/common.cpp examples/common.cpp diff --git a/spm-headers/ggml-metal.h b/spm-headers/ggml-metal.h new file mode 120000 index 000000000..b5e5471fc --- /dev/null +++ b/spm-headers/ggml-metal.h @@ -0,0 +1 @@ +../include/ggml-metal.h \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5e6d25f2e..ba341d374 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,9 +1,8 @@ include(CheckCXXCompilerFlag) -unset(GGML_CDEF_PRIVATE) unset(GGML_CDEF_PUBLIC) -list(APPEND GGML_CDEF_PRIVATE GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES}) +add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES}) # enable libstdc++ assertions for debug builds if (CMAKE_SYSTEM_NAME MATCHES "Linux") @@ -32,9 +31,9 @@ if (APPLE AND GGML_ACCELERATE) if (ACCELERATE_FRAMEWORK) message(STATUS "Accelerate framework found") - list(APPEND GGML_CDEF_PRIVATE GGML_USE_ACCELERATE) - list(APPEND GGML_CDEF_PRIVATE ACCELERATE_NEW_LAPACK) - list(APPEND GGML_CDEF_PRIVATE ACCELERATE_LAPACK_ILP64) + add_compile_definitions(GGML_USE_ACCELERATE) + add_compile_definitions(ACCELERATE_NEW_LAPACK) + add_compile_definitions(ACCELERATE_LAPACK_ILP64) set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) else() @@ -48,12 +47,12 @@ if (GGML_METAL) find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) message(STATUS "Metal framework found") - set(GGML_HEADERS_METAL ggml-metal.h) + set(GGML_HEADERS_METAL ../include/ggml-metal.h) set(GGML_SOURCES_METAL ggml-metal.m) list(APPEND GGML_CDEF_PUBLIC GGML_USE_METAL) if (GGML_METAL_NDEBUG) - list(APPEND GGML_CDEF_PRIVATE GGML_METAL_NDEBUG) + add_compile_definitions(GGML_METAL_NDEBUG) endif() # copy ggml-common.h and ggml-metal.metal to bin directory @@ -63,7 +62,7 @@ if (GGML_METAL) if (GGML_METAL_EMBED_LIBRARY) enable_language(ASM) - list(APPEND GGML_CDEF_PRIVATE GGML_METAL_EMBED_LIBRARY) + add_compile_definitions(GGML_METAL_EMBED_LIBRARY) set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h") set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") @@ -145,7 +144,7 @@ if (GGML_OPENMP) if (OpenMP_FOUND) message(STATUS "OpenMP found") - list(APPEND GGML_CDEF_PRIVATE GGML_USE_OPENMP) + add_compile_definitions(GGML_USE_OPENMP) set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX) else() @@ -223,10 +222,10 @@ if (GGML_BLAS) list(APPEND GGML_CDEF_PUBLIC GGML_USE_BLAS) if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel")) - list(APPEND GGML_CDEF_PRIVATE GGML_BLAS_USE_MKL) + add_compile_definitions(GGML_BLAS_USE_MKL) endif() - set(GGML_HEADERS_BLAS ggml-blas.h) + set(GGML_HEADERS_BLAS ../include/ggml-blas.h) set(GGML_SOURCES_BLAS ggml-blas.cpp) set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${BLAS_LIBRARIES}) @@ -241,7 +240,7 @@ endif() if (GGML_LLAMAFILE) message(STATUS "Using ggml SGEMM") - list(APPEND GGML_CDEF_PRIVATE GGML_USE_LLAMAFILE) + add_compile_definitions(GGML_USE_LLAMAFILE) set(GGML_HEADERS_LLAMAFILE sgemm.h) set(GGML_SOURCES_LLAMAFILE sgemm.cpp) @@ -272,7 +271,7 @@ if (GGML_CUDA) enable_language(CUDA) file(GLOB GGML_HEADERS_CUDA "ggml-cuda/*.cuh") - list(APPEND GGML_HEADERS_CUDA "ggml-cuda.h") + list(APPEND GGML_HEADERS_CUDA "../include/ggml-cuda.h") file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu") list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu") @@ -284,7 +283,7 @@ if (GGML_CUDA) if (GGML_CUDA_FA_ALL_QUANTS) file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu") list(APPEND GGML_SOURCES_CUDA ${SRCS}) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FA_ALL_QUANTS) + add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS) else() file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu") list(APPEND GGML_SOURCES_CUDA ${SRCS}) @@ -296,18 +295,18 @@ if (GGML_CUDA) list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_USE_GRAPHS) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X}) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y}) - list(APPEND GGML_CDEF_PRIVATE K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER}) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE}) + add_compile_definitions(GGML_CUDA_USE_GRAPHS) + add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X}) + add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y}) + add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER}) + add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE}) if (GGML_CUDA_FORCE_DMMV) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FORCE_DMMV) + add_compile_definitions(GGML_CUDA_FORCE_DMMV) endif() if (GGML_CUDA_FORCE_MMQ) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FORCE_MMQ) + add_compile_definitions(GGML_CUDA_FORCE_MMQ) endif() if (GGML_CUDA_FORCE_CUBLAS) @@ -315,19 +314,19 @@ if (GGML_CUDA) endif() if (GGML_CUDA_NO_VMM) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_NO_VMM) + add_compile_definitions(GGML_CUDA_NO_VMM) endif() if (DEFINED GGML_CUDA_DMMV_Y) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_MMV_Y=${GGML_CUDA_DMMV_Y}) # for backwards compatibility + add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_DMMV_Y}) # for backwards compatibility endif() if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_F16) + add_compile_definitions(GGML_CUDA_F16) endif() if (GGML_CUDA_NO_PEER_COPY) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_NO_PEER_COPY) + add_compile_definitions(GGML_CUDA_NO_PEER_COPY) endif() if (GGML_STATIC) @@ -397,7 +396,7 @@ if (GGML_HIPBLAS) message(STATUS "HIP and hipBLAS found") file(GLOB GGML_HEADERS_ROCM "ggml-cuda/*.cuh") - list(APPEND GGML_HEADERS_ROCM "ggml-cuda.h") + list(APPEND GGML_HEADERS_ROCM "../include/ggml-cuda.h") file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu") list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu") @@ -409,7 +408,7 @@ if (GGML_HIPBLAS) if (GGML_CUDA_FA_ALL_QUANTS) file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu") list(APPEND GGML_SOURCES_ROCM ${SRCS}) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FA_ALL_QUANTS) + add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS) else() file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu") list(APPEND GGML_SOURCES_ROCM ${SRCS}) @@ -421,25 +420,25 @@ if (GGML_HIPBLAS) list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA) - list(APPEND GGML_CDEF_PRIVATE GGML_USE_HIPBLAS) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X}) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y}) - list(APPEND GGML_CDEF_PRIVATE K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER}) + add_compile_definitions(GGML_USE_HIPBLAS) + add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X}) + add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y}) + add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER}) if (GGML_HIP_UMA) - list(APPEND GGML_CDEF_PRIVATE GGML_HIP_UMA) + add_compile_definitions(GGML_HIP_UMA) endif() if (GGML_CUDA_FORCE_DMMV) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FORCE_DMMV) + add_compile_definitions(GGML_CUDA_FORCE_DMMV) endif() if (GGML_CUDA_FORCE_MMQ) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FORCE_MMQ) + add_compile_definitions(GGML_CUDA_FORCE_MMQ) endif() if (GGML_CUDA_NO_PEER_COPY) - list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_NO_PEER_COPY) + add_compile_definitions(GGML_CUDA_NO_PEER_COPY) endif() if (CXX_IS_HIPCC) @@ -474,11 +473,11 @@ if (GGML_SYCL) list(APPEND GGML_CDEF_PUBLIC GGML_USE_SYCL) if (GGML_SYCL_F16) - list(APPEND GGML_CDEF_PRIVATE GGML_SYCL_F16) + add_compile_definitions(GGML_SYCL_F16) endif() if (GGML_CUDA_FORCE_MMQ) - list(APPEND GGML_CDEF_PRIVATE GGML_SYCL_FORCE_MMQ) + add_compile_definitions(GGML_SYCL_FORCE_MMQ) endif() add_compile_options(-I./) #include DPCT @@ -490,7 +489,7 @@ if (GGML_SYCL) endif() file(GLOB GGML_HEADERS_SYCL "ggml-sycl/*.hpp") - list(APPEND GGML_HEADERS_SYCL "ggml-sycl.h") + list(APPEND GGML_HEADERS_SYCL "../include/ggml-sycl.h") file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp") list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp") @@ -518,7 +517,7 @@ if (GGML_RPC) set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ws2_32) endif() - set(GGML_HEADERS_RPC ggml-rpc.h) + set(GGML_HEADERS_RPC ../include/ggml-rpc.h) set(GGML_SOURCES_RPC ggml-rpc.cpp) endif() @@ -528,7 +527,7 @@ if (GGML_VULKAN) if (Vulkan_FOUND) message(STATUS "Vulkan found") - set(GGML_HEADERS_VULKAN ggml-vulkan.h) + set(GGML_HEADERS_VULKAN ../include/ggml-vulkan.h) set(GGML_SOURCES_VULKAN ggml-vulkan.cpp) list(APPEND GGML_CDEF_PUBLIC GGML_USE_VULKAN) @@ -540,23 +539,23 @@ if (GGML_VULKAN) endif() if (GGML_VULKAN_CHECK_RESULTS) - list(APPEND GGML_CDEF_PRIVATE GGML_VULKAN_CHECK_RESULTS) + add_compile_definitions(GGML_VULKAN_CHECK_RESULTS) endif() if (GGML_VULKAN_DEBUG) - list(APPEND GGML_CDEF_PRIVATE GGML_VULKAN_DEBUG) + add_compile_definitions(GGML_VULKAN_DEBUG) endif() if (GGML_VULKAN_MEMORY_DEBUG) - list(APPEND GGML_CDEF_PRIVATE GGML_VULKAN_MEMORY_DEBUG) + add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG) endif() if (GGML_VULKAN_VALIDATE) - list(APPEND GGML_CDEF_PRIVATE GGML_VULKAN_VALIDATE) + add_compile_definitions(GGML_VULKAN_VALIDATE) endif() if (GGML_VULKAN_RUN_TESTS) - list(APPEND GGML_CDEF_PRIVATE GGML_VULKAN_RUN_TESTS) + add_compile_definitions(GGML_VULKAN_RUN_TESTS) endif() set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} Vulkan::Vulkan) @@ -713,8 +712,8 @@ if (GGML_KOMPUTE) ) # Add the stamp to the main sources to ensure dependency tracking - set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) - set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) + set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) + set(GGML_HEADERS_KOMPUTE ../include/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE) @@ -730,7 +729,7 @@ if (GGML_CPU_HBM) message(STATUS "Using memkind for CPU HBM") - list(APPEND GGML_CDEF_PRIVATE GGML_USE_CPU_HBM) + add_compile_definitions(GGML_USE_CPU_HBM) target_link_libraries(ggml PUBLIC memkind) endif() @@ -874,7 +873,7 @@ execute_process( ) if (output MATCHES "dyld-1015\.7") - list(APPEND GGML_CDEF_PRIVATE HAVE_BUGGY_APPLE_LINKER) + add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) endif() # architecture specific @@ -1156,10 +1155,9 @@ if (EMSCRIPTEN) endif() target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC}) -target_compile_definitions(ggml PRIVATE ${GGML_CDEF_PRIVATE}) - -target_include_directories(ggml PUBLIC . ../include ${GGML_EXTRA_INCLUDES}) -target_compile_features (ggml PUBLIC c_std_11) # don't bump +target_include_directories(ggml PUBLIC ../include) +target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES}) +target_compile_features (ggml PRIVATE c_std_11) # don't bump target_link_libraries(ggml PRIVATE Threads::Threads ${GGML_EXTRA_LIBS}) diff --git a/src/ggml-cuda/mma.cuh b/src/ggml-cuda/mma.cuh index 0301a52f9..5d87dd8e6 100644 --- a/src/ggml-cuda/mma.cuh +++ b/src/ggml-cuda/mma.cuh @@ -23,7 +23,7 @@ struct mma_int_A_I16K4 { __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) { #if defined(INT8_MMA_AVAILABLE) - const int * xs = xs0 + (threadIdx.x%I)*stride + (threadIdx.x/I)*(K/2); + const int * xs = xs0 + (threadIdx.x%I)*stride; asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];" : "+r"(x[0]), "+r"(x[1]) : "l"(xs)); diff --git a/src/ggml-sycl.cpp b/src/ggml-sycl.cpp index db045336f..4a668a2c3 100644 --- a/src/ggml-sycl.cpp +++ b/src/ggml-sycl.cpp @@ -37,6 +37,7 @@ #include "ggml-backend-impl.h" #include "ggml-sycl/backend.hpp" +#include "ggml-sycl/presets.hpp" bool ggml_sycl_loaded(void); void ggml_sycl_free_data(struct ggml_tensor * tensor); diff --git a/src/ggml-sycl/common.hpp b/src/ggml-sycl/common.hpp index 414c37eed..e01f91633 100644 --- a/src/ggml-sycl/common.hpp +++ b/src/ggml-sycl/common.hpp @@ -17,6 +17,7 @@ #include #include "dpct/helper.hpp" +#include "ggml-sycl.h" #include "presets.hpp" #define GGML_COMMON_DECL_SYCL diff --git a/src/ggml-sycl/presets.hpp b/src/ggml-sycl/presets.hpp index 5e6b61813..fe9d41770 100644 --- a/src/ggml-sycl/presets.hpp +++ b/src/ggml-sycl/presets.hpp @@ -15,8 +15,6 @@ #define GGML_SYCL_MAX_STREAMS 8 #define GGML_SYCL_MAX_BUFFERS 256 -#define GGML_SYCL_MAX_DEVICES 48 -#define GGML_SYCL_NAME "SYCL" #define WARP_SIZE 32 #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses diff --git a/tests/test-backend-buffer.cpp b/tests/test-backend-buffer.cpp index 71181d92a..e8b62f282 100644 --- a/tests/test-backend-buffer.cpp +++ b/tests/test-backend-buffer.cpp @@ -1,11 +1,10 @@ -#include #include #include #include -#include -#include -#include +#include +#include +#include static bool is_pow2(size_t x) { return (x & (x - 1)) == 0; diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 1ed74e543..f74c0db47 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include