feat: trying support QNN
hans00 committed Apr 30, 2024
1 parent 2899f8b commit 5e9ac90
Showing 6 changed files with 5,533 additions and 1 deletion.
70 changes: 70 additions & 0 deletions CMakeLists.txt
@@ -5,6 +5,9 @@ cmake_policy(SET CMP0042 NEW)
project (llama-node)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)

if(NOT DEFINED napi_build_version)
set(napi_build_version 6)
@@ -77,6 +80,73 @@ file(
"src/SaveSessionWorker.h"
)

if (LLAMA_QNN)
if (PLATFORM STREQUAL "linux" AND ARCH STREQUAL "x64")
set(QNN_PLATFORM "x86_64-linux-clang")
elseif (PLATFORM STREQUAL "linux" AND ARCH STREQUAL "arm64")
set(QNN_PLATFORM "aarch64-ubuntu-gcc7.5")
elseif (PLATFORM STREQUAL "win32" AND ARCH STREQUAL "x64")
set(QNN_PLATFORM "x86_64-windows-msvc")
elseif (PLATFORM STREQUAL "win32" AND ARCH STREQUAL "arm64")
set(QNN_PLATFORM "aarch64-windows-msvc")
endif()

if (NOT QNN_PLATFORM)
message(FATAL_ERROR "QNN is not supported on this platform")
endif()
set(QNN_LIB_PATH ${QNN_ROOT}/lib/${QNN_PLATFORM})

file(
GLOB QNN_SO_FILES
"${QNN_LIB_PATH}/libc++*"
"${QNN_LIB_PATH}/libQnn*.so"
"${QNN_LIB_PATH}/Htp*.dll"
"${QNN_LIB_PATH}/Qnn*"
)

file(COPY ${QNN_SO_FILES} DESTINATION ${PLATFORM_BINARY_DIR})

file(
GLOB QNN_EXTRA_FILES
"${QNN_ROOT}/lib/hexagon-v*/unsigned/libQnn*Skel.so"
"${QNN_ROOT}/lib/hexagon-v*/unsigned/*.cat"
)

file(COPY ${QNN_EXTRA_FILES} DESTINATION ${PLATFORM_BINARY_DIR})

list(APPEND LINKS ${QNN_SO_FILES})

file(
GLOB QNN_HEADER_FILES
"src/ggml-qnn.h"
)

file(
GLOB QNN_SOURCE_FILES
"src/ggml-qnn.cpp"
)

target_compile_definitions(ggml PUBLIC GGML_USE_QNN)
target_include_directories(ggml PUBLIC ${QNN_ROOT}/include/QNN)
target_sources(ggml PRIVATE ${QNN_SOURCE_FILES} ${QNN_HEADER_FILES})
target_include_directories(llama PRIVATE "src")
set_target_properties(ggml PROPERTIES CXX_STANDARD 17)

# apply patches/qnn.patch to ggml
add_custom_command(
OUTPUT ${CMAKE_BUILD_DIR}/patch.log
COMMAND git apply ${CMAKE_SOURCE_DIR}/patches/qnn.patch
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
)
else()
# undo patches/qnn.patch to ggml
add_custom_command(
OUTPUT ${CMAKE_BUILD_DIR}/patch.log
COMMAND git apply -R ${CMAKE_SOURCE_DIR}/patches/qnn.patch
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
)
endif()

add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common)
3 changes: 2 additions & 1 deletion package.json
@@ -41,7 +41,8 @@
"bin/**/*",
"scripts/*.js",
"scripts/*.ts",
"src/**/*.{c,cc,cpp,h,hh,hpp,txt,cmake}",
"src/*",
"externals/**/*.{c,cc,cpp,h,hh,hpp,txt,cmake}",
"lib/*.js",
"lib/*.ts",
"CMakeLists.txt"
74 changes: 74 additions & 0 deletions patches/qnn.patch
@@ -0,0 +1,74 @@
diff --git a/ggml-backend.c b/ggml-backend.c
index f5bdcf07..536a5767 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -416,7 +416,7 @@ GGML_CALL static void ggml_backend_registry_init(void) {
}

initialized = true;
-
+ printf("GGML_USE_CPU\n");
ggml_backend_register("CPU", ggml_backend_reg_cpu_init, ggml_backend_cpu_buffer_type(), NULL);

// add forward decls here to avoid including the backend headers
@@ -445,6 +445,10 @@ GGML_CALL static void ggml_backend_registry_init(void) {
extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
ggml_backend_kompute_reg_devices();
#endif
+#ifdef GGML_USE_QNN
+ extern GGML_CALL void ggml_backend_qnn_reg_devices(void);
+ ggml_backend_qnn_reg_devices();
+#endif
}

GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
diff --git a/llama.cpp b/llama.cpp
index 18d6297c..f2a39613 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17,6 +17,8 @@
# include "ggml-sycl.h"
#elif defined(GGML_USE_KOMPUTE)
# include "ggml-kompute.h"
+#elif defined(GGML_USE_QNN)
+# include "ggml-qnn.h"
#endif

#ifdef GGML_USE_METAL
@@ -1679,6 +1681,8 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(int gpu) {
buft = ggml_backend_opencl_buffer_type();
#elif defined(GGML_USE_KOMPUTE)
buft = ggml_backend_kompute_buffer_type(gpu);
+#elif defined(GGML_USE_QNN)
+ buft = ggml_backend_qnn_buffer_type(gpu);
if (buft == nullptr) {
LLAMA_LOG_WARN("%s: cannot use GPU %d, check `vulkaninfo --summary`\n", __func__, gpu);
}
@@ -15293,8 +15297,9 @@ bool llama_supports_mlock(void) {

bool llama_supports_gpu_offload(void) {
#if defined(GGML_USE_CUDA) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
- defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE)
+ defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_QNN)
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
+ printf("llama_supports_gpu_offload: true\n");
return true;
#else
return false;
@@ -15607,6 +15612,16 @@ struct llama_context * llama_new_context_with_model(
}
ctx->backends.push_back(backend);
}
+#elif defined(GGML_USE_QNN)
+ if (model->n_gpu_layers > 0) {
+ auto * backend = ggml_backend_qnn_init(model->main_gpu);
+ if (backend == nullptr) {
+ LLAMA_LOG_ERROR("%s: failed to initialize QNN backend\n", __func__);
+ llama_free(ctx);
+ return nullptr;
+ }
+ ctx->backends.push_back(backend);
+ }
#endif
ctx->backend_cpu = ggml_backend_cpu_init();
if (ctx->backend_cpu == nullptr) {
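
Note: the bulk of this commit's 5,533 added lines is presumably the src/ggml-qnn.h and src/ggml-qnn.cpp files referenced in CMakeLists.txt, which are not rendered on this page. As a rough sketch only, inferred from the call sites in patches/qnn.patch, the header would need to declare entry points along these lines; GGML_CALL, ggml_backend_t and ggml_backend_buffer_type_t come from the existing ggml headers, while the exact function signatures here are assumptions:

// Sketch only (not the actual header from this commit): declarations inferred
// from the call sites in patches/qnn.patch; the real names, arguments, and any
// extra parameters may differ.
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// Called from ggml_backend_registry_init() in ggml-backend.c to register
// the available QNN devices with the ggml backend registry.
GGML_CALL void ggml_backend_qnn_reg_devices(void);

// Called from llama_new_context_with_model() with model->main_gpu;
// returns NULL if the QNN backend cannot be initialized.
GGML_CALL ggml_backend_t ggml_backend_qnn_init(int device);

// Called from llama_default_buffer_type_offload() to select the buffer
// type used when offloading tensors to the given device.
GGML_CALL ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(int device);

#ifdef __cplusplus
}
#endif

In the registry hunk above, ggml_backend_qnn_reg_devices() is expected to call ggml_backend_register() once per available QNN device, mirroring how the other backends register themselves.
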
1 change: 1 addition & 0 deletions src/LlamaContext.cpp
@@ -57,6 +57,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
params.use_mmap = get_option<bool>(options, "use_mmap", true);
params.numa =
static_cast<ggml_numa_strategy>(get_option<uint32_t>(options, "numa", 0));
params.main_gpu = get_option<int32_t>(options, "main_gpu", 0);

llama_backend_init();
llama_numa_init(params.numa);