fix: fix windows build
hans00 committed May 1, 2024
1 parent d5a62f7 commit a7a3099
Showing 6 changed files with 353 additions and 56 deletions.
37 changes: 19 additions & 18 deletions CMakeLists.txt
@@ -64,6 +64,17 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(LLAMA_STATIC ON CACHE BOOL "Build llama as static library")
add_subdirectory("src/llama.cpp")

# apply patches
set(PATCH_FILE ${CMAKE_SOURCE_DIR}/patches/llama.patch)
add_custom_target(patch)
add_custom_command(
TARGET patch
COMMAND patch -N -p1 < ${PATCH_FILE} || true
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp

)
add_dependencies(ggml patch)

file(
GLOB SOURCE_FILES
"src/addons.cc"
@@ -95,6 +106,7 @@ if (LLAMA_QNN)
message(FATAL_ERROR "QNN is not supported on this platform")
endif()
set(QNN_LIB_PATH ${QNN_ROOT}/lib/${QNN_PLATFORM})
message(STATUS "QNN_LIB_PATH: ${QNN_LIB_PATH}")

file(
GLOB QNN_SO_FILES
@@ -118,33 +130,22 @@ if (LLAMA_QNN)

file(
GLOB QNN_HEADER_FILES
"src/ggml-qnn.h"
"src/ggml-qnn/ggml-qnn.h"
)

file(
GLOB QNN_SOURCE_FILES
"src/ggml-qnn.cpp"
"src/ggml-qnn/pthread-shim.h"
"src/ggml-qnn/ggml-qnn.cpp"
)

target_compile_definitions(ggml PUBLIC GGML_USE_QNN)
target_include_directories(ggml PUBLIC ${QNN_ROOT}/include/QNN)
target_include_directories(ggml PUBLIC ${QNN_ROOT}/include ${QNN_ROOT}/include/QNN)
target_sources(ggml PRIVATE ${QNN_SOURCE_FILES} ${QNN_HEADER_FILES})
target_include_directories(llama PRIVATE "src")
target_include_directories(llama PRIVATE "src/ggml-qnn")
set_target_properties(ggml PROPERTIES CXX_STANDARD 17)

# apply patches/qnn.patch to ggml
add_custom_command(
OUTPUT ${CMAKE_BUILD_DIR}/patch.log
COMMAND git apply ${CMAKE_SOURCE_DIR}/patches/qnn.patch
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
)
else()
# undo patches/qnn.patch to ggml
add_custom_command(
OUTPUT ${CMAKE_BUILD_DIR}/patch.log
COMMAND git apply -R ${CMAKE_SOURCE_DIR}/patches/qnn.patch
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
)
set_target_properties(ggml PROPERTIES CXX_STANDARD_REQUIRED ON)
set_target_properties(ggml PROPERTIES C_STANDARD 11)
endif()

add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
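
For reference, piecing the hunks above back together, the QNN section of CMakeLists.txt after this commit appears to read roughly as follows. This is a best-effort reconstruction (added/removed markers are not preserved in this view) and assumes it sits in the top-level CMakeLists.txt after add_subdirectory("src/llama.cpp"), so the ggml and llama targets already exist; treat it as a sketch, not an authoritative excerpt.

if (LLAMA_QNN)
    # (platform checks and the QNN runtime-library globbing shown earlier are omitted)
    set(QNN_LIB_PATH ${QNN_ROOT}/lib/${QNN_PLATFORM})
    message(STATUS "QNN_LIB_PATH: ${QNN_LIB_PATH}")

    # Backend sources now live under src/ggml-qnn/ instead of src/.
    file(GLOB QNN_HEADER_FILES "src/ggml-qnn/ggml-qnn.h")
    file(GLOB QNN_SOURCE_FILES "src/ggml-qnn/pthread-shim.h" "src/ggml-qnn/ggml-qnn.cpp")

    # Compile the QNN backend into ggml and expose both SDK include roots.
    target_compile_definitions(ggml PUBLIC GGML_USE_QNN)
    target_include_directories(ggml PUBLIC ${QNN_ROOT}/include ${QNN_ROOT}/include/QNN)
    target_sources(ggml PRIVATE ${QNN_SOURCE_FILES} ${QNN_HEADER_FILES})
    target_include_directories(llama PRIVATE "src/ggml-qnn")
    set_target_properties(ggml PROPERTIES CXX_STANDARD 17)
else()
    set_target_properties(ggml PROPERTIES CXX_STANDARD_REQUIRED ON)
    set_target_properties(ggml PROPERTIES C_STANDARD 11)
endif()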
6 changes: 2 additions & 4 deletions package.json
@@ -39,10 +39,8 @@
},
"files": [
"bin/**/*",
"scripts/*.js",
"scripts/*.ts",
"src/*",
"externals/**/*.{c,cc,cpp,h,hh,hpp,txt,cmake}",
"patches/*",
"src/**/*.{c,cc,cpp,h,hh,hpp,txt,cmake}",
"lib/*.js",
"lib/*.ts",
"CMakeLists.txt"
21 changes: 5 additions & 16 deletions patches/qnn.patch → patches/llama.patch
@@ -1,16 +1,7 @@
diff --git a/ggml-backend.c b/ggml-backend.c
index f5bdcf07..536a5767 100644
index e91d97cd..be4989d3 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -416,7 +416,7 @@ GGML_CALL static void ggml_backend_registry_init(void) {
}

initialized = true;
-
+ printf("GGML_USE_CPU\n");
ggml_backend_register("CPU", ggml_backend_reg_cpu_init, ggml_backend_cpu_buffer_type(), NULL);

// add forward decls here to avoid including the backend headers
@@ -445,6 +445,10 @@ GGML_CALL static void ggml_backend_registry_init(void) {
extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
ggml_backend_kompute_reg_devices();
@@ -23,7 +14,7 @@ index f5bdcf07..536a5767 100644

GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
diff --git a/llama.cpp b/llama.cpp
index 18d6297c..f2a39613 100644
index a25d115c..ff0d929f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17,6 +17,8 @@
@@ -35,7 +26,7 @@ index 18d6297c..f2a39613 100644
#endif

#ifdef GGML_USE_METAL
@@ -1679,6 +1681,8 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(int gpu) {
@@ -1658,6 +1660,8 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(int gpu) {
buft = ggml_backend_opencl_buffer_type();
#elif defined(GGML_USE_KOMPUTE)
buft = ggml_backend_kompute_buffer_type(gpu);
@@ -44,18 +35,16 @@ index 18d6297c..f2a39613 100644
if (buft == nullptr) {
LLAMA_LOG_WARN("%s: cannot use GPU %d, check `vulkaninfo --summary`\n", __func__, gpu);
}
@@ -15293,8 +15297,9 @@ bool llama_supports_mlock(void) {
@@ -14916,7 +14920,7 @@ bool llama_supports_mlock(void) {

bool llama_supports_gpu_offload(void) {
#if defined(GGML_USE_CUDA) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
- defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE)
+ defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_QNN)
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
+ printf("llama_supports_gpu_offload: true\n");
return true;
#else
return false;
@@ -15607,6 +15612,16 @@ struct llama_context * llama_new_context_with_model(
@@ -15203,6 +15207,16 @@ struct llama_context * llama_new_context_with_model(
}
ctx->backends.push_back(backend);
}
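
The patch shown above is what the new custom target in CMakeLists.txt applies to the vendored llama.cpp sources before ggml is built. A minimal standalone sketch of that pattern, condensed into a single add_custom_target call (the target name apply_patch is illustrative, and a GNU-style patch executable is assumed to be on PATH):

# Apply patches/llama.patch to the vendored llama.cpp before building ggml.
set(PATCH_FILE ${CMAKE_SOURCE_DIR}/patches/llama.patch)

add_custom_target(apply_patch
    # -N skips hunks that look already applied; `|| true` swallows the
    # non-zero exit code on a tree that has already been patched (POSIX shells).
    COMMAND patch -N -p1 < ${PATCH_FILE} || true
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
    COMMENT "Applying ${PATCH_FILE}"
)

# Build-order dependency: patching must finish before ggml compiles.
add_dependencies(ggml apply_patch)

Because the patch is now applied unconditionally, the old QNN-only git apply / git apply -R pair that toggled patches/qnn.patch on and off is no longer needed, which is why it disappears from the LLAMA_QNN branch of CMakeLists.txt.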
