Showing 6 changed files with 5,533 additions and 1 deletion.
@@ -0,0 +1,74 @@
diff --git a/ggml-backend.c b/ggml-backend.c
index f5bdcf07..536a5767 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -416,7 +416,7 @@ GGML_CALL static void ggml_backend_registry_init(void) {
     }

     initialized = true;
-
+    printf("GGML_USE_CPU\n");
     ggml_backend_register("CPU", ggml_backend_reg_cpu_init, ggml_backend_cpu_buffer_type(), NULL);

     // add forward decls here to avoid including the backend headers
@@ -445,6 +445,10 @@ GGML_CALL static void ggml_backend_registry_init(void) {
     extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
     ggml_backend_kompute_reg_devices();
 #endif
+#ifdef GGML_USE_QNN
+    extern GGML_CALL void ggml_backend_qnn_reg_devices(void);
+    ggml_backend_qnn_reg_devices();
+#endif
 }

 GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
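The hunk above only forward-declares ggml_backend_qnn_reg_devices(); its body lives in the QNN backend source, which is not part of this excerpt. As a hedged sketch, mirroring how other ggml backends register per-device entries through ggml_backend_register() (whose signature appears in the context line above): GGML_QNN_MAX_DEVICES, the "QNN%d" naming, and the wrapper ggml_backend_reg_qnn_init() are illustrative assumptions, not taken from the commit; only ggml_backend_qnn_init() and ggml_backend_qnn_buffer_type() are names that appear later in this patch.

/* Hypothetical sketch, not the commit's implementation. */
#include <stdint.h>         // intptr_t
#include <stdio.h>          // snprintf
#include "ggml-backend.h"   // ggml_backend_register, ggml_backend_t
#include "ggml-qnn.h"       // assumed to declare ggml_backend_qnn_init / ggml_backend_qnn_buffer_type

#define GGML_QNN_MAX_DEVICES 3  // assumption: real code would query the QNN runtime

// Wrapper with the ggml_backend_init_fn shape: user_data carries the device index.
GGML_CALL static ggml_backend_t ggml_backend_reg_qnn_init(const char * params, void * user_data) {
    (void) params;  // unused
    return ggml_backend_qnn_init((int) (intptr_t) user_data);
}

GGML_CALL void ggml_backend_qnn_reg_devices(void) {
    for (int i = 0; i < GGML_QNN_MAX_DEVICES; i++) {
        char name[16];
        snprintf(name, sizeof(name), "QNN%d", i);
        ggml_backend_register(name, ggml_backend_reg_qnn_init,
                              ggml_backend_qnn_buffer_type(i), (void *) (intptr_t) i);
    }
}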
diff --git a/llama.cpp b/llama.cpp
index 18d6297c..f2a39613 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17,6 +17,8 @@
 #  include "ggml-sycl.h"
 #elif defined(GGML_USE_KOMPUTE)
 #  include "ggml-kompute.h"
+#elif defined(GGML_USE_QNN)
+#  include "ggml-qnn.h"
 #endif

 #ifdef GGML_USE_METAL
@@ -1679,6 +1681,8 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(int gpu) {
     buft = ggml_backend_opencl_buffer_type();
 #elif defined(GGML_USE_KOMPUTE)
     buft = ggml_backend_kompute_buffer_type(gpu);
+#elif defined(GGML_USE_QNN)
+    buft = ggml_backend_qnn_buffer_type(gpu);
     if (buft == nullptr) {
         LLAMA_LOG_WARN("%s: cannot use GPU %d, check `vulkaninfo --summary`\n", __func__, gpu);
     }
@@ -15293,8 +15297,9 @@ bool llama_supports_mlock(void) {

 bool llama_supports_gpu_offload(void) {
 #if defined(GGML_USE_CUDA) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
-    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE)
+    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_QNN)
     // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
+    printf("llama_supports_gpu_offload: true\n");
     return true;
 #else
     return false;
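Since GGML_USE_QNN now makes llama_supports_gpu_offload() return true, an application can gate its offload request on that call at run time. A minimal check against the llama.h C API of roughly the same vintage as this patch; the helper name is illustrative:

#include <stdio.h>
#include "llama.h"

// Return 0 layers when the current build cannot offload at all.
static int choose_n_gpu_layers(int wanted) {
    if (!llama_supports_gpu_offload()) {
        fprintf(stderr, "this build has no GPU offload, staying on CPU\n");
        return 0;
    }
    return wanted;
}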
@@ -15607,6 +15612,16 @@ struct llama_context * llama_new_context_with_model(
             }
             ctx->backends.push_back(backend);
         }
+#elif defined(GGML_USE_QNN)
+        if (model->n_gpu_layers > 0) {
+            auto * backend = ggml_backend_qnn_init(model->main_gpu);
+            if (backend == nullptr) {
+                LLAMA_LOG_ERROR("%s: failed to initialize QNN backend\n", __func__);
+                llama_free(ctx);
+                return nullptr;
+            }
+            ctx->backends.push_back(backend);
+        }
 #endif
         ctx->backend_cpu = ggml_backend_cpu_init();
         if (ctx->backend_cpu == nullptr) {
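To actually take the new QNN branch in llama_new_context_with_model(), a caller has to request at least one offloaded layer and pick a device through main_gpu; if ggml_backend_qnn_init() returns NULL, context creation as a whole fails with nullptr. A hedged end-to-end sketch using the standard llama.h calls of that era; the model path and layer count are placeholders, not values from the commit:

#include <stdio.h>
#include "llama.h"

int main(void) {
    llama_backend_init();

    struct llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = 99;  // > 0 so the GGML_USE_QNN branch above runs
    mparams.main_gpu     = 0;   // device index handed to ggml_backend_qnn_init()

    // "model.gguf" is a placeholder path.
    struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    struct llama_context_params cparams = llama_context_default_params();
    struct llama_context * ctx = llama_new_context_with_model(model, cparams);
    if (ctx == NULL) {
        // Also the path taken when ggml_backend_qnn_init() fails.
        fprintf(stderr, "failed to create context\n");
        llama_free_model(model);
        return 1;
    }

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}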