From 14b549c70814ff9848998366a501975faea2284b Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Fri, 9 Aug 2024 15:35:39 +0200 Subject: [PATCH 1/9] ggml : move rope type enum to ggml.h This commit moves the `llama_rope_type` enum from `llama.h` to `ggml.h` and changes its name to `ggml_rope_type`. The motivation for this change is to address the TODO in `llama.h` and use the enum in ggml. Note: This commit does not change the `mode` parameter to be of type `enum ggml_rope_type`. The name `mode` and its usage suggest that it might be more generic and possibly used as a bit field for multiple flags. Further investigation/discussion may be needed to determine if `mode` should be restricted to RoPE types. --- ggml/include/ggml.h | 8 ++++++++ ggml/src/ggml.c | 6 +++--- include/llama.h | 11 +---------- src/llama.cpp | 18 +++++++++--------- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 15602a96df7ad..11b3e17da68f5 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -437,6 +437,14 @@ extern "C" { GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors }; + // Rotary Positional Embedding (RoPE) types + enum ggml_rope_type { + GGML_ROPE_TYPE_NONE = -1, + GGML_ROPE_TYPE_NORM = 0, + GGML_ROPE_TYPE_NEOX = 2, + GGML_ROPE_TYPE_GLM = 4, + }; + // available tensor operations: enum ggml_op { GGML_OP_NONE = 0, diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index c937b5e537c54..77d2cd3339d64 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -6545,7 +6545,7 @@ struct ggml_tensor * ggml_rope_back( GGML_ASSERT(a->ne[2] == b->ne[0]); GGML_ASSERT(c == NULL && "freq factors not implemented yet"); - GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet"); + GGML_ASSERT((mode & GGML_ROPE_TYPE_GLM) == 0 && "ggml_rope_back() for ChatGLM not implemented yet"); bool is_node = false; @@ -14093,7 +14093,7 @@ static void ggml_compute_forward_rope_f32( float corr_dims[2]; ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; const float * freq_factors = NULL; if (src2 != NULL) { @@ -14218,7 +14218,7 @@ static void ggml_compute_forward_rope_f16( float corr_dims[2]; ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; const float * freq_factors = NULL; if (src2 != NULL) { diff --git a/include/llama.h b/include/llama.h index 66c266298e86f..360f2d7274a09 100644 --- a/include/llama.h +++ b/include/llama.h @@ -95,15 +95,6 @@ extern "C" { LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22, }; - // note: these values should be synchronized with ggml_rope - // TODO: maybe move this enum to ggml.h (ggml_rope_type) - enum llama_rope_type { - LLAMA_ROPE_TYPE_NONE = -1, - LLAMA_ROPE_TYPE_NORM = 0, - LLAMA_ROPE_TYPE_NEOX = 2, - LLAMA_ROPE_TYPE_GLM = 4, - }; - enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file LLAMA_TOKEN_TYPE_UNDEFINED = 0, LLAMA_TOKEN_TYPE_NORMAL = 1, @@ -462,7 +453,7 @@ extern "C" { LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx); LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model); - LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model); + LLAMA_API enum ggml_rope_type ggml_rope_type (const struct llama_model * model); LLAMA_API int32_t 
llama_n_vocab (const struct llama_model * model); LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model); diff --git a/src/llama.cpp b/src/llama.cpp index be6dbf88a7790..3cbb63938ca37 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -2201,7 +2201,7 @@ struct llama_hparams { llama_token dec_start_token_id = -1; enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE; - enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE; + enum ggml_rope_type rope_type = GGML_ROPE_TYPE_NONE; enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE; bool operator!=(const llama_hparams & other) const { @@ -5219,7 +5219,7 @@ static void llm_load_hparams( hparams.use_alibi = true; } - hparams.rope_type = llama_rope_type(&model); + hparams.rope_type = ggml_rope_type(&model); } static void llm_load_vocab( @@ -8331,7 +8331,7 @@ struct llm_build_context { const bool flash_attn; const enum llama_pooling_type pooling_type; - const enum llama_rope_type rope_type; + const enum ggml_rope_type rope_type; const llm_build_cb & cb; @@ -15105,7 +15105,7 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) { bool need_reserve = false; // apply K-shift if needed - if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) { + if (lctx.model.hparams.rope_type != GGML_ROPE_TYPE_NONE && lctx.kv_self.has_shift) { if (lctx.model.arch == LLM_ARCH_DEEPSEEK2) { // not supported due to MLA GGML_ABORT("Deepseek2 does not support K-shift"); } @@ -16881,7 +16881,7 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) { return model->vocab.type; } -enum llama_rope_type llama_rope_type(const struct llama_model * model) { +enum ggml_rope_type ggml_rope_type(const struct llama_model * model) { switch (model->arch) { // these models do not use RoPE case LLM_ARCH_GPT2: @@ -16893,7 +16893,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { case LLM_ARCH_JINA_BERT_V2: case LLM_ARCH_T5: case LLM_ARCH_JAIS: - return LLAMA_ROPE_TYPE_NONE; + return GGML_ROPE_TYPE_NONE; // use what we call a normal RoPE, operating on pairs of consecutive head values case LLM_ARCH_LLAMA: @@ -16909,7 +16909,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { case LLM_ARCH_ARCTIC: case LLM_ARCH_DEEPSEEK2: case LLM_ARCH_CHATGLM: - return LLAMA_ROPE_TYPE_NORM; + return GGML_ROPE_TYPE_NORM; // the pairs of head values are offset by n_rot/2 case LLM_ARCH_FALCON: @@ -16930,14 +16930,14 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { case LLM_ARCH_OPENELM: case LLM_ARCH_GPTNEOX: case LLM_ARCH_CODESHELL: - return LLAMA_ROPE_TYPE_NEOX; + return GGML_ROPE_TYPE_NEOX; // all model arches should be listed explicitly here case LLM_ARCH_UNKNOWN: GGML_ABORT("unknown architecture"); } - return LLAMA_ROPE_TYPE_NONE; + return GGML_ROPE_TYPE_NONE; } enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) { From 5e14dbf2ea5210adbb637f3779f93dd40654c75c Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sat, 10 Aug 2024 06:50:54 +0200 Subject: [PATCH 2/9] squash! ggml : move rope type enum to ggml.h This commit removes GGML_ROPE_TYPE_NONE and GGML_ROPE_TYPE_GLM from ggml.h, and adds back the llama_rope_type enum. I've kept the assert for GGML_ROPE_TYPE_GLM as I'm not sure if it is safe to remove it yet.
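For reference, a minimal C sketch (not part of the patch; the helper names are made up and assert() stands in for GGML_ASSERT) of how the mode argument remains a bit field after this commit, with the ChatGLM bit checked against its raw value 4:

    #include <assert.h>
    #include <stdbool.h>

    enum ggml_rope_type {
        GGML_ROPE_TYPE_NORM = 0,  /* rotate pairs of consecutive head values */
        GGML_ROPE_TYPE_NEOX = 2,  /* pairs offset by n_rot/2 (GPT-NeoX style) */
    };

    /* mirrors the is_neox check in ggml_compute_forward_rope_f32/f16 */
    static bool rope_mode_is_neox(int mode) {
        return (mode & GGML_ROPE_TYPE_NEOX) != 0;
    }

    /* mirrors the guard kept in ggml_rope_back(); the ChatGLM bit keeps its
       raw value 4 because GGML_ROPE_TYPE_GLM is removed again by this commit */
    static void rope_back_guard(int mode) {
        assert((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
    }

    int main(void) {
        assert(rope_mode_is_neox(GGML_ROPE_TYPE_NEOX));
        assert(!rope_mode_is_neox(GGML_ROPE_TYPE_NORM));
        rope_back_guard(GGML_ROPE_TYPE_NORM);
        return 0;
    }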
--- ggml/include/ggml.h | 2 -- ggml/src/ggml.c | 2 +- include/llama.h | 9 ++++++++- src/llama.cpp | 18 +++++++++--------- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 11b3e17da68f5..22f5dfed4fa2e 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -439,10 +439,8 @@ extern "C" { // Rotary Positional Embedding (RoPE) types enum ggml_rope_type { - GGML_ROPE_TYPE_NONE = -1, GGML_ROPE_TYPE_NORM = 0, GGML_ROPE_TYPE_NEOX = 2, - GGML_ROPE_TYPE_GLM = 4, }; // available tensor operations: diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 77d2cd3339d64..b3f37ac81e0bc 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -6545,7 +6545,7 @@ struct ggml_tensor * ggml_rope_back( GGML_ASSERT(a->ne[2] == b->ne[0]); GGML_ASSERT(c == NULL && "freq factors not implemented yet"); - GGML_ASSERT((mode & GGML_ROPE_TYPE_GLM) == 0 && "ggml_rope_back() for ChatGLM not implemented yet"); + GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet"); bool is_node = false; diff --git a/include/llama.h b/include/llama.h index 360f2d7274a09..f192d8106364d 100644 --- a/include/llama.h +++ b/include/llama.h @@ -95,6 +95,13 @@ extern "C" { LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22, }; + enum llama_rope_type { + LLAMA_ROPE_TYPE_NONE = -1, + LLAMA_ROPE_TYPE_NORM = GGML_ROPE_TYPE_NORM, + LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX, + }; + + enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file LLAMA_TOKEN_TYPE_UNDEFINED = 0, LLAMA_TOKEN_TYPE_NORMAL = 1, @@ -453,7 +460,7 @@ extern "C" { LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx); LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model); - LLAMA_API enum ggml_rope_type ggml_rope_type (const struct llama_model * model); + LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model); LLAMA_API int32_t llama_n_vocab (const struct llama_model * model); LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model); diff --git a/src/llama.cpp b/src/llama.cpp index 3cbb63938ca37..be6dbf88a7790 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -2201,7 +2201,7 @@ struct llama_hparams { llama_token dec_start_token_id = -1; enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE; - enum ggml_rope_type rope_type = GGML_ROPE_TYPE_NONE; + enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE; enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE; bool operator!=(const llama_hparams & other) const { @@ -5219,7 +5219,7 @@ static void llm_load_hparams( hparams.use_alibi = true; } - hparams.rope_type = ggml_rope_type(&model); + hparams.rope_type = llama_rope_type(&model); } static void llm_load_vocab( @@ -8331,7 +8331,7 @@ struct llm_build_context { const bool flash_attn; const enum llama_pooling_type pooling_type; - const enum ggml_rope_type rope_type; + const enum llama_rope_type rope_type; const llm_build_cb & cb; @@ -15105,7 +15105,7 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) { bool need_reserve = false; // apply K-shift if needed - if (lctx.model.hparams.rope_type != GGML_ROPE_TYPE_NONE && lctx.kv_self.has_shift) { + if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) { if (lctx.model.arch == LLM_ARCH_DEEPSEEK2) { // not supported due to MLA GGML_ABORT("Deepseek2 does not support K-shift"); } @@ -16881,7 +16881,7 @@ enum llama_vocab_type 
llama_vocab_type(const struct llama_model * model) { return model->vocab.type; } -enum ggml_rope_type ggml_rope_type(const struct llama_model * model) { +enum llama_rope_type llama_rope_type(const struct llama_model * model) { switch (model->arch) { // these models do not use RoPE case LLM_ARCH_GPT2: @@ -16893,7 +16893,7 @@ enum ggml_rope_type ggml_rope_type(const struct llama_model * model) { case LLM_ARCH_JINA_BERT_V2: case LLM_ARCH_T5: case LLM_ARCH_JAIS: - return GGML_ROPE_TYPE_NONE; + return LLAMA_ROPE_TYPE_NONE; // use what we call a normal RoPE, operating on pairs of consecutive head values case LLM_ARCH_LLAMA: @@ -16909,7 +16909,7 @@ enum ggml_rope_type ggml_rope_type(const struct llama_model * model) { case LLM_ARCH_ARCTIC: case LLM_ARCH_DEEPSEEK2: case LLM_ARCH_CHATGLM: - return GGML_ROPE_TYPE_NORM; + return LLAMA_ROPE_TYPE_NORM; // the pairs of head values are offset by n_rot/2 case LLM_ARCH_FALCON: @@ -16930,14 +16930,14 @@ enum ggml_rope_type ggml_rope_type(const struct llama_model * model) { case LLM_ARCH_OPENELM: case LLM_ARCH_GPTNEOX: case LLM_ARCH_CODESHELL: - return GGML_ROPE_TYPE_NEOX; + return LLAMA_ROPE_TYPE_NEOX; // all model arches should be listed explicitly here case LLM_ARCH_UNKNOWN: GGML_ABORT("unknown architecture"); } - return GGML_ROPE_TYPE_NONE; + return LLAMA_ROPE_TYPE_NONE; } enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) { From c9206f63be303ce74e53636c278c6294bb7b5100 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 06:58:14 +0200 Subject: [PATCH 3/9] squash! ggml : move rope type enum to ggml.h This commit removes the enum ggml_rope_type from ggml.h and replaces it with a define (GGML_ROPE_TYPE_NEOX). This define is used in the code to check if the mode is set to GPT-NeoX. Also the enum llama_rope_type has been updated to reflect this change. 
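The resulting pattern shared by the backends can be sketched as follows (illustrative only; the helper name is made up, but reading op_params[2] and testing the bit rather than comparing for equality match the hunks below):

    #include <stdbool.h>
    #include <stdint.h>

    #define GGML_ROPE_TYPE_NEOX 2

    /* each backend recovers the RoPE mode from the op's parameters and
       tests the NEOX bit */
    static bool rope_op_is_neox(const int32_t * op_params) {
        const int mode = op_params[2];
        return (mode & GGML_ROPE_TYPE_NEOX) != 0;
    }

    int main(void) {
        const int32_t op_params[11] = { 0, 0, GGML_ROPE_TYPE_NEOX };  /* mode lives at index 2 */
        return rope_op_is_neox(op_params) ? 0 : 1;
    }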
--- ggml/include/ggml.h | 10 +++------- ggml/src/ggml-cann/aclnn_ops.cpp | 2 +- ggml/src/ggml-cuda/rope.cu | 2 +- ggml/src/ggml-metal.m | 2 +- ggml/src/ggml-sycl/rope.cpp | 2 +- ggml/src/ggml-vulkan.cpp | 2 +- ggml/src/kompute-shaders/op_rope_f16.comp | 2 +- ggml/src/kompute-shaders/op_rope_f32.comp | 2 +- include/llama.h | 2 +- 9 files changed, 11 insertions(+), 15 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 22f5dfed4fa2e..c352d5cd82938 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -244,6 +244,8 @@ #define GGML_EXIT_SUCCESS 0 #define GGML_EXIT_ABORTED 1 +#define GGML_ROPE_TYPE_NEOX 2 + #define GGUF_MAGIC "GGUF" #define GGUF_VERSION 3 @@ -437,12 +439,6 @@ extern "C" { GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors }; - // Rotary Positional Embedding (RoPE) types - enum ggml_rope_type { - GGML_ROPE_TYPE_NORM = 0, - GGML_ROPE_TYPE_NEOX = 2, - }; - // available tensor operations: enum ggml_op { GGML_OP_NONE = 0, @@ -1460,7 +1456,7 @@ extern "C" { // rotary position embedding // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED) - // if mode & 2 == 1, GPT-NeoX style + // if mode & GGML_ROPE_TYPE_NEOX == 1, GPT-NeoX style // // b is an int32 vector with size a->ne[2], it contains the positions GGML_API struct ggml_tensor * ggml_rope( diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp index 8c4132f5bb7ad..a4ec8418e2ab3 100644 --- a/ggml/src/ggml-cann/aclnn_ops.cpp +++ b/ggml/src/ggml-cann/aclnn_ops.cpp @@ -2881,7 +2881,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; // init cos/sin cache ggml_cann_pool_alloc sin_allocator( diff --git a/ggml/src/ggml-cuda/rope.cu b/ggml/src/ggml-cuda/rope.cu index 99ec1dd98ca9c..88f586d689cfd 100644 --- a/ggml/src/ggml-cuda/rope.cu +++ b/ggml/src/ggml-cuda/rope.cu @@ -226,7 +226,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float)); memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float)); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; const int32_t * pos = (const int32_t *) src1_d; diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m index 9fc08ab3aa5cc..2c4a9e4e478a9 100644 --- a/ggml/src/ggml-metal.m +++ b/ggml/src/ggml-metal.m @@ -2313,7 +2313,7 @@ static enum ggml_status ggml_metal_graph_compute( memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float)); memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float)); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; id pipeline = nil; diff --git a/ggml/src/ggml-sycl/rope.cpp b/ggml/src/ggml-sycl/rope.cpp index c7545bcc1a8a9..1f06f78fa3d91 100644 --- a/ggml/src/ggml-sycl/rope.cpp +++ b/ggml/src/ggml-sycl/rope.cpp @@ -226,7 +226,7 @@ void ggml_sycl_op_rope( memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float)); memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float)); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; const int32_t * pos = (const int32_t *) src1_dd; diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index b0f36a513f84b..3c4d7dc084fc7 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -4050,7 +4050,7 @@ static vk_pipeline 
ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const case GGML_OP_ROPE: { const int mode = ((const int32_t *) dst->op_params)[2]; - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; if (is_neox) { if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { diff --git a/ggml/src/kompute-shaders/op_rope_f16.comp b/ggml/src/kompute-shaders/op_rope_f16.comp index 1a4058b3f1f10..0ecfb2eab527c 100644 --- a/ggml/src/kompute-shaders/op_rope_f16.comp +++ b/ggml/src/kompute-shaders/op_rope_f16.comp @@ -11,7 +11,7 @@ void main() { const uint i2 = gl_WorkGroupID.y; const uint i1 = gl_WorkGroupID.x; - const bool is_neox = (pcs.mode & 2) != 0; + const bool is_neox = (pcs.mode & GGML_ROPE_TYPE_NEOX) != 0; float corr_dims[2]; rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims); diff --git a/ggml/src/kompute-shaders/op_rope_f32.comp b/ggml/src/kompute-shaders/op_rope_f32.comp index 65e03827a2660..cec0fd9a5d10c 100644 --- a/ggml/src/kompute-shaders/op_rope_f32.comp +++ b/ggml/src/kompute-shaders/op_rope_f32.comp @@ -11,7 +11,7 @@ void main() { const uint i2 = gl_WorkGroupID.y; const uint i1 = gl_WorkGroupID.x; - const bool is_neox = (pcs.mode & 2) != 0; + const bool is_neox = (pcs.mode & GGML_ROPE_TYPE_NEOX) != 0; float corr_dims[2]; rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims); diff --git a/include/llama.h b/include/llama.h index f192d8106364d..573230be302a3 100644 --- a/include/llama.h +++ b/include/llama.h @@ -97,7 +97,7 @@ extern "C" { enum llama_rope_type { LLAMA_ROPE_TYPE_NONE = -1, - LLAMA_ROPE_TYPE_NORM = GGML_ROPE_TYPE_NORM, + LLAMA_ROPE_TYPE_NORM = 0, LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX, }; From 6261222bd0dc0efd51f0fb0435ad3f16a5b52fd6 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 08:16:59 +0200 Subject: [PATCH 4/9] squash! ggml : move rope type enum to ggml.h This commit contains a suggestion to enable the GGML_ROPE_TYPE_NEOX macro/define to be passed to the shader compiler.
--- ggml/src/CMakeLists.txt | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 425a2589502eb..2e104890643c1 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -656,10 +656,25 @@ if (GGML_KOMPUTE) message(FATAL_ERROR "glslc not found") endif() + # Function to extract #define value from header file + function(get_define_value HEADER DEFINE_NAME RESULT_VAR) + file(STRINGS ${HEADER} DEFINE_LINE REGEX "^#define[\t ]+${DEFINE_NAME}[\t ]+.*") + if(DEFINE_LINE) + string(REGEX REPLACE "^#define[\t ]+${DEFINE_NAME}[\t ]+([0-9]+).*" "\\1" DEFINE_VALUE ${DEFINE_LINE}) + set(${RESULT_VAR} ${DEFINE_VALUE} PARENT_SCOPE) + else() + message(WARNING "Define ${DEFINE_NAME} not found in ${HEADER}") + set(${RESULT_VAR} "" PARENT_SCOPE) + endif() + endfunction() + function(compile_shader) set(options) set(oneValueArgs) set(multiValueArgs SOURCES) + set(GGML_HEADER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml.h") + message(STATUS "GGML_HEADER_PATH: ${GGML_HEADER_PATH}") + get_define_value(${GGML_HEADER_PATH} GGML_ROPE_TYPE_NEOX GGML_ROPE_TYPE_NEOX_VALUE) cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) foreach(source ${compile_shader_SOURCES}) get_filename_component(filename ${source} NAME) @@ -671,7 +686,10 @@ if (GGML_KOMPUTE) ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp - COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} + ${GGML_HEADER_PATH} + COMMAND ${glslc_executable} --target-env=vulkan1.2 + -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} + -DGGML_ROPE_TYPE_NEOX=${GGML_ROPE_TYPE_NEOX_VALUE} COMMENT "Compiling ${source} to ${spv_file}" ) From d74cc1674f76c32eefb5f5d96eeed3e50e05f45e Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 08:52:07 +0200 Subject: [PATCH 5/9] squash! ggml : move rope type enum to ggml.h This commit fixes the editorconfig-checker warnings. 
--- ggml/src/CMakeLists.txt | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 2e104890643c1..dff31457f1c2a 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -672,9 +672,8 @@ if (GGML_KOMPUTE) set(options) set(oneValueArgs) set(multiValueArgs SOURCES) - set(GGML_HEADER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml.h") - message(STATUS "GGML_HEADER_PATH: ${GGML_HEADER_PATH}") - get_define_value(${GGML_HEADER_PATH} GGML_ROPE_TYPE_NEOX GGML_ROPE_TYPE_NEOX_VALUE) + set(GGML_HEADER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml.h") + get_define_value(${GGML_HEADER_PATH} GGML_ROPE_TYPE_NEOX GGML_ROPE_TYPE_NEOX_VALUE) cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) foreach(source ${compile_shader_SOURCES}) get_filename_component(filename ${source} NAME) @@ -686,10 +685,10 @@ if (GGML_KOMPUTE) ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp - ${GGML_HEADER_PATH} + ${GGML_HEADER_PATH} COMMAND ${glslc_executable} --target-env=vulkan1.2 - -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} - -DGGML_ROPE_TYPE_NEOX=${GGML_ROPE_TYPE_NEOX_VALUE} + -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} + -DGGML_ROPE_TYPE_NEOX=${GGML_ROPE_TYPE_NEOX_VALUE} COMMENT "Compiling ${source} to ${spv_file}" ) From 3e4d01ce01755fa2aa402d7e91121fffad0afd27 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 10:00:18 +0200 Subject: [PATCH 6/9] squash! ggml : move rope type enum to ggml.h Update comment for ggml_rope function. --- ggml/include/ggml.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index c352d5cd82938..1d2a354024675 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1455,8 +1455,8 @@ extern "C" { struct ggml_tensor * b); // rotary position embedding - // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED) - // if mode & GGML_ROPE_TYPE_NEOX == 1, GPT-NeoX style + // if (mode & 1) - skip n_past elements (NOT SUPPORTED) + // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style // // b is an int32 vector with size a->ne[2], it contains the positions GGML_API struct ggml_tensor * ggml_rope( From ecd1c1e6b80289eb408d95131edb5dff654a3240 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 10:08:51 +0200 Subject: [PATCH 7/9] Revert "squash! ggml : move rope type enum to ggml.h" This reverts commit 6261222bd0dc0efd51f0fb0435ad3f16a5b52fd6. 
--- ggml/src/CMakeLists.txt | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index dff31457f1c2a..425a2589502eb 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -656,24 +656,10 @@ if (GGML_KOMPUTE) message(FATAL_ERROR "glslc not found") endif() - # Function to extract #define value from header file - function(get_define_value HEADER DEFINE_NAME RESULT_VAR) - file(STRINGS ${HEADER} DEFINE_LINE REGEX "^#define[\t ]+${DEFINE_NAME}[\t ]+.*") - if(DEFINE_LINE) - string(REGEX REPLACE "^#define[\t ]+${DEFINE_NAME}[\t ]+([0-9]+).*" "\\1" DEFINE_VALUE ${DEFINE_LINE}) - set(${RESULT_VAR} ${DEFINE_VALUE} PARENT_SCOPE) - else() - message(WARNING "Define ${DEFINE_NAME} not found in ${HEADER}") - set(${RESULT_VAR} "" PARENT_SCOPE) - endif() - endfunction() - function(compile_shader) set(options) set(oneValueArgs) set(multiValueArgs SOURCES) - set(GGML_HEADER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml.h") - get_define_value(${GGML_HEADER_PATH} GGML_ROPE_TYPE_NEOX GGML_ROPE_TYPE_NEOX_VALUE) cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) foreach(source ${compile_shader_SOURCES}) get_filename_component(filename ${source} NAME) @@ -685,10 +671,7 @@ if (GGML_KOMPUTE) ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp - ${GGML_HEADER_PATH} - COMMAND ${glslc_executable} --target-env=vulkan1.2 - -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} - -DGGML_ROPE_TYPE_NEOX=${GGML_ROPE_TYPE_NEOX_VALUE} + COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} COMMENT "Compiling ${source} to ${spv_file}" ) From cfb98641828d79d7ffba94f74b970aed133a520b Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 10:11:16 +0200 Subject: [PATCH 8/9] squash! ggml : move rope type enum to ggml.h Add GGML_ROPE_TYPE_NEOX to rope_common.comp. --- ggml/src/kompute-shaders/rope_common.comp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ggml/src/kompute-shaders/rope_common.comp b/ggml/src/kompute-shaders/rope_common.comp index 7b9394cb2fffc..df4702896d46f 100644 --- a/ggml/src/kompute-shaders/rope_common.comp +++ b/ggml/src/kompute-shaders/rope_common.comp @@ -1,5 +1,7 @@ #include "common.comp" +#define GGML_ROPE_TYPE_NEOX 2 + // TODO: use a local size of 32 or more (Metal uses 1024) layout(local_size_x = 1) in; From 368eea3a5bee48fe303e3122930e0240bc723200 Mon Sep 17 00:00:00 2001 From: slaren Date: Tue, 13 Aug 2024 21:05:58 +0200 Subject: [PATCH 9/9] remove extra line --- include/llama.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/llama.h b/include/llama.h index 573230be302a3..779f3235b669c 100644 --- a/include/llama.h +++ b/include/llama.h @@ -101,7 +101,6 @@ extern "C" { LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX, }; - enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file LLAMA_TOKEN_TYPE_UNDEFINED = 0, LLAMA_TOKEN_TYPE_NORMAL = 1,
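Taken together, the end state of the series can be sketched as follows (illustrative only; build_rope_example() is a made-up helper, and real call sites in llama.cpp go through ggml_rope_ext() with additional frequency-scaling parameters): since LLAMA_ROPE_TYPE_NEOX aliases GGML_ROPE_TYPE_NEOX, the value returned by llama_rope_type() can be passed straight through as the mode bit field of ggml_rope().

    #include "ggml.h"
    #include "llama.h"

    /* illustrative helper, not a function in llama.cpp */
    static struct ggml_tensor * build_rope_example(
            struct ggml_context      * ctx,
            const struct llama_model * model,
            struct ggml_tensor       * cur,   /* [n_embd_head, n_head, n_tokens] */
            struct ggml_tensor       * pos,   /* int32 positions, size cur->ne[2] */
            int                        n_rot) {
        const enum llama_rope_type rt = llama_rope_type(model);
        if (rt == LLAMA_ROPE_TYPE_NONE) {
            return cur;  /* this architecture does not use RoPE */
        }
        /* rt doubles as the ggml mode bit field: NORM = 0, NEOX = GGML_ROPE_TYPE_NEOX */
        return ggml_rope(ctx, cur, pos, n_rot, rt);
    }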