From 14b549c70814ff9848998366a501975faea2284b Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Fri, 9 Aug 2024 15:35:39 +0200 Subject: [PATCH 1/9] ggml : move rope type enum to ggml.h This commit moves the `llama_rope_type` enum from `llama.h` to `ggml.h` and changes its name to `ggml_rope_type`. The motivation for this change is to address the TODO in `llama.h` and use the enum in ggml. Note: This commit does not change the `mode` parameter to be of type `enum ggml_rope_type`. The name `mode` and its usage suggest that it might be more generic and possibly used as a bit field for multiple flags. Further investigation/discussion may be needed to determine if `mode` should be restricted to RoPE types. --- ggml/include/ggml.h | 8 ++++++++ ggml/src/ggml.c | 6 +++--- include/llama.h | 11 +---------- src/llama.cpp | 18 +++++++++--------- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 15602a96df7ad..11b3e17da68f5 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -437,6 +437,14 @@ extern "C" { GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors }; + // Rotary Positional Embedding (RoPE) types + enum ggml_rope_type { + GGML_ROPE_TYPE_NONE = -1, + GGML_ROPE_TYPE_NORM = 0, + GGML_ROPE_TYPE_NEOX = 2, + GGML_ROPE_TYPE_GLM = 4, + }; + // available tensor operations: enum ggml_op { GGML_OP_NONE = 0, diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index c937b5e537c54..77d2cd3339d64 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -6545,7 +6545,7 @@ struct ggml_tensor * ggml_rope_back( GGML_ASSERT(a->ne[2] == b->ne[0]); GGML_ASSERT(c == NULL && "freq factors not implemented yet"); - GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet"); + GGML_ASSERT((mode & GGML_ROPE_TYPE_GLM) == 0 && "ggml_rope_back() for ChatGLM not implemented yet"); bool is_node = false; @@ -14093,7 +14093,7 @@ static void ggml_compute_forward_rope_f32( float corr_dims[2]; ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; const float * freq_factors = NULL; if (src2 != NULL) { @@ -14218,7 +14218,7 @@ static void ggml_compute_forward_rope_f16( float corr_dims[2]; ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; const float * freq_factors = NULL; if (src2 != NULL) { diff --git a/include/llama.h b/include/llama.h index 66c266298e86f..360f2d7274a09 100644 --- a/include/llama.h +++ b/include/llama.h @@ -95,15 +95,6 @@ extern "C" { LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22, }; - // note: these values should be synchronized with ggml_rope - // TODO: maybe move this enum to ggml.h (ggml_rope_type) - enum llama_rope_type { - LLAMA_ROPE_TYPE_NONE = -1, - LLAMA_ROPE_TYPE_NORM = 0, - LLAMA_ROPE_TYPE_NEOX = 2, - LLAMA_ROPE_TYPE_GLM = 4, - }; - enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file LLAMA_TOKEN_TYPE_UNDEFINED = 0, LLAMA_TOKEN_TYPE_NORMAL = 1, @@ -462,7 +453,7 @@ extern "C" { LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx); LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model); - LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model); + LLAMA_API enum ggml_rope_type ggml_rope_type (const struct llama_model * model); LLAMA_API int32_t 
llama_n_vocab (const struct llama_model * model); LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model); diff --git a/src/llama.cpp b/src/llama.cpp index be6dbf88a7790..3cbb63938ca37 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -2201,7 +2201,7 @@ struct llama_hparams { llama_token dec_start_token_id = -1; enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE; - enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE; + enum ggml_rope_type rope_type = GGML_ROPE_TYPE_NONE; enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE; bool operator!=(const llama_hparams & other) const { @@ -5219,7 +5219,7 @@ static void llm_load_hparams( hparams.use_alibi = true; } - hparams.rope_type = llama_rope_type(&model); + hparams.rope_type = ggml_rope_type(&model); } static void llm_load_vocab( @@ -8331,7 +8331,7 @@ struct llm_build_context { const bool flash_attn; const enum llama_pooling_type pooling_type; - const enum llama_rope_type rope_type; + const enum ggml_rope_type rope_type; const llm_build_cb & cb; @@ -15105,7 +15105,7 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) { bool need_reserve = false; // apply K-shift if needed - if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) { + if (lctx.model.hparams.rope_type != GGML_ROPE_TYPE_NONE && lctx.kv_self.has_shift) { if (lctx.model.arch == LLM_ARCH_DEEPSEEK2) { // not supported due to MLA GGML_ABORT("Deepseek2 does not support K-shift"); } @@ -16881,7 +16881,7 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) { return model->vocab.type; } -enum llama_rope_type llama_rope_type(const struct llama_model * model) { +enum ggml_rope_type ggml_rope_type(const struct llama_model * model) { switch (model->arch) { // these models do not use RoPE case LLM_ARCH_GPT2: @@ -16893,7 +16893,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { case LLM_ARCH_JINA_BERT_V2: case LLM_ARCH_T5: case LLM_ARCH_JAIS: - return LLAMA_ROPE_TYPE_NONE; + return GGML_ROPE_TYPE_NONE; // use what we call a normal RoPE, operating on pairs of consecutive head values case LLM_ARCH_LLAMA: @@ -16909,7 +16909,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { case LLM_ARCH_ARCTIC: case LLM_ARCH_DEEPSEEK2: case LLM_ARCH_CHATGLM: - return LLAMA_ROPE_TYPE_NORM; + return GGML_ROPE_TYPE_NORM; // the pairs of head values are offset by n_rot/2 case LLM_ARCH_FALCON: @@ -16930,14 +16930,14 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { case LLM_ARCH_OPENELM: case LLM_ARCH_GPTNEOX: case LLM_ARCH_CODESHELL: - return LLAMA_ROPE_TYPE_NEOX; + return GGML_ROPE_TYPE_NEOX; // all model arches should be listed explicitly here case LLM_ARCH_UNKNOWN: GGML_ABORT("unknown architecture"); } - return LLAMA_ROPE_TYPE_NONE; + return GGML_ROPE_TYPE_NONE; } enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) { From 5e14dbf2ea5210adbb637f3779f93dd40654c75c Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sat, 10 Aug 2024 06:50:54 +0200 Subject: [PATCH 2/9] squash! ggml : move rope type enum to ggml.h This commit removes GGML_ROPE_TYPE_NONE and GGML_ROPE_TYPE_GLM from ggml.h, and adds back the llama_rope_type enum. I've kept the assert for GGML_ROPE_TYPE_GLM as I'm not sure if it is safe to remove it yet.
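For reference, a minimal C sketch (not part of the patch; the helper names are made up and assert() stands in for GGML_ASSERT) of how the mode argument remains a bit field after this commit, with the ChatGLM bit checked against its raw value 4:

    #include <assert.h>
    #include <stdbool.h>

    enum ggml_rope_type {
        GGML_ROPE_TYPE_NORM = 0,  /* rotate pairs of consecutive head values */
        GGML_ROPE_TYPE_NEOX = 2,  /* pairs offset by n_rot/2 (GPT-NeoX style) */
    };

    /* mirrors the is_neox check in ggml_compute_forward_rope_f32/f16 */
    static bool rope_mode_is_neox(int mode) {
        return (mode & GGML_ROPE_TYPE_NEOX) != 0;
    }

    /* mirrors the guard kept in ggml_rope_back(); the ChatGLM bit keeps its
       raw value 4 because GGML_ROPE_TYPE_GLM is removed again by this commit */
    static void rope_back_guard(int mode) {
        assert((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
    }

    int main(void) {
        assert(rope_mode_is_neox(GGML_ROPE_TYPE_NEOX));
        assert(!rope_mode_is_neox(GGML_ROPE_TYPE_NORM));
        rope_back_guard(GGML_ROPE_TYPE_NORM);
        return 0;
    }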
--- ggml/include/ggml.h | 2 -- ggml/src/ggml.c | 2 +- include/llama.h | 9 ++++++++- src/llama.cpp | 18 +++++++++--------- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 11b3e17da68f5..22f5dfed4fa2e 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -439,10 +439,8 @@ extern "C" { // Rotary Positional Embedding (RoPE) types enum ggml_rope_type { - GGML_ROPE_TYPE_NONE = -1, GGML_ROPE_TYPE_NORM = 0, GGML_ROPE_TYPE_NEOX = 2, - GGML_ROPE_TYPE_GLM = 4, }; // available tensor operations: diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 77d2cd3339d64..b3f37ac81e0bc 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -6545,7 +6545,7 @@ struct ggml_tensor * ggml_rope_back( GGML_ASSERT(a->ne[2] == b->ne[0]); GGML_ASSERT(c == NULL && "freq factors not implemented yet"); - GGML_ASSERT((mode & GGML_ROPE_TYPE_GLM) == 0 && "ggml_rope_back() for ChatGLM not implemented yet"); + GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet"); bool is_node = false; diff --git a/include/llama.h b/include/llama.h index 360f2d7274a09..f192d8106364d 100644 --- a/include/llama.h +++ b/include/llama.h @@ -95,6 +95,13 @@ extern "C" { LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22, }; + enum llama_rope_type { + LLAMA_ROPE_TYPE_NONE = -1, + LLAMA_ROPE_TYPE_NORM = GGML_ROPE_TYPE_NORM, + LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX, + }; + + enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file LLAMA_TOKEN_TYPE_UNDEFINED = 0, LLAMA_TOKEN_TYPE_NORMAL = 1, @@ -453,7 +460,7 @@ extern "C" { LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx); LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model); - LLAMA_API enum ggml_rope_type ggml_rope_type (const struct llama_model * model); + LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model); LLAMA_API int32_t llama_n_vocab (const struct llama_model * model); LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model); diff --git a/src/llama.cpp b/src/llama.cpp index 3cbb63938ca37..be6dbf88a7790 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -2201,7 +2201,7 @@ struct llama_hparams { llama_token dec_start_token_id = -1; enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE; - enum ggml_rope_type rope_type = GGML_ROPE_TYPE_NONE; + enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE; enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE; bool operator!=(const llama_hparams & other) const { @@ -5219,7 +5219,7 @@ static void llm_load_hparams( hparams.use_alibi = true; } - hparams.rope_type = ggml_rope_type(&model); + hparams.rope_type = llama_rope_type(&model); } static void llm_load_vocab( @@ -8331,7 +8331,7 @@ struct llm_build_context { const bool flash_attn; const enum llama_pooling_type pooling_type; - const enum ggml_rope_type rope_type; + const enum llama_rope_type rope_type; const llm_build_cb & cb; @@ -15105,7 +15105,7 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) { bool need_reserve = false; // apply K-shift if needed - if (lctx.model.hparams.rope_type != GGML_ROPE_TYPE_NONE && lctx.kv_self.has_shift) { + if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) { if (lctx.model.arch == LLM_ARCH_DEEPSEEK2) { // not supported due to MLA GGML_ABORT("Deepseek2 does not support K-shift"); } @@ -16881,7 +16881,7 @@ enum llama_vocab_type 
llama_vocab_type(const struct llama_model * model) { return model->vocab.type; } -enum ggml_rope_type ggml_rope_type(const struct llama_model * model) { +enum llama_rope_type llama_rope_type(const struct llama_model * model) { switch (model->arch) { // these models do not use RoPE case LLM_ARCH_GPT2: @@ -16893,7 +16893,7 @@ enum ggml_rope_type ggml_rope_type(const struct llama_model * model) { case LLM_ARCH_JINA_BERT_V2: case LLM_ARCH_T5: case LLM_ARCH_JAIS: - return GGML_ROPE_TYPE_NONE; + return LLAMA_ROPE_TYPE_NONE; // use what we call a normal RoPE, operating on pairs of consecutive head values case LLM_ARCH_LLAMA: @@ -16909,7 +16909,7 @@ enum ggml_rope_type ggml_rope_type(const struct llama_model * model) { case LLM_ARCH_ARCTIC: case LLM_ARCH_DEEPSEEK2: case LLM_ARCH_CHATGLM: - return GGML_ROPE_TYPE_NORM; + return LLAMA_ROPE_TYPE_NORM; // the pairs of head values are offset by n_rot/2 case LLM_ARCH_FALCON: @@ -16930,14 +16930,14 @@ enum ggml_rope_type ggml_rope_type(const struct llama_model * model) { case LLM_ARCH_OPENELM: case LLM_ARCH_GPTNEOX: case LLM_ARCH_CODESHELL: - return GGML_ROPE_TYPE_NEOX; + return LLAMA_ROPE_TYPE_NEOX; // all model arches should be listed explicitly here case LLM_ARCH_UNKNOWN: GGML_ABORT("unknown architecture"); } - return GGML_ROPE_TYPE_NONE; + return LLAMA_ROPE_TYPE_NONE; } enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) { From c9206f63be303ce74e53636c278c6294bb7b5100 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 06:58:14 +0200 Subject: [PATCH 3/9] squash! ggml : move rope type enum to ggml.h This commit removes the enum ggml_rope_type from ggml.h and replaces it with a define (GGML_ROPE_TYPE_NEOX). This define is used in the code to check if the mode is set to GPT-NeoX. Also the enum llama_rope_type has been updated to reflect this change. 
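The resulting pattern shared by the backends can be sketched as follows (illustrative only; the helper name is made up, but reading op_params[2] and testing the bit rather than comparing for equality match the hunks below):

    #include <stdbool.h>
    #include <stdint.h>

    #define GGML_ROPE_TYPE_NEOX 2

    /* each backend recovers the RoPE mode from the op's parameters and
       tests the NEOX bit */
    static bool rope_op_is_neox(const int32_t * op_params) {
        const int mode = op_params[2];
        return (mode & GGML_ROPE_TYPE_NEOX) != 0;
    }

    int main(void) {
        const int32_t op_params[11] = { 0, 0, GGML_ROPE_TYPE_NEOX };  /* mode lives at index 2 */
        return rope_op_is_neox(op_params) ? 0 : 1;
    }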
--- ggml/include/ggml.h | 10 +++------- ggml/src/ggml-cann/aclnn_ops.cpp | 2 +- ggml/src/ggml-cuda/rope.cu | 2 +- ggml/src/ggml-metal.m | 2 +- ggml/src/ggml-sycl/rope.cpp | 2 +- ggml/src/ggml-vulkan.cpp | 2 +- ggml/src/kompute-shaders/op_rope_f16.comp | 2 +- ggml/src/kompute-shaders/op_rope_f32.comp | 2 +- include/llama.h | 2 +- 9 files changed, 11 insertions(+), 15 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 22f5dfed4fa2e..c352d5cd82938 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -244,6 +244,8 @@ #define GGML_EXIT_SUCCESS 0 #define GGML_EXIT_ABORTED 1 +#define GGML_ROPE_TYPE_NEOX 2 + #define GGUF_MAGIC "GGUF" #define GGUF_VERSION 3 @@ -437,12 +439,6 @@ extern "C" { GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors }; - // Rotary Positional Embedding (RoPE) types - enum ggml_rope_type { - GGML_ROPE_TYPE_NORM = 0, - GGML_ROPE_TYPE_NEOX = 2, - }; - // available tensor operations: enum ggml_op { GGML_OP_NONE = 0, @@ -1460,7 +1456,7 @@ extern "C" { // rotary position embedding // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED) - // if mode & 2 == 1, GPT-NeoX style + // if mode & GGML_ROPE_TYPE_NEOX == 1, GPT-NeoX style // // b is an int32 vector with size a->ne[2], it contains the positions GGML_API struct ggml_tensor * ggml_rope( diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp index 8c4132f5bb7ad..a4ec8418e2ab3 100644 --- a/ggml/src/ggml-cann/aclnn_ops.cpp +++ b/ggml/src/ggml-cann/aclnn_ops.cpp @@ -2881,7 +2881,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; // init cos/sin cache ggml_cann_pool_alloc sin_allocator( diff --git a/ggml/src/ggml-cuda/rope.cu b/ggml/src/ggml-cuda/rope.cu index 99ec1dd98ca9c..88f586d689cfd 100644 --- a/ggml/src/ggml-cuda/rope.cu +++ b/ggml/src/ggml-cuda/rope.cu @@ -226,7 +226,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float)); memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float)); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; const int32_t * pos = (const int32_t *) src1_d; diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m index 9fc08ab3aa5cc..2c4a9e4e478a9 100644 --- a/ggml/src/ggml-metal.m +++ b/ggml/src/ggml-metal.m @@ -2313,7 +2313,7 @@ static enum ggml_status ggml_metal_graph_compute( memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float)); memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float)); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; id pipeline = nil; diff --git a/ggml/src/ggml-sycl/rope.cpp b/ggml/src/ggml-sycl/rope.cpp index c7545bcc1a8a9..1f06f78fa3d91 100644 --- a/ggml/src/ggml-sycl/rope.cpp +++ b/ggml/src/ggml-sycl/rope.cpp @@ -226,7 +226,7 @@ void ggml_sycl_op_rope( memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float)); memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float)); - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; const int32_t * pos = (const int32_t *) src1_dd; diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index b0f36a513f84b..3c4d7dc084fc7 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -4050,7 +4050,7 @@ static vk_pipeline 
ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const case GGML_OP_ROPE: { const int mode = ((const int32_t *) dst->op_params)[2]; - const bool is_neox = mode & 2; + const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; if (is_neox) { if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { diff --git a/ggml/src/kompute-shaders/op_rope_f16.comp b/ggml/src/kompute-shaders/op_rope_f16.comp index 1a4058b3f1f10..0ecfb2eab527c 100644 --- a/ggml/src/kompute-shaders/op_rope_f16.comp +++ b/ggml/src/kompute-shaders/op_rope_f16.comp @@ -11,7 +11,7 @@ void main() { const uint i2 = gl_WorkGroupID.y; const uint i1 = gl_WorkGroupID.x; - const bool is_neox = (pcs.mode & 2) != 0; + const bool is_neox = (pcs.mode & GGML_ROPE_TYPE_NEOX) != 0; float corr_dims[2]; rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims); diff --git a/ggml/src/kompute-shaders/op_rope_f32.comp b/ggml/src/kompute-shaders/op_rope_f32.comp index 65e03827a2660..cec0fd9a5d10c 100644 --- a/ggml/src/kompute-shaders/op_rope_f32.comp +++ b/ggml/src/kompute-shaders/op_rope_f32.comp @@ -11,7 +11,7 @@ void main() { const uint i2 = gl_WorkGroupID.y; const uint i1 = gl_WorkGroupID.x; - const bool is_neox = (pcs.mode & 2) != 0; + const bool is_neox = (pcs.mode & GGML_ROPE_TYPE_NEOX) != 0; float corr_dims[2]; rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims); diff --git a/include/llama.h b/include/llama.h index f192d8106364d..573230be302a3 100644 --- a/include/llama.h +++ b/include/llama.h @@ -97,7 +97,7 @@ extern "C" { enum llama_rope_type { LLAMA_ROPE_TYPE_NONE = -1, - LLAMA_ROPE_TYPE_NORM = GGML_ROPE_TYPE_NORM, + LLAMA_ROPE_TYPE_NORM = 0, LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX, }; From 6261222bd0dc0efd51f0fb0435ad3f16a5b52fd6 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 08:16:59 +0200 Subject: [PATCH 4/9] squash! ggml : move rope type enum to ggml.h This commit contains a suggestion to enable the GGML_ROPE_TYPE_NEOX macro/define to be passed to the shader compiler.
--- ggml/src/CMakeLists.txt | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 425a2589502eb..2e104890643c1 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -656,10 +656,25 @@ if (GGML_KOMPUTE) message(FATAL_ERROR "glslc not found") endif() + # Function to extract #define value from header file + function(get_define_value HEADER DEFINE_NAME RESULT_VAR) + file(STRINGS ${HEADER} DEFINE_LINE REGEX "^#define[\t ]+${DEFINE_NAME}[\t ]+.*") + if(DEFINE_LINE) + string(REGEX REPLACE "^#define[\t ]+${DEFINE_NAME}[\t ]+([0-9]+).*" "\\1" DEFINE_VALUE ${DEFINE_LINE}) + set(${RESULT_VAR} ${DEFINE_VALUE} PARENT_SCOPE) + else() + message(WARNING "Define ${DEFINE_NAME} not found in ${HEADER}") + set(${RESULT_VAR} "" PARENT_SCOPE) + endif() + endfunction() + function(compile_shader) set(options) set(oneValueArgs) set(multiValueArgs SOURCES) + set(GGML_HEADER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml.h") + message(STATUS "GGML_HEADER_PATH: ${GGML_HEADER_PATH}") + get_define_value(${GGML_HEADER_PATH} GGML_ROPE_TYPE_NEOX GGML_ROPE_TYPE_NEOX_VALUE) cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) foreach(source ${compile_shader_SOURCES}) get_filename_component(filename ${source} NAME) @@ -671,7 +686,10 @@ if (GGML_KOMPUTE) ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp - COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} + ${GGML_HEADER_PATH} + COMMAND ${glslc_executable} --target-env=vulkan1.2 + -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} + -DGGML_ROPE_TYPE_NEOX=${GGML_ROPE_TYPE_NEOX_VALUE} COMMENT "Compiling ${source} to ${spv_file}" ) From d74cc1674f76c32eefb5f5d96eeed3e50e05f45e Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 08:52:07 +0200 Subject: [PATCH 5/9] squash! ggml : move rope type enum to ggml.h This commit fixes the editorconfig-checker warnings. 
--- ggml/src/CMakeLists.txt | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 2e104890643c1..dff31457f1c2a 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -672,9 +672,8 @@ if (GGML_KOMPUTE) set(options) set(oneValueArgs) set(multiValueArgs SOURCES) - set(GGML_HEADER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml.h") - message(STATUS "GGML_HEADER_PATH: ${GGML_HEADER_PATH}") - get_define_value(${GGML_HEADER_PATH} GGML_ROPE_TYPE_NEOX GGML_ROPE_TYPE_NEOX_VALUE) + set(GGML_HEADER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml.h") + get_define_value(${GGML_HEADER_PATH} GGML_ROPE_TYPE_NEOX GGML_ROPE_TYPE_NEOX_VALUE) cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) foreach(source ${compile_shader_SOURCES}) get_filename_component(filename ${source} NAME) @@ -686,10 +685,10 @@ if (GGML_KOMPUTE) ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp - ${GGML_HEADER_PATH} + ${GGML_HEADER_PATH} COMMAND ${glslc_executable} --target-env=vulkan1.2 - -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} - -DGGML_ROPE_TYPE_NEOX=${GGML_ROPE_TYPE_NEOX_VALUE} + -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} + -DGGML_ROPE_TYPE_NEOX=${GGML_ROPE_TYPE_NEOX_VALUE} COMMENT "Compiling ${source} to ${spv_file}" ) From 3e4d01ce01755fa2aa402d7e91121fffad0afd27 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 10:00:18 +0200 Subject: [PATCH 6/9] squash! ggml : move rope type enum to ggml.h Update comment for ggml_rope function. --- ggml/include/ggml.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index c352d5cd82938..1d2a354024675 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1455,8 +1455,8 @@ extern "C" { struct ggml_tensor * b); // rotary position embedding - // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED) - // if mode & GGML_ROPE_TYPE_NEOX == 1, GPT-NeoX style + // if (mode & 1) - skip n_past elements (NOT SUPPORTED) + // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style // // b is an int32 vector with size a->ne[2], it contains the positions GGML_API struct ggml_tensor * ggml_rope( From ecd1c1e6b80289eb408d95131edb5dff654a3240 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 10:08:51 +0200 Subject: [PATCH 7/9] Revert "squash! ggml : move rope type enum to ggml.h" This reverts commit 6261222bd0dc0efd51f0fb0435ad3f16a5b52fd6. 
--- ggml/src/CMakeLists.txt | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index dff31457f1c2a..425a2589502eb 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -656,24 +656,10 @@ if (GGML_KOMPUTE) message(FATAL_ERROR "glslc not found") endif() - # Function to extract #define value from header file - function(get_define_value HEADER DEFINE_NAME RESULT_VAR) - file(STRINGS ${HEADER} DEFINE_LINE REGEX "^#define[\t ]+${DEFINE_NAME}[\t ]+.*") - if(DEFINE_LINE) - string(REGEX REPLACE "^#define[\t ]+${DEFINE_NAME}[\t ]+([0-9]+).*" "\\1" DEFINE_VALUE ${DEFINE_LINE}) - set(${RESULT_VAR} ${DEFINE_VALUE} PARENT_SCOPE) - else() - message(WARNING "Define ${DEFINE_NAME} not found in ${HEADER}") - set(${RESULT_VAR} "" PARENT_SCOPE) - endif() - endfunction() - function(compile_shader) set(options) set(oneValueArgs) set(multiValueArgs SOURCES) - set(GGML_HEADER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml.h") - get_define_value(${GGML_HEADER_PATH} GGML_ROPE_TYPE_NEOX GGML_ROPE_TYPE_NEOX_VALUE) cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) foreach(source ${compile_shader_SOURCES}) get_filename_component(filename ${source} NAME) @@ -685,10 +671,7 @@ if (GGML_KOMPUTE) ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp - ${GGML_HEADER_PATH} - COMMAND ${glslc_executable} --target-env=vulkan1.2 - -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} - -DGGML_ROPE_TYPE_NEOX=${GGML_ROPE_TYPE_NEOX_VALUE} + COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} COMMENT "Compiling ${source} to ${spv_file}" ) From cfb98641828d79d7ffba94f74b970aed133a520b Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sun, 11 Aug 2024 10:11:16 +0200 Subject: [PATCH 8/9] squash! ggml : move rope type enum to ggml.h Add GGML_ROPE_TYPE_NEOX to rope_common.comp. --- ggml/src/kompute-shaders/rope_common.comp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ggml/src/kompute-shaders/rope_common.comp b/ggml/src/kompute-shaders/rope_common.comp index 7b9394cb2fffc..df4702896d46f 100644 --- a/ggml/src/kompute-shaders/rope_common.comp +++ b/ggml/src/kompute-shaders/rope_common.comp @@ -1,5 +1,7 @@ #include "common.comp" +#define GGML_ROPE_TYPE_NEOX 2 + // TODO: use a local size of 32 or more (Metal uses 1024) layout(local_size_x = 1) in; From 368eea3a5bee48fe303e3122930e0240bc723200 Mon Sep 17 00:00:00 2001 From: slaren Date: Tue, 13 Aug 2024 21:05:58 +0200 Subject: [PATCH 9/9] remove extra line --- include/llama.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/llama.h b/include/llama.h index 573230be302a3..779f3235b669c 100644 --- a/include/llama.h +++ b/include/llama.h @@ -101,7 +101,6 @@ extern "C" { LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX, }; - enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file LLAMA_TOKEN_TYPE_UNDEFINED = 0, LLAMA_TOKEN_TYPE_NORMAL = 1,
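Taken together, the end state of the series can be sketched as follows (illustrative only; build_rope_example() is a made-up helper, and real call sites in llama.cpp go through ggml_rope_ext() with additional frequency-scaling parameters): since LLAMA_ROPE_TYPE_NEOX aliases GGML_ROPE_TYPE_NEOX, the value returned by llama_rope_type() can be passed straight through as the mode bit field of ggml_rope().

    #include "ggml.h"
    #include "llama.h"

    /* illustrative helper, not a function in llama.cpp */
    static struct ggml_tensor * build_rope_example(
            struct ggml_context      * ctx,
            const struct llama_model * model,
            struct ggml_tensor       * cur,   /* [n_embd_head, n_head, n_tokens] */
            struct ggml_tensor       * pos,   /* int32 positions, size cur->ne[2] */
            int                        n_rot) {
        const enum llama_rope_type rt = llama_rope_type(model);
        if (rt == LLAMA_ROPE_TYPE_NONE) {
            return cur;  /* this architecture does not use RoPE */
        }
        /* rt doubles as the ggml mode bit field: NORM = 0, NEOX = GGML_ROPE_TYPE_NEOX */
        return ggml_rope(ctx, cur, pos, n_rot, rt);
    }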