squash! ggml : move rope type enum to ggml.h
This commit removes GGML_ROPE_TYPE_NONE and GGML_ROPE_TYPE_GLM from
ggml.h, and adds the llama_rope_type enum back to llama.h.

I've kept the assert for the GLM rope type in ggml_rope_back() (now
checking the literal value 4, since the enum value was removed) as I'm
not sure it is safe to remove yet.
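
As a rough caller-side illustration (not part of this commit), the re-exposed public accessor can be queried and dispatched on without touching ggml's internal rope enum; the helper name rope_type_name below is made up for the example:

// Hypothetical caller-side sketch: map the public rope type of a loaded model
// to a human-readable description. Only llama_rope_type() and the
// LLAMA_ROPE_TYPE_* values introduced in this commit are assumed to exist.
#include "llama.h"

static const char * rope_type_name(const struct llama_model * model) {
    switch (llama_rope_type(model)) {
        case LLAMA_ROPE_TYPE_NONE: return "none (model does not use RoPE)";
        case LLAMA_ROPE_TYPE_NORM: return "normal (consecutive pairs of head values)";
        case LLAMA_ROPE_TYPE_NEOX: return "neox (pairs offset by n_rot/2)";
    }
    return "unknown";
}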
danbev committed Aug 10, 2024
1 parent 14b549c commit 5e14dbf
Showing 4 changed files with 18 additions and 13 deletions.
2 changes: 0 additions & 2 deletions ggml/include/ggml.h
@@ -439,10 +439,8 @@ extern "C" {
 
     // Rotary Positional Embedding (RoPE) types
     enum ggml_rope_type {
-        GGML_ROPE_TYPE_NONE = -1,
         GGML_ROPE_TYPE_NORM = 0,
         GGML_ROPE_TYPE_NEOX = 2,
-        GGML_ROPE_TYPE_GLM  = 4,
     };
 
     // available tensor operations:
2 changes: 1 addition & 1 deletion ggml/src/ggml.c
@@ -6545,7 +6545,7 @@ struct ggml_tensor * ggml_rope_back(
     GGML_ASSERT(a->ne[2] == b->ne[0]);
     GGML_ASSERT(c == NULL && "freq factors not implemented yet");
 
-    GGML_ASSERT((mode & GGML_ROPE_TYPE_GLM) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
+    GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
 
     bool is_node = false;
 
9 changes: 8 additions & 1 deletion include/llama.h
@@ -95,6 +95,13 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
     };
 
+    enum llama_rope_type {
+        LLAMA_ROPE_TYPE_NONE = -1,
+        LLAMA_ROPE_TYPE_NORM = GGML_ROPE_TYPE_NORM,
+        LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
+    };
+
+
     enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
         LLAMA_TOKEN_TYPE_UNDEFINED = 0,
         LLAMA_TOKEN_TYPE_NORMAL = 1,
@@ -453,7 +460,7 @@ extern "C" {
     LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
 
     LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model);
-    LLAMA_API enum ggml_rope_type ggml_rope_type (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
 
     LLAMA_API int32_t llama_n_vocab (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
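As a small compile-time sketch (not part of this commit, and assuming llama.h continues to include ggml.h), the aliasing above keeps the llama-side values from drifting away from ggml's:

// Sketch only: because LLAMA_ROPE_TYPE_NORM/NEOX alias the GGML_ROPE_TYPE_*
// constants directly, these checks hold by construction.
#include "llama.h"

_Static_assert(LLAMA_ROPE_TYPE_NORM == GGML_ROPE_TYPE_NORM, "rope type value drift");
_Static_assert(LLAMA_ROPE_TYPE_NEOX == GGML_ROPE_TYPE_NEOX, "rope type value drift");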
18 changes: 9 additions & 9 deletions src/llama.cpp
@@ -2201,7 +2201,7 @@ struct llama_hparams {
     llama_token dec_start_token_id = -1;
 
     enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
-    enum ggml_rope_type rope_type = GGML_ROPE_TYPE_NONE;
+    enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;
     enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE;
 
     bool operator!=(const llama_hparams & other) const {
@@ -5219,7 +5219,7 @@ static void llm_load_hparams(
         hparams.use_alibi = true;
     }
 
-    hparams.rope_type = ggml_rope_type(&model);
+    hparams.rope_type = llama_rope_type(&model);
 }
 
 static void llm_load_vocab(
@@ -8331,7 +8331,7 @@ struct llm_build_context {
     const bool flash_attn;
 
     const enum llama_pooling_type pooling_type;
-    const enum ggml_rope_type rope_type;
+    const enum llama_rope_type rope_type;
 
     const llm_build_cb & cb;
 
@@ -15105,7 +15105,7 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) {
     bool need_reserve = false;
 
     // apply K-shift if needed
-    if (lctx.model.hparams.rope_type != GGML_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
+    if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
         if (lctx.model.arch == LLM_ARCH_DEEPSEEK2) { // not supported due to MLA
             GGML_ABORT("Deepseek2 does not support K-shift");
 
@@ -16881,7 +16881,7 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
     return model->vocab.type;
 }
 
-enum ggml_rope_type ggml_rope_type(const struct llama_model * model) {
+enum llama_rope_type llama_rope_type(const struct llama_model * model) {
     switch (model->arch) {
         // these models do not use RoPE
         case LLM_ARCH_GPT2:
@@ -16893,7 +16893,7 @@ enum ggml_rope_type ggml_rope_type(const struct llama_model * model) {
         case LLM_ARCH_JINA_BERT_V2:
         case LLM_ARCH_T5:
         case LLM_ARCH_JAIS:
-            return GGML_ROPE_TYPE_NONE;
+            return LLAMA_ROPE_TYPE_NONE;
 
         // use what we call a normal RoPE, operating on pairs of consecutive head values
         case LLM_ARCH_LLAMA:
@@ -16909,7 +16909,7 @@ enum ggml_rope_type ggml_rope_type(const struct llama_model * model) {
         case LLM_ARCH_ARCTIC:
         case LLM_ARCH_DEEPSEEK2:
         case LLM_ARCH_CHATGLM:
-            return GGML_ROPE_TYPE_NORM;
+            return LLAMA_ROPE_TYPE_NORM;
 
         // the pairs of head values are offset by n_rot/2
         case LLM_ARCH_FALCON:
@@ -16930,14 +16930,14 @@ enum ggml_rope_type ggml_rope_type(const struct llama_model * model) {
         case LLM_ARCH_OPENELM:
         case LLM_ARCH_GPTNEOX:
         case LLM_ARCH_CODESHELL:
-            return GGML_ROPE_TYPE_NEOX;
+            return LLAMA_ROPE_TYPE_NEOX;
 
         // all model arches should be listed explicitly here
         case LLM_ARCH_UNKNOWN:
             GGML_ABORT("unknown architecture");
     }
 
-    return GGML_ROPE_TYPE_NONE;
+    return LLAMA_ROPE_TYPE_NONE;
 }
 
 enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) {
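The comments inside llama_rope_type() above describe the two RoPE layouts only in words. As an illustrative sketch (not code from this commit), the index pairing each mode rotates can be written as follows, where n_rot is the number of rotated dimensions and i runs over [0, n_rot/2):

// Sketch only: compute which two elements of a head vector form pair i under
// each layout; both elements of a pair are rotated by the same angle.
void rope_pair(int i, int n_rot, int neox, int * i0, int * i1) {
    if (neox) {
        // NEOX style (LLAMA_ROPE_TYPE_NEOX): the pair is offset by n_rot/2
        *i0 = i;
        *i1 = i + n_rot/2;
    } else {
        // normal RoPE (LLAMA_ROPE_TYPE_NORM): consecutive head values
        *i0 = 2*i;
        *i1 = 2*i + 1;
    }
}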
