ggml : move rope type enum to ggml.h #8949

Merged: 9 commits, Aug 13, 2024
8 changes: 8 additions & 0 deletions ggml/include/ggml.h
@@ -437,6 +437,14 @@ extern "C" {
GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
};

+ // Rotary Positional Embedding (RoPE) types
+ enum ggml_rope_type {
+ GGML_ROPE_TYPE_NONE = -1,
+ GGML_ROPE_TYPE_NORM = 0,
+ GGML_ROPE_TYPE_NEOX = 2,
+ GGML_ROPE_TYPE_GLM = 4,
+ };

// available tensor operations:
enum ggml_op {
GGML_OP_NONE = 0,
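Note (illustration, not part of the diff): the new enum names the bit flags that were previously hard-coded into the `mode` argument of the RoPE operators. A minimal, self-contained C sketch of how the values behave as flags:

#include <stdio.h>

// Mirror of the enum added in ggml.h above: NEOX and GLM occupy distinct
// bits, NORM is the zero value, NONE is a sentinel for "no RoPE".
enum ggml_rope_type {
    GGML_ROPE_TYPE_NONE = -1,
    GGML_ROPE_TYPE_NORM = 0,
    GGML_ROPE_TYPE_NEOX = 2,
    GGML_ROPE_TYPE_GLM  = 4,
};

int main(void) {
    int mode    = GGML_ROPE_TYPE_NEOX;                 // a typical `mode` value
    int is_neox = (mode & GGML_ROPE_TYPE_NEOX) != 0;   // replaces the old `mode & 2`
    int is_glm  = (mode & GGML_ROPE_TYPE_GLM)  != 0;   // replaces the old `mode & 4`
    printf("neox=%d glm=%d\n", is_neox, is_glm);       // prints: neox=1 glm=0
    return 0;
}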
6 changes: 3 additions & 3 deletions ggml/src/ggml.c
@@ -6545,7 +6545,7 @@ struct ggml_tensor * ggml_rope_back(
GGML_ASSERT(a->ne[2] == b->ne[0]);
GGML_ASSERT(c == NULL && "freq factors not implemented yet");

- GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
+ GGML_ASSERT((mode & GGML_ROPE_TYPE_GLM) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");

bool is_node = false;

@@ -14093,7 +14093,7 @@ static void ggml_compute_forward_rope_f32(
float corr_dims[2];
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);

- const bool is_neox = mode & 2;
+ const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

const float * freq_factors = NULL;
if (src2 != NULL) {
@@ -14218,7 +14218,7 @@ static void ggml_compute_forward_rope_f16(
float corr_dims[2];
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);

- const bool is_neox = mode & 2;
+ const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

const float * freq_factors = NULL;
if (src2 != NULL) {
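Note (not part of the diff): GGML_ROPE_TYPE_NORM is 0, so it has no bit of its own and cannot be tested with a mask; "normal" RoPE is simply the absence of the NEOX and GLM flags, which is why the forward paths above only ever test the non-zero values. Reusing the enum from the sketch above:

// NORM is detected by exclusion, never by masking.
int is_neox = (mode & GGML_ROPE_TYPE_NEOX) != 0;
int is_glm  = (mode & GGML_ROPE_TYPE_GLM)  != 0;
int is_norm = !is_neox && !is_glm;  // (mode & GGML_ROPE_TYPE_NORM) is always 0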
11 changes: 1 addition & 10 deletions include/llama.h
@@ -95,15 +95,6 @@ extern "C" {
LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
};

- // note: these values should be synchronized with ggml_rope
- // TODO: maybe move this enum to ggml.h (ggml_rope_type)
- enum llama_rope_type {
- LLAMA_ROPE_TYPE_NONE = -1,
- LLAMA_ROPE_TYPE_NORM = 0,
- LLAMA_ROPE_TYPE_NEOX = 2,
- LLAMA_ROPE_TYPE_GLM = 4,
- };

enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
LLAMA_TOKEN_TYPE_UNDEFINED = 0,
LLAMA_TOKEN_TYPE_NORMAL = 1,
@@ -462,7 +453,7 @@ extern "C" {
LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);

LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model);
- LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
+ LLAMA_API enum ggml_rope_type ggml_rope_type (const struct llama_model * model);

LLAMA_API int32_t llama_n_vocab (const struct llama_model * model);
LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
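A caller-side sketch of the renamed accessor (illustrative only; the model path is a placeholder, and the loader calls are the llama.h API as it existed at the time of this PR):

#include "llama.h"

// Load a model and inspect its RoPE family via the renamed accessor.
static void print_rope_type(const char * path /* placeholder, e.g. a local .gguf file */) {
    struct llama_model_params params = llama_model_default_params();
    struct llama_model * model = llama_load_model_from_file(path, params);
    if (model == NULL) {
        return; // load failed
    }
    enum ggml_rope_type rt = ggml_rope_type(model); // was: llama_rope_type(model)
    if (rt == GGML_ROPE_TYPE_NEOX) {
        // pairs of head values offset by n_rot/2 (see the switch in llama.cpp below)
    }
    llama_free_model(model);
}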
18 changes: 9 additions & 9 deletions src/llama.cpp
@@ -2201,7 +2201,7 @@ struct llama_hparams {
llama_token dec_start_token_id = -1;

enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
- enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;
+ enum ggml_rope_type rope_type = GGML_ROPE_TYPE_NONE;
enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE;

bool operator!=(const llama_hparams & other) const {
@@ -5219,7 +5219,7 @@ static void llm_load_hparams(
hparams.use_alibi = true;
}

- hparams.rope_type = llama_rope_type(&model);
+ hparams.rope_type = ggml_rope_type(&model);
}

static void llm_load_vocab(
@@ -8331,7 +8331,7 @@ struct llm_build_context {
const bool flash_attn;

const enum llama_pooling_type pooling_type;
- const enum llama_rope_type rope_type;
+ const enum ggml_rope_type rope_type;

const llm_build_cb & cb;

@@ -15105,7 +15105,7 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) {
bool need_reserve = false;

// apply K-shift if needed
- if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
+ if (lctx.model.hparams.rope_type != GGML_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
if (lctx.model.arch == LLM_ARCH_DEEPSEEK2) { // not supported due to MLA
GGML_ABORT("Deepseek2 does not support K-shift");
}
@@ -16881,7 +16881,7 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
return model->vocab.type;
}

- enum llama_rope_type llama_rope_type(const struct llama_model * model) {
+ enum ggml_rope_type ggml_rope_type(const struct llama_model * model) {
switch (model->arch) {
// these models do not use RoPE
case LLM_ARCH_GPT2:
@@ -16893,7 +16893,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
case LLM_ARCH_JINA_BERT_V2:
case LLM_ARCH_T5:
case LLM_ARCH_JAIS:
- return LLAMA_ROPE_TYPE_NONE;
+ return GGML_ROPE_TYPE_NONE;

// use what we call a normal RoPE, operating on pairs of consecutive head values
case LLM_ARCH_LLAMA:
@@ -16909,7 +16909,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
case LLM_ARCH_ARCTIC:
case LLM_ARCH_DEEPSEEK2:
case LLM_ARCH_CHATGLM:
- return LLAMA_ROPE_TYPE_NORM;
+ return GGML_ROPE_TYPE_NORM;

// the pairs of head values are offset by n_rot/2
case LLM_ARCH_FALCON:
@@ -16930,14 +16930,14 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
case LLM_ARCH_OPENELM:
case LLM_ARCH_GPTNEOX:
case LLM_ARCH_CODESHELL:
- return LLAMA_ROPE_TYPE_NEOX;
+ return GGML_ROPE_TYPE_NEOX;

// all model arches should be listed explicitly here
case LLM_ARCH_UNKNOWN:
GGML_ABORT("unknown architecture");
}

- return LLAMA_ROPE_TYPE_NONE;
+ return GGML_ROPE_TYPE_NONE;
}

enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) {
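Closing note (not part of the diff): since every architecture is listed explicitly and LLM_ARCH_UNKNOWN aborts, the trailing return GGML_ROPE_TYPE_NONE above is unreachable at runtime; it only satisfies compilers that cannot prove the switch is exhaustive. A compressed sketch of the pattern, with hypothetical stand-in architectures and the enum from the first sketch:

#include <stdlib.h>

enum arch { ARCH_ROPELESS, ARCH_NORM_ROPE, ARCH_NEOX_ROPE, ARCH_UNKNOWN };

enum ggml_rope_type rope_type_for(enum arch a) {
    switch (a) {
        case ARCH_ROPELESS:  return GGML_ROPE_TYPE_NONE;
        case ARCH_NORM_ROPE: return GGML_ROPE_TYPE_NORM;
        case ARCH_NEOX_ROPE: return GGML_ROPE_TYPE_NEOX;
        case ARCH_UNKNOWN:   abort(); // GGML_ABORT("unknown architecture") in llama.cpp
    }
    return GGML_ROPE_TYPE_NONE; // unreachable; silences missing-return warnings
}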