llama : fix n_rot default (ggerganov#8348)
ggml-ci
ggerganov authored and Nexesenex committed Jul 11, 2024
1 parent 6030c8b commit 1bf7194
Showing 1 changed file with 15 additions and 14 deletions.
llama.cpp (29 changes: 15 additions & 14 deletions)
@@ -4745,16 +4745,6 @@ static void llm_load_hparams(
 
     // non-transformer models do not have attention heads
     if (hparams.n_head() > 0) {
-        // sanity check for n_rot (optional)
-        hparams.n_rot = hparams.n_embd / hparams.n_head();
-
-        ml.get_key(LLM_KV_ROPE_DIMENSION_COUNT, hparams.n_rot, false);
-
-        if (model.arch == LLM_ARCH_LLAMA || model.arch == LLM_ARCH_FALCON) {
-            if (hparams.n_rot != hparams.n_embd / hparams.n_head()) {
-                throw std::runtime_error(format("invalid n_rot: %u, expected %u", hparams.n_rot, hparams.n_embd / hparams.n_head()));
-            }
-        }
         // gpt-neox n_rot = rotary_pct * (n_embd / n_head)
         // gpt-j n_rot = rotary_dim

@@ -4763,6 +4753,17 @@ static void llm_load_hparams(
 
         hparams.n_embd_head_v = hparams.n_embd / hparams.n_head();
         ml.get_key(LLM_KV_ATTENTION_VALUE_LENGTH, hparams.n_embd_head_v, false);
+
+        // sanity check for n_rot (optional)
+        hparams.n_rot = hparams.n_embd_head_k;
+
+        ml.get_key(LLM_KV_ROPE_DIMENSION_COUNT, hparams.n_rot, false);
+
+        if (model.arch == LLM_ARCH_LLAMA || model.arch == LLM_ARCH_FALCON) {
+            if (hparams.n_rot != hparams.n_embd_head_k) {
+                throw std::runtime_error(format("invalid n_rot: %u, expected %u", hparams.n_rot, hparams.n_embd_head_k));
+            }
+        }
     } else {
         hparams.n_rot = 0;
         hparams.n_embd_head_k = 0;
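The practical effect of these two hunks: the fallback for n_rot is now n_embd_head_k (which itself may be overridden by the attention.key_length GGUF key) rather than always n_embd / n_head, and the sanity check now runs after n_embd_head_k has been loaded. A minimal sketch with hypothetical numbers (not taken from any real model) of how the two fallbacks can diverge:

    // hypothetical GGUF metadata, for illustration only
    uint32_t n_embd        = 2048;
    uint32_t n_head        = 16;
    uint32_t n_embd_head_k = 192;                // attention.key_length, if present
    uint32_t old_fallback  = n_embd / n_head;    // 128: previous default for n_rot
    uint32_t new_fallback  = n_embd_head_k;      // 192: default after this commit
    // rope.dimension_count, when present, still overrides the fallback in both
    // versions (the final false argument to ml.get_key makes the read optional)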
@@ -11650,7 +11651,7 @@ struct llm_build_context {
 
                 Qcur = ggml_rope_ext(
                     ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head_k, n_head, n_tokens), inp_pos, nullptr,
-                    n_embd_head_k, rope_type, n_ctx_orig, freq_base, freq_scale,
+                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow);
                 cb(Qcur, "Qcur", il);

@@ -11659,7 +11660,7 @@ struct llm_build_context {
 
                 Kcur = ggml_rope_ext(
                     ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head_k, n_head_kv, n_tokens), inp_pos, nullptr,
-                    n_embd_head_k, rope_type, n_ctx_orig, freq_base, freq_scale,
+                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow);
                 cb(Kcur, "Kcur", il);

@@ -11763,7 +11764,7 @@ struct llm_build_context {
 
                 Qcur = ggml_rope_ext(
                     ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head_k, n_head, n_tokens), inp_pos, nullptr,
-                    n_embd_head_k, rope_type, n_ctx_orig, freq_base, freq_scale,
+                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow);
                 cb(Qcur, "Qcur", il);

@@ -11772,7 +11773,7 @@ struct llm_build_context {
 
                 Kcur = ggml_rope_ext(
                     ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head_k, n_head_kv, n_tokens), inp_pos, nullptr,
-                    n_embd_head_k, rope_type, n_ctx_orig, freq_base, freq_scale,
+                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow);
                 cb(Kcur, "Kcur", il);
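In all four call sites the changed argument is ggml_rope_ext's n_dims parameter, i.e. the number of leading components of each head that get rotated, so passing n_rot instead of n_embd_head_k only changes behaviour for models with partial RoPE (n_rot < n_embd_head_k). A conceptual sketch of that partial rotation, not ggml's implementation (it uses the interleaved-pair convention purely for brevity; the real rotation layout depends on rope_type):

    #include <math.h>   // powf, cosf, sinf

    // Rotate one head of n_embd_head_k floats at position pos;
    // only the first n_rot components are touched.
    static void rope_partial(float * q, int n_rot, int n_embd_head_k, int pos, float freq_base) {
        for (int i = 0; i < n_rot; i += 2) {
            const float theta = (float) pos * powf(freq_base, -(float) i / (float) n_rot);
            const float x0 = q[i];
            const float x1 = q[i + 1];
            q[i    ] = x0*cosf(theta) - x1*sinf(theta);
            q[i + 1] = x0*sinf(theta) + x1*cosf(theta);
        }
        (void) n_embd_head_k; // components [n_rot, n_embd_head_k) pass through unchanged
    }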
