Add optional MLP bias for Granite models
Add optional MLP bias for ARCH_LLAMA to support Granite models.
Partially addresses ggerganov/llama.cpp/issues/7116
Still needs some more changes to properly support Granite.
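
For context: judging from the diff below, Granite's MLP differs from the stock LLaMA feed-forward block only in carrying bias terms on the three projections, i.e. roughly

    ffn(x) = W_down * (SiLU(W_gate * x + b_gate) ⊙ (W_up * x + b_up)) + b_down

where ⊙ is the element-wise product and each bias may be absent; a missing bias drops its term and recovers the plain LLaMA FFN.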
sroecker authored and giuseppe committed May 22, 2024
1 parent cd93a28 commit bae68ea
Showing 1 changed file with 9 additions and 3 deletions: llama.cpp
@@ -1922,6 +1922,7 @@ struct llama_layer {
     struct ggml_tensor * ffn_up_shexp;
 
     // ff bias
+    struct ggml_tensor * ffn_gate_b;
     struct ggml_tensor * ffn_down_b; // b2
     struct ggml_tensor * ffn_up_b; // b3
     struct ggml_tensor * ffn_act;
@@ -5006,6 +5007,11 @@ static bool llm_load_tensors(
         layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
         layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
         layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
+
+        // optional MLP bias
+        layer.ffn_gate_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff}, false);
+        layer.ffn_down_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, false);
+        layer.ffn_up_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, false);
     } else {
         layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});
 
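The trailing false in these calls is the required flag of llama_model_loader::create_tensor in this era of llama.cpp: when the tensor is absent from the GGUF file, the loader returns NULL instead of failing, so existing LLaMA checkpoints that ship no MLP biases keep loading unchanged. A minimal sketch of that contract, reusing identifiers from the hunk above:

// Sketch only: with required == false, a missing tensor yields NULL
// rather than a load error; the graph builder must then skip the add.
layer.ffn_up_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, /*required =*/ false);
if (layer.ffn_up_b == NULL) {
    // model file carries no MLP bias (plain LLaMA): use the bias-free path
}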
@@ -7133,9 +7139,9 @@ struct llm_build_context {
         cb(cur, "ffn_norm", il);
 
         cur = llm_build_ffn(ctx0, cur,
-                model.layers[il].ffn_up, NULL,
-                model.layers[il].ffn_gate, NULL,
-                model.layers[il].ffn_down, NULL,
+                model.layers[il].ffn_up, model.layers[il].ffn_up_b,
+                model.layers[il].ffn_gate, model.layers[il].ffn_gate_b,
+                model.layers[il].ffn_down, model.layers[il].ffn_down_b,
                 NULL,
                 LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
         cb(cur, "ffn_out", il);
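
To make the wiring concrete: llm_build_ffn evidently tolerates NULL bias pointers (the loader may legitimately hand it NULL here), so forwarding the possibly-NULL Granite biases is safe for models that lack them. Below is an illustrative sketch of the LLM_FFN_SILU / LLM_FFN_PAR computation with the optional biases folded in; granite_ffn is a made-up name, not a function in llama.cpp:

#include "ggml.h"

// Illustrative sketch: a parallel gate/up pair with SILU activation,
// each matmul followed by an optional bias add (NULL drops the term).
static struct ggml_tensor * granite_ffn(
        struct ggml_context * ctx,
        struct ggml_tensor  * x,
        struct ggml_tensor  * w_gate, struct ggml_tensor * b_gate,
        struct ggml_tensor  * w_up,   struct ggml_tensor * b_up,
        struct ggml_tensor  * w_down, struct ggml_tensor * b_down) {
    struct ggml_tensor * gate = ggml_mul_mat(ctx, w_gate, x);  // W_gate * x
    if (b_gate) { gate = ggml_add(ctx, gate, b_gate); }        // + b_gate (optional)
    gate = ggml_silu(ctx, gate);                               // LLM_FFN_SILU

    struct ggml_tensor * up = ggml_mul_mat(ctx, w_up, x);      // W_up * x
    if (b_up) { up = ggml_add(ctx, up, b_up); }                // + b_up (optional)

    struct ggml_tensor * cur = ggml_mul(ctx, gate, up);        // LLM_FFN_PAR: gate .* up
    cur = ggml_mul_mat(ctx, w_down, cur);                      // W_down * (gate .* up)
    if (b_down) { cur = ggml_add(ctx, cur, b_down); }          // + b_down (optional)
    return cur;
}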