llama : quantization-related fixes for T5
sszymczy committed Jun 29, 2024
1 parent 7d7fff4 commit 6dc9eb4
Showing 1 changed file with 7 additions and 3 deletions.
src/llama.cpp: 10 changes (7 additions & 3 deletions)
@@ -17195,10 +17195,11 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s

     // sanity checks
     //
-    //  - qs.n_attention_wv == 0                     for Mamba       models
-    //  - qs.n_attention_wv == model.hparams.n_layer for Transformer models
+    //  - qs.n_attention_wv == 0                         for Mamba           models
+    //  - qs.n_attention_wv == model.hparams.n_layer     for Transformer     models
+    //  - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
     //
-    GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer) && "n_attention_wv is unexpected");
+    GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer) && "n_attention_wv is unexpected");
 
     size_t total_size_org = 0;
     size_t total_size_new = 0;
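
Why 3 * n_layer for encoder-decoder models: in a T5-style architecture every layer carries three value projections that the quantizer tallies as attn_v.weight tensors, one each for encoder self-attention, decoder self-attention, and decoder cross-attention. Below is a minimal standalone sketch of that count and the relaxed assertion, assuming illustrative tensor names rather than the exact gguf names llama.cpp uses:

    // Standalone sketch (not the actual llama.cpp loop): count value-projection
    // tensors per layer and apply the relaxed sanity check from this commit.
    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
        const int n_layer = 2; // tiny model, for illustration only

        // assumed names: one value projection per encoder self-attention,
        // decoder self-attention, and decoder cross-attention block
        std::vector<std::string> tensor_names;
        for (int il = 0; il < n_layer; ++il) {
            tensor_names.push_back("enc.blk." + std::to_string(il) + ".attn_v.weight");
            tensor_names.push_back("dec.blk." + std::to_string(il) + ".attn_v.weight");
            tensor_names.push_back("dec.blk." + std::to_string(il) + ".cross_attn_v.weight");
        }

        int n_attention_wv = 0;
        for (const std::string & name : tensor_names) {
            if (name.find("attn_v.weight") != std::string::npos) {
                ++n_attention_wv; // cross_attn_v.weight also matches the substring
            }
        }

        // 0 -> Mamba, n_layer -> plain transformer, 3 * n_layer -> encoder-decoder
        assert(n_attention_wv == 0 || n_attention_wv == n_layer || n_attention_wv == 3 * n_layer);
        return 0;
    }

For the two-layer toy model above the count lands on 6 = 3 * n_layer, which is exactly the new case the widened GGML_ASSERT admits.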
@@ -17323,6 +17324,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         quantize &= name.find("ssm_x.weight") == std::string::npos;
         quantize &= name.find("ssm_dt.weight") == std::string::npos;
 
+        // do not quantize relative position bias (T5)
+        quantize &= name.find("attn_rel_b.weight") == std::string::npos;
+
         enum ggml_type new_type;
         void * new_data;
         size_t new_size;
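
The new exclusion reuses the substring-filter pattern already applied to the Mamba ssm_* tensors just above it: any tensor whose name contains attn_rel_b.weight keeps its original type instead of being quantized. T5's relative position bias is a small per-head bucket table added directly to the attention logits, so keeping it in full precision costs almost nothing in file size. A self-contained sketch of the filter, with an assumed name list for illustration:

    // Standalone sketch (not the actual llama.cpp loop): decide per tensor
    // whether to quantize, using the same substring exclusions as above.
    // The tensor names below are illustrative.
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        const std::vector<std::string> names = {
            "enc.blk.0.attn_q.weight",
            "enc.blk.0.attn_rel_b.weight", // T5 relative position bias
            "blk.0.ssm_dt.weight",         // Mamba
        };

        for (const std::string & name : names) {
            // rough stand-in for the real eligibility checks
            bool quantize = name.rfind(".weight") != std::string::npos;

            // existing exclusions (Mamba)
            quantize &= name.find("ssm_x.weight") == std::string::npos;
            quantize &= name.find("ssm_dt.weight") == std::string::npos;

            // exclusion added by this commit: relative position bias (T5)
            quantize &= name.find("attn_rel_b.weight") == std::string::npos;

            std::cout << name << (quantize ? ": quantize" : ": keep original type") << '\n';
        }
        return 0;
    }

Running the sketch, only enc.blk.0.attn_q.weight is marked for quantization; the bias and ssm tensors fall through to the non-quantized path.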