From 9444f3fca2a004e55dd8fe3df1d102997d7147ec Mon Sep 17 00:00:00 2001
From: Molly Sophia
Date: Mon, 9 Sep 2024 21:31:04 +0800
Subject: [PATCH] RWKV v6: Add time_mix_decay_w1/w2 in quant exclusion list

Signed-off-by: Molly Sophia
---
 convert_hf_to_gguf.py | 2 ++
 src/llama.cpp         | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 0a9bbc8294ef7..ca473244eb929 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -302,6 +302,8 @@ def prepare_tensors(self):
                             gguf.MODEL_TENSOR.TIME_MIX_FIRST,
                             gguf.MODEL_TENSOR.TIME_MIX_W1,
                             gguf.MODEL_TENSOR.TIME_MIX_W2,
+                            gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,
+                            gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,
                         )
                     )
                     or not new_name.endswith(".weight")
diff --git a/src/llama.cpp b/src/llama.cpp
index 39e20440eea83..ee27cbd1c3c44 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17530,6 +17530,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         quantize &= name.find("time_mix_first.weight") == std::string::npos;
         quantize &= name.find("time_mix_w1.weight") == std::string::npos;
         quantize &= name.find("time_mix_w2.weight") == std::string::npos;
+        quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
+        quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
 
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;