RWKV v6: Add time_mix_decay_w1/w2 in quant exclusion list (ggerganov#9387)

Signed-off-by: Molly Sophia <[email protected]>
arthw · Nov 15, 2024 · 4d868ce
1 parent cd2cb5f
commit 4d868ce
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 0 deletions.
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
@@ -302,6 +302,8 @@ def prepare_tensors(self):
                             gguf.MODEL_TENSOR.TIME_MIX_FIRST,
                             gguf.MODEL_TENSOR.TIME_MIX_W1,
                             gguf.MODEL_TENSOR.TIME_MIX_W2,
+                            gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,
+                            gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,
                         )
                     )
                     or not new_name.endswith(".weight")

diff --git a/src/llama.cpp b/src/llama.cpp
@@ -17534,6 +17534,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         quantize &= name.find("time_mix_first.weight") == std::string::npos;
         quantize &= name.find("time_mix_w1.weight") == std::string::npos;
         quantize &= name.find("time_mix_w2.weight") == std::string::npos;
+        quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
+        quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
 
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;