IQ1_XL and some corrections
notably on attn_q and parentheses
Nexesenex committed Aug 11, 2024
1 parent 1268d58 commit 91db53b
Showing 3 changed files with 64 additions and 45 deletions.
examples/quantize/quantize.cpp (7 changes: 4 additions & 3 deletions)
@@ -28,13 +28,14 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "IQ1_XS", LLAMA_FTYPE_MOSTLY_IQ1_XS, " 1.6-1.7 bpw quantization mix", },
     { "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", },
     { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", },
+    { "IQ1_XL", LLAMA_FTYPE_MOSTLY_IQ1_XL, " 1.90 bpw quantization", },
     { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.96G, +3.5199 ppl @ Llama-3-8B", },
     { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B", },
-    { "Q2_K_L", LLAMA_FTYPE_MOSTLY_Q2_K_L, " 2.96G, +3.1836 ppl @ Llama-3-8B", },
+    { "Q2_K_L", LLAMA_FTYPE_MOSTLY_Q2_K_L, " 3.20G, +3.1836 ppl @ Llama-3-8B", },
     { "IQ3_XXS", LLAMA_FTYPE_MOSTLY_IQ3_XXS, " 3.06 bpw quantization", },
     { "IQ3_S", LLAMA_FTYPE_MOSTLY_IQ3_S, " 3.44 bpw quantization", },
-    { "IQ3_M", LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.66 bpw quantization mix", },
-    { "IQ3_XL", LLAMA_FTYPE_MOSTLY_IQ3_XL, " 3.85 bpw quantization mix", },
+    { "IQ3_M", LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.70 bpw quantization mix", },
+    { "IQ3_XL", LLAMA_FTYPE_MOSTLY_IQ3_XL, " 3.95 bpw quantization mix", },
     { "Q3_K", LLAMA_FTYPE_MOSTLY_Q3_K_M, "alias for Q3_K_M" },
     { "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization", },
     { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G, +1.6321 ppl @ Llama-3-8B", },
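For context: QUANT_OPTIONS is the table the quantize example scans to map a type name given on the command line (e.g. IQ1_XL) to an llama_ftype value. Below is a minimal, self-contained sketch of that lookup, assuming a build of this fork where LLAMA_FTYPE_MOSTLY_IQ1_XL is defined; the parse_ftype helper is a simplified illustration, not the exact code in quantize.cpp.

#include <string>
#include <vector>
#include "llama.h"

// Simplified stand-in for the quant_option entries shown in the diff above.
struct quant_option {
    std::string name;
    llama_ftype ftype;
    std::string desc;
};

static const std::vector<quant_option> QUANT_OPTIONS = {
    { "IQ1_S",  LLAMA_FTYPE_MOSTLY_IQ1_S,  " 1.56 bpw quantization" },
    { "IQ1_M",  LLAMA_FTYPE_MOSTLY_IQ1_M,  " 1.75 bpw quantization" },
    { "IQ1_XL", LLAMA_FTYPE_MOSTLY_IQ1_XL, " 1.90 bpw quantization" }, // added by this commit
};

// Hypothetical helper: resolve a command-line type name to its llama_ftype.
static bool parse_ftype(const std::string & arg, llama_ftype & out) {
    for (const auto & opt : QUANT_OPTIONS) {
        if (opt.name == arg) {
            out = opt.ftype;
            return true;
        }
    }
    return false;
}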
include/llama.h (1 change: 1 addition & 0 deletions)
@@ -170,6 +170,7 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_IQ3_XL = 37, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q2_K_L = 38, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_IQ1_XS = 39, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_IQ1_XL = 40, // except 1d tensors
         LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
     };

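With the new LLAMA_FTYPE_MOSTLY_IQ1_XL value exposed in llama.h, the 1.90 bpw mix can also be requested through the public quantization API rather than the quantize tool. A minimal sketch, assuming a build of this fork; the input and output paths are placeholders.

#include "llama.h"
#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s <input.gguf> <output.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    // Start from the library defaults and only override the target ftype.
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype = LLAMA_FTYPE_MOSTLY_IQ1_XL; // new 1.90 bpw quantization mix

    // llama_model_quantize returns 0 on success.
    const uint32_t rc = llama_model_quantize(argv[1], argv[2], &params);

    llama_backend_free();
    return rc == 0 ? 0 : 1;
}

This mirrors what examples/quantize does when it is invoked with the IQ1_XL type name from the table above.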
(diff for the third changed file not loaded)
