diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index e3ebd660d6208..05df330c0846f 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -26,7 +26,7 @@ static const std::vector QUANT_OPTIONS = { { "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization", }, { "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", }, { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", }, - { "Q2_2", LLAMA_FTYPE_MOSTLY_Q2_2, " 2.5 bpw quantization", }, + { "Q2_2", LLAMA_FTYPE_MOSTLY_Q2_2, " 2 bpw quantization", }, { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.63G, +0.6717 ppl @ LLaMA-v1-7B", }, { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G, +9.0634 ppl @ LLaMA-v1-7B", }, { "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization", }, diff --git a/ggml-common.h b/ggml-common.h index d3b6d1a948db8..a1a8246656cca 100644 --- a/ggml-common.h +++ b/ggml-common.h @@ -139,10 +139,9 @@ typedef sycl::half2 ggml_half2; #define QK2_2 32 typedef struct { - ggml_half d; // delta uint8_t qs[QK2_2 / 4]; // nibbles / quants } block_q2_2; -static_assert(sizeof(block_q2_2) == sizeof(ggml_half) + QK2_2 / 4, "wrong q2_2 block size/padding"); +static_assert(sizeof(block_q2_2) == QK2_2 / 4, "wrong q2_2 block size/padding"); #define QK4_0 32 typedef struct { diff --git a/ggml-quants.c b/ggml-quants.c index aebeb02170f0b..a3c8c67319557 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -668,10 +668,6 @@ void quantize_row_q2_2_reference(const float * restrict x, block_q2_2 * restrict for (int i = 0; i < nb; i++) { - const float d = 1.0f; - - y[i].d = GGML_FP32_TO_FP16(d); - for (int j = 0; j < qk/4; ++j) { int8_t x0 = (int8_t)x[i*qk + j*4 + 0]; int8_t x1 = (int8_t)x[i*qk + j*4 + 1]; @@ -14369,10 +14365,6 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte } } } break; - case GGML_TYPE_Q2_2: - { - VALIDATE_ROW_DATA_D_F16_IMPL(block_q2_2, data, nb); - } break; case GGML_TYPE_Q4_0: { VALIDATE_ROW_DATA_D_F16_IMPL(block_q4_0, data, nb); @@ -14467,6 +14459,7 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte { VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_nl, data, nb); } break; + case GGML_TYPE_Q2_2: case GGML_TYPE_I8: case GGML_TYPE_I16: case GGML_TYPE_I32: