Skip to content

Commit

Permalink
remove block scale
Browse files Browse the repository at this point in the history
  • Loading branch information
Eddie-Wang1120 committed Jun 18, 2024
1 parent 4edc958 commit 89c7e4c
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 11 deletions.
2 changes: 1 addition & 1 deletion examples/quantize/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
{ "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization", },
{ "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", },
{ "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", },
- { "Q2_2", LLAMA_FTYPE_MOSTLY_Q2_2, " 2.5 bpw quantization", },
+ { "Q2_2", LLAMA_FTYPE_MOSTLY_Q2_2, " 2 bpw quantization", },
{ "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.63G, +0.6717 ppl @ LLaMA-v1-7B", },
{ "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G, +9.0634 ppl @ LLaMA-v1-7B", },
{ "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization", },
Expand Down
3 changes: 1 addition & 2 deletions ggml-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,9 @@ typedef sycl::half2 ggml_half2;

#define QK2_2 32
typedef struct {
- ggml_half d; // delta
uint8_t qs[QK2_2 / 4]; // nibbles / quants
} block_q2_2;
- static_assert(sizeof(block_q2_2) == sizeof(ggml_half) + QK2_2 / 4, "wrong q2_2 block size/padding");
+ static_assert(sizeof(block_q2_2) == QK2_2 / 4, "wrong q2_2 block size/padding");

#define QK4_0 32
typedef struct {
Expand Down
9 changes: 1 addition & 8 deletions ggml-quants.c
Original file line number Diff line number Diff line change
Expand Up @@ -668,10 +668,6 @@ void quantize_row_q2_2_reference(const float * restrict x, block_q2_2 * restrict

for (int i = 0; i < nb; i++) {

- const float d = 1.0f;

- y[i].d = GGML_FP32_TO_FP16(d);

for (int j = 0; j < qk/4; ++j) {
int8_t x0 = (int8_t)x[i*qk + j*4 + 0];
int8_t x1 = (int8_t)x[i*qk + j*4 + 1];
Expand Down Expand Up @@ -14369,10 +14365,6 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
}
}
} break;
- case GGML_TYPE_Q2_2:
- {
- VALIDATE_ROW_DATA_D_F16_IMPL(block_q2_2, data, nb);
- } break;
case GGML_TYPE_Q4_0:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_q4_0, data, nb);
Expand Down Expand Up @@ -14467,6 +14459,7 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_nl, data, nb);
} break;
+ case GGML_TYPE_Q2_2:
case GGML_TYPE_I8:
case GGML_TYPE_I16:
case GGML_TYPE_I32:
Expand Down

0 comments on commit 89c7e4c

Please sign in to comment.