From ef0fbd8eea3d649cd19f197a66ff6f315f16299e Mon Sep 17 00:00:00 2001 From: thxCode Date: Wed, 4 Dec 2024 14:00:55 +0800 Subject: [PATCH] feat: introduce new ggml type Signed-off-by: thxCode --- file_metadata.go | 67 +++++++++++++----------- ggml.go | 74 ++++++++++++++------------- zz_generated.ggmltype.stringer.go | 7 +-- zz_generated.gguffiletype.stringer.go | 7 +-- 4 files changed, 82 insertions(+), 73 deletions(-) diff --git a/file_metadata.go b/file_metadata.go index 528ff98..7ef2a3f 100644 --- a/file_metadata.go +++ b/file_metadata.go @@ -80,37 +80,38 @@ type GGUFFileType uint32 // GGUFFileTypeMostlyQ4_1_F16 is a special case where the majority of the tensors are Q4_1, // but 'token_embd.weight' and 'output.weight' tensors are F16. const ( - GGUFFileTypeAllF32 GGUFFileType = iota // F32 - GGUFFileTypeMostlyF16 // F16 - GGUFFileTypeMostlyQ4_0 // Q4_0 - GGUFFileTypeMostlyQ4_1 // Q4_1 - GGUFFileTypeMostlyQ4_1_F16 // Q4_1_F16 - GGUFFileTypeMostlyQ4_2 // Q4_2 - GGUFFileTypeMostlyQ4_3 // Q4_3 - GGUFFileTypeMostlyQ8_0 // Q8_0 - GGUFFileTypeMostlyQ5_0 // Q5_0 - GGUFFileTypeMostlyQ5_1 // Q5_1 - GGUFFileTypeMostlyQ2_K // Q2_K - GGUFFileTypeMostlyQ3_K // Q3_K/Q3_K_S - GGUFFileTypeMostlyQ4_K // Q4_K/Q3_K_M - GGUFFileTypeMostlyQ5_K // Q5_K/Q3_K_L - GGUFFileTypeMostlyQ6_K // Q6_K/Q4_K_S - GGUFFileTypeMostlyIQ2_XXS // IQ2_XXS/Q4_K_M - GGUFFileTypeMostlyIQ2_XS // IQ2_XS/Q5_K_S - GGUFFileTypeMostlyIQ3_XXS // IQ3_XXS/Q5_K_M - GGUFFileTypeMostlyIQ1_S // IQ1_S/Q6_K - GGUFFileTypeMostlyIQ4_NL // IQ4_NL - GGUFFileTypeMostlyIQ3_S // IQ3_S - GGUFFileTypeMostlyIQ2_S // IQ2_S - GGUFFileTypeMostlyIQ4_XS // IQ4_XS - GGUFFileTypeMostlyIQ1_M // IQ1_M - GGUFFileTypeMostlyBF16 // BF16 - GGUFFileTypeMostlyQ4_0_4_4 // Q4_0_4x4 - GGUFFileTypeMostlyQ4_0_4_8 // Q4_0_4x8 - GGUFFileTypeMostlyQ4_0_8_8 // Q4_0_8x8 - GGUFFileTypeMostlyTQ1_0 // TQ1_0 - GGUFFileTypeMostlyTQ2_0 // TQ2_0 - _GGUFFileTypeCount // Unknown + GGUFFileTypeAllF32 GGUFFileType = iota // F32 + GGUFFileTypeMostlyF16 // F16 + GGUFFileTypeMostlyQ4_0 // Q4_0 + GGUFFileTypeMostlyQ4_1 // Q4_1 + GGUFFileTypeMostlyQ4_1_F16 // Q4_1_F16 + GGUFFileTypeMostlyQ4_2 // Q4_2 + GGUFFileTypeMostlyQ4_3 // Q4_3 + GGUFFileTypeMostlyQ8_0 // Q8_0 + GGUFFileTypeMostlyQ5_0 // Q5_0 + GGUFFileTypeMostlyQ5_1 // Q5_1 + GGUFFileTypeMostlyQ2_K // Q2_K + GGUFFileTypeMostlyQ3_K // Q3_K/Q3_K_S + GGUFFileTypeMostlyQ4_K // Q4_K/Q3_K_M + GGUFFileTypeMostlyQ5_K // Q5_K/Q3_K_L + GGUFFileTypeMostlyQ6_K // Q6_K/Q4_K_S + GGUFFileTypeMostlyIQ2_XXS // IQ2_XXS/Q4_K_M + GGUFFileTypeMostlyIQ2_XS // IQ2_XS/Q5_K_S + GGUFFileTypeMostlyIQ3_XXS // IQ3_XXS/Q5_K_M + GGUFFileTypeMostlyIQ1_S // IQ1_S/Q6_K + GGUFFileTypeMostlyIQ4_NL // IQ4_NL + GGUFFileTypeMostlyIQ3_S // IQ3_S + GGUFFileTypeMostlyIQ2_S // IQ2_S + GGUFFileTypeMostlyIQ4_XS // IQ4_XS + GGUFFileTypeMostlyIQ1_M // IQ1_M + GGUFFileTypeMostlyBF16 // BF16 + GGUFFileTypeMostlyQ4_0_4_4 // Q4_0_4x4 + GGUFFileTypeMostlyQ4_0_4_8 // Q4_0_4x8 + GGUFFileTypeMostlyQ4_0_8_8 // Q4_0_8x8 + GGUFFileTypeMostlyTQ1_0 // TQ1_0 + GGUFFileTypeMostlyTQ2_0 // TQ2_0 + GGUFFileTypeMostlyIQ4_NL_4_4 // IQ4_NL_4x4 + _GGUFFileTypeCount // Unknown ) // Metadata returns the metadata of the GGUF file. @@ -271,6 +272,8 @@ func (t GGUFFileType) GGMLType() GGMLType { return GGMLTypeTQ1_0 case GGUFFileTypeMostlyTQ2_0: return GGMLTypeTQ2_0 + case GGUFFileTypeMostlyIQ4_NL_4_4: + return GGMLTypeIQ4_NL_4_4 default: } return _GGMLTypeCount @@ -391,6 +394,8 @@ func GetFileType(cm map[GGMLType]int) GGUFFileType { return GGUFFileTypeMostlyTQ1_0 case GGMLTypeTQ2_0: return GGUFFileTypeMostlyTQ2_0 + case GGMLTypeIQ4_NL_4_4: + return GGUFFileTypeMostlyIQ4_NL_4_4 default: } return _GGUFFileTypeCount diff --git a/ggml.go b/ggml.go index d2412fd..72d949e 100644 --- a/ggml.go +++ b/ggml.go @@ -61,47 +61,49 @@ const ( GGMLTypeQ4_0_8_8 GGMLTypeTQ1_0 GGMLTypeTQ2_0 + GGMLTypeIQ4_NL_4_4 _GGMLTypeCount // Unknown ) // _GGMLTypeTraits is a table of GGMLTypeTrait for GGMLType. var _GGMLTypeTraits = map[GGMLType]GGMLTypeTrait{ - GGMLTypeF32: {BlockSize: 1, TypeSize: 4}, - GGMLTypeF16: {BlockSize: 1, TypeSize: 2}, - GGMLTypeQ4_0: {BlockSize: 32, TypeSize: 18, Quantized: true}, - GGMLTypeQ4_1: {BlockSize: 32, TypeSize: 20, Quantized: true}, - GGMLTypeQ4_2: {BlockSize: 0, TypeSize: 0}, // Deprecated - GGMLTypeQ4_3: {BlockSize: 0, TypeSize: 0}, // Deprecated - GGMLTypeQ5_0: {BlockSize: 32, TypeSize: 22, Quantized: true}, - GGMLTypeQ5_1: {BlockSize: 32, TypeSize: 24, Quantized: true}, - GGMLTypeQ8_0: {BlockSize: 32, TypeSize: 34, Quantized: true}, - GGMLTypeQ8_1: {BlockSize: 32, TypeSize: 36, Quantized: true}, - GGMLTypeQ2_K: {BlockSize: 256, TypeSize: 84, Quantized: true}, - GGMLTypeQ3_K: {BlockSize: 256, TypeSize: 110, Quantized: true}, - GGMLTypeQ4_K: {BlockSize: 256, TypeSize: 144, Quantized: true}, - GGMLTypeQ5_K: {BlockSize: 256, TypeSize: 176, Quantized: true}, - GGMLTypeQ6_K: {BlockSize: 256, TypeSize: 210, Quantized: true}, - GGMLTypeQ8_K: {BlockSize: 256, TypeSize: 292, Quantized: true}, - GGMLTypeIQ2_XXS: {BlockSize: 256, TypeSize: 66, Quantized: true}, - GGMLTypeIQ2_XS: {BlockSize: 256, TypeSize: 74, Quantized: true}, - GGMLTypeIQ3_XXS: {BlockSize: 256, TypeSize: 98, Quantized: true}, - GGMLTypeIQ1_S: {BlockSize: 256, TypeSize: 50, Quantized: true}, - GGMLTypeIQ4_NL: {BlockSize: 32, TypeSize: 18, Quantized: true}, - GGMLTypeIQ3_S: {BlockSize: 256, TypeSize: 110, Quantized: true}, - GGMLTypeIQ2_S: {BlockSize: 256, TypeSize: 82, Quantized: true}, - GGMLTypeIQ4_XS: {BlockSize: 256, TypeSize: 136, Quantized: true}, - GGMLTypeI8: {BlockSize: 1, TypeSize: 1}, - GGMLTypeI16: {BlockSize: 1, TypeSize: 2}, - GGMLTypeI32: {BlockSize: 1, TypeSize: 4}, - GGMLTypeI64: {BlockSize: 1, TypeSize: 8}, - GGMLTypeF64: {BlockSize: 1, TypeSize: 8}, - GGMLTypeIQ1_M: {BlockSize: 256, TypeSize: 56, Quantized: true}, - GGMLTypeBF16: {BlockSize: 1, TypeSize: 2}, - GGMLTypeQ4_0_4_4: {BlockSize: 32, TypeSize: 18, Quantized: true}, - GGMLTypeQ4_0_4_8: {BlockSize: 32, TypeSize: 18, Quantized: true}, - GGMLTypeQ4_0_8_8: {BlockSize: 32, TypeSize: 18, Quantized: true}, - GGMLTypeTQ1_0: {BlockSize: 256, TypeSize: 54, Quantized: true}, - GGMLTypeTQ2_0: {BlockSize: 256, TypeSize: 66, Quantized: true}, + GGMLTypeF32: {BlockSize: 1, TypeSize: 4}, + GGMLTypeF16: {BlockSize: 1, TypeSize: 2}, + GGMLTypeQ4_0: {BlockSize: 32, TypeSize: 18, Quantized: true}, + GGMLTypeQ4_1: {BlockSize: 32, TypeSize: 20, Quantized: true}, + GGMLTypeQ4_2: {BlockSize: 0, TypeSize: 0}, // Deprecated + GGMLTypeQ4_3: {BlockSize: 0, TypeSize: 0}, // Deprecated + GGMLTypeQ5_0: {BlockSize: 32, TypeSize: 22, Quantized: true}, + GGMLTypeQ5_1: {BlockSize: 32, TypeSize: 24, Quantized: true}, + GGMLTypeQ8_0: {BlockSize: 32, TypeSize: 34, Quantized: true}, + GGMLTypeQ8_1: {BlockSize: 32, TypeSize: 36, Quantized: true}, + GGMLTypeQ2_K: {BlockSize: 256, TypeSize: 84, Quantized: true}, + GGMLTypeQ3_K: {BlockSize: 256, TypeSize: 110, Quantized: true}, + GGMLTypeQ4_K: {BlockSize: 256, TypeSize: 144, Quantized: true}, + GGMLTypeQ5_K: {BlockSize: 256, TypeSize: 176, Quantized: true}, + GGMLTypeQ6_K: {BlockSize: 256, TypeSize: 210, Quantized: true}, + GGMLTypeQ8_K: {BlockSize: 256, TypeSize: 292, Quantized: true}, + GGMLTypeIQ2_XXS: {BlockSize: 256, TypeSize: 66, Quantized: true}, + GGMLTypeIQ2_XS: {BlockSize: 256, TypeSize: 74, Quantized: true}, + GGMLTypeIQ3_XXS: {BlockSize: 256, TypeSize: 98, Quantized: true}, + GGMLTypeIQ1_S: {BlockSize: 256, TypeSize: 50, Quantized: true}, + GGMLTypeIQ4_NL: {BlockSize: 32, TypeSize: 18, Quantized: true}, + GGMLTypeIQ3_S: {BlockSize: 256, TypeSize: 110, Quantized: true}, + GGMLTypeIQ2_S: {BlockSize: 256, TypeSize: 82, Quantized: true}, + GGMLTypeIQ4_XS: {BlockSize: 256, TypeSize: 136, Quantized: true}, + GGMLTypeI8: {BlockSize: 1, TypeSize: 1}, + GGMLTypeI16: {BlockSize: 1, TypeSize: 2}, + GGMLTypeI32: {BlockSize: 1, TypeSize: 4}, + GGMLTypeI64: {BlockSize: 1, TypeSize: 8}, + GGMLTypeF64: {BlockSize: 1, TypeSize: 8}, + GGMLTypeIQ1_M: {BlockSize: 256, TypeSize: 56, Quantized: true}, + GGMLTypeBF16: {BlockSize: 1, TypeSize: 2}, + GGMLTypeQ4_0_4_4: {BlockSize: 32, TypeSize: 18, Quantized: true}, + GGMLTypeQ4_0_4_8: {BlockSize: 32, TypeSize: 18, Quantized: true}, + GGMLTypeQ4_0_8_8: {BlockSize: 32, TypeSize: 18, Quantized: true}, + GGMLTypeTQ1_0: {BlockSize: 256, TypeSize: 54, Quantized: true}, + GGMLTypeTQ2_0: {BlockSize: 256, TypeSize: 66, Quantized: true}, + GGMLTypeIQ4_NL_4_4: {BlockSize: 32, TypeSize: 18, Quantized: true}, } // Trait returns the GGMLTypeTrait of the GGMLType. diff --git a/zz_generated.ggmltype.stringer.go b/zz_generated.ggmltype.stringer.go index 53b6ba4..7176d05 100644 --- a/zz_generated.ggmltype.stringer.go +++ b/zz_generated.ggmltype.stringer.go @@ -44,12 +44,13 @@ func _() { _ = x[GGMLTypeQ4_0_8_8-33] _ = x[GGMLTypeTQ1_0-34] _ = x[GGMLTypeTQ2_0-35] - _ = x[_GGMLTypeCount-36] + _ = x[GGMLTypeIQ4_NL_4_4-36] + _ = x[_GGMLTypeCount-37] } -const _GGMLType_name = "F32F16Q4_0Q4_1Q4_2Q4_3Q5_0Q5_1Q8_0Q8_1Q2_KQ3_KQ4_KQ5_KQ6_KQ8_KIQ2_XXSIQ2_XSIQ3_XXSIQ1_SIQ4_NLIQ3_SIQ2_SIQ4_XSI8I16I32I64F64IQ1_MBF16Q4_0_4_4Q4_0_4_8Q4_0_8_8TQ1_0TQ2_0Unknown" +const _GGMLType_name = "F32F16Q4_0Q4_1Q4_2Q4_3Q5_0Q5_1Q8_0Q8_1Q2_KQ3_KQ4_KQ5_KQ6_KQ8_KIQ2_XXSIQ2_XSIQ3_XXSIQ1_SIQ4_NLIQ3_SIQ2_SIQ4_XSI8I16I32I64F64IQ1_MBF16Q4_0_4_4Q4_0_4_8Q4_0_8_8TQ1_0TQ2_0IQ4_NL_4_4Unknown" -var _GGMLType_index = [...]uint8{0, 3, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, 69, 75, 82, 87, 93, 98, 103, 109, 111, 114, 117, 120, 123, 128, 132, 140, 148, 156, 161, 166, 173} +var _GGMLType_index = [...]uint8{0, 3, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, 69, 75, 82, 87, 93, 98, 103, 109, 111, 114, 117, 120, 123, 128, 132, 140, 148, 156, 161, 166, 176, 183} func (i GGMLType) String() string { if i >= GGMLType(len(_GGMLType_index)-1) { diff --git a/zz_generated.gguffiletype.stringer.go b/zz_generated.gguffiletype.stringer.go index a2615b4..5a06073 100644 --- a/zz_generated.gguffiletype.stringer.go +++ b/zz_generated.gguffiletype.stringer.go @@ -38,12 +38,13 @@ func _() { _ = x[GGUFFileTypeMostlyQ4_0_8_8-27] _ = x[GGUFFileTypeMostlyTQ1_0-28] _ = x[GGUFFileTypeMostlyTQ2_0-29] - _ = x[_GGUFFileTypeCount-30] + _ = x[GGUFFileTypeMostlyIQ4_NL_4_4-30] + _ = x[_GGUFFileTypeCount-31] } -const _GGUFFileType_name = "F32F16Q4_0Q4_1Q4_1_F16Q4_2Q4_3Q8_0Q5_0Q5_1Q2_KQ3_K/Q3_K_SQ4_K/Q3_K_MQ5_K/Q3_K_LQ6_K/Q4_K_SIQ2_XXS/Q4_K_MIQ2_XS/Q5_K_SIQ3_XXS/Q5_K_MIQ1_S/Q6_KIQ4_NLIQ3_SIQ2_SIQ4_XSIQ1_MBF16Q4_0_4x4Q4_0_4x8Q4_0_8x8TQ1_0TQ2_0Unknown" +const _GGUFFileType_name = "F32F16Q4_0Q4_1Q4_1_F16Q4_2Q4_3Q8_0Q5_0Q5_1Q2_KQ3_K/Q3_K_SQ4_K/Q3_K_MQ5_K/Q3_K_LQ6_K/Q4_K_SIQ2_XXS/Q4_K_MIQ2_XS/Q5_K_SIQ3_XXS/Q5_K_MIQ1_S/Q6_KIQ4_NLIQ3_SIQ2_SIQ4_XSIQ1_MBF16Q4_0_4x4Q4_0_4x8Q4_0_8x8TQ1_0TQ2_0IQ4_NL_4x4Unknown" -var _GGUFFileType_index = [...]uint8{0, 3, 6, 10, 14, 22, 26, 30, 34, 38, 42, 46, 57, 68, 79, 90, 104, 117, 131, 141, 147, 152, 157, 163, 168, 172, 180, 188, 196, 201, 206, 213} +var _GGUFFileType_index = [...]uint8{0, 3, 6, 10, 14, 22, 26, 30, 34, 38, 42, 46, 57, 68, 79, 90, 104, 117, 131, 141, 147, 152, 157, 163, 168, 172, 180, 188, 196, 201, 206, 216, 223} func (i GGUFFileType) String() string { if i >= GGUFFileType(len(_GGUFFileType_index)-1) {