From ee6d366d0978e2c915d9430e5acf122a29393667 Mon Sep 17 00:00:00 2001 From: thxCode Date: Wed, 11 Sep 2024 11:16:35 +0800 Subject: [PATCH] feat: support tq1_0 tq2_0 Signed-off-by: thxCode --- file_metadata.go | 10 ++++++++++ ggml.go | 8 ++++++-- zz_generated.ggmltype.stringer.go | 8 +++++--- zz_generated.gguffiletype.stringer.go | 8 +++++--- 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/file_metadata.go b/file_metadata.go index a66c4cb..4b0c4de 100644 --- a/file_metadata.go +++ b/file_metadata.go @@ -105,6 +105,8 @@ const ( GGUFFileTypeMostlyQ4_0_4_4 // Q4_0_4x4 GGUFFileTypeMostlyQ4_0_4_8 // Q4_0_4x8 GGUFFileTypeMostlyQ4_0_8_8 // Q4_0_8x8 + GGUFFileTypeMostlyTQ1_0 // TQ1_0 + GGUFFileTypeMostlyTQ2_0 // TQ2_0 _GGUFFileTypeCount // Unknown ) @@ -257,6 +259,10 @@ func (t GGUFFileType) GGMLType() GGMLType { return GGMLTypeQ4_0_4_8 case GGUFFileTypeMostlyQ4_0_8_8: return GGMLTypeQ4_0_8_8 + case GGUFFileTypeMostlyTQ1_0: + return GGMLTypeTQ1_0 + case GGUFFileTypeMostlyTQ2_0: + return GGMLTypeTQ2_0 default: } return _GGMLTypeCount @@ -364,6 +370,10 @@ func (gf *GGUFFile) guessFileType() GGUFFileType { return GGUFFileTypeMostlyQ4_0_4_8 case GGMLTypeQ4_0_8_8: return GGUFFileTypeMostlyQ4_0_8_8 + case GGMLTypeTQ1_0: + return GGUFFileTypeMostlyTQ1_0 + case GGMLTypeTQ2_0: + return GGUFFileTypeMostlyTQ2_0 default: } return _GGUFFileTypeCount diff --git a/ggml.go b/ggml.go index 472add9..d2412fd 100644 --- a/ggml.go +++ b/ggml.go @@ -9,11 +9,11 @@ import ( // Types for GGMLType. type ( // GGMLType is a type of GGML tensor, - // see https://github.com/ggerganov/llama.cpp/blob/278d0e18469aacf505be18ce790a63c7cc31be26/ggml/include/ggml.h#L354-L390. + // see https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/ggml/include/ggml.h#L363-L401. GGMLType uint32 // GGMLTypeTrait holds the trait of a GGMLType, - // see https://github.com/ggerganov/llama.cpp/blob/278d0e18469aacf505be18ce790a63c7cc31be26/ggml/src/ggml.c#L547-L942. 
+ // see https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/ggml/src/ggml.c#L663-L1082. GGMLTypeTrait struct { BlockSize uint64 // Original is int, in order to reduce conversion, here we use uint64. TypeSize uint64 // Original is uint32, in order to reduce conversion, here we use uint64. @@ -59,6 +59,8 @@ const ( GGMLTypeQ4_0_4_4 GGMLTypeQ4_0_4_8 GGMLTypeQ4_0_8_8 + GGMLTypeTQ1_0 + GGMLTypeTQ2_0 _GGMLTypeCount // Unknown ) @@ -98,6 +100,8 @@ var _GGMLTypeTraits = map[GGMLType]GGMLTypeTrait{ GGMLTypeQ4_0_4_4: {BlockSize: 32, TypeSize: 18, Quantized: true}, GGMLTypeQ4_0_4_8: {BlockSize: 32, TypeSize: 18, Quantized: true}, GGMLTypeQ4_0_8_8: {BlockSize: 32, TypeSize: 18, Quantized: true}, + GGMLTypeTQ1_0: {BlockSize: 256, TypeSize: 54, Quantized: true}, + GGMLTypeTQ2_0: {BlockSize: 256, TypeSize: 66, Quantized: true}, } // Trait returns the GGMLTypeTrait of the GGMLType. diff --git a/zz_generated.ggmltype.stringer.go b/zz_generated.ggmltype.stringer.go index 16c5207..53b6ba4 100644 --- a/zz_generated.ggmltype.stringer.go +++ b/zz_generated.ggmltype.stringer.go @@ -42,12 +42,14 @@ func _() { _ = x[GGMLTypeQ4_0_4_4-31] _ = x[GGMLTypeQ4_0_4_8-32] _ = x[GGMLTypeQ4_0_8_8-33] - _ = x[_GGMLTypeCount-34] + _ = x[GGMLTypeTQ1_0-34] + _ = x[GGMLTypeTQ2_0-35] + _ = x[_GGMLTypeCount-36] } -const _GGMLType_name = "F32F16Q4_0Q4_1Q4_2Q4_3Q5_0Q5_1Q8_0Q8_1Q2_KQ3_KQ4_KQ5_KQ6_KQ8_KIQ2_XXSIQ2_XSIQ3_XXSIQ1_SIQ4_NLIQ3_SIQ2_SIQ4_XSI8I16I32I64F64IQ1_MBF16Q4_0_4_4Q4_0_4_8Q4_0_8_8Unknown" +const _GGMLType_name = "F32F16Q4_0Q4_1Q4_2Q4_3Q5_0Q5_1Q8_0Q8_1Q2_KQ3_KQ4_KQ5_KQ6_KQ8_KIQ2_XXSIQ2_XSIQ3_XXSIQ1_SIQ4_NLIQ3_SIQ2_SIQ4_XSI8I16I32I64F64IQ1_MBF16Q4_0_4_4Q4_0_4_8Q4_0_8_8TQ1_0TQ2_0Unknown" -var _GGMLType_index = [...]uint8{0, 3, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, 69, 75, 82, 87, 93, 98, 103, 109, 111, 114, 117, 120, 123, 128, 132, 140, 148, 156, 163} +var _GGMLType_index = [...]uint8{0, 3, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 
62, 69, 75, 82, 87, 93, 98, 103, 109, 111, 114, 117, 120, 123, 128, 132, 140, 148, 156, 161, 166, 173} func (i GGMLType) String() string { if i >= GGMLType(len(_GGMLType_index)-1) { diff --git a/zz_generated.gguffiletype.stringer.go b/zz_generated.gguffiletype.stringer.go index 3920c26..a2615b4 100644 --- a/zz_generated.gguffiletype.stringer.go +++ b/zz_generated.gguffiletype.stringer.go @@ -36,12 +36,14 @@ func _() { _ = x[GGUFFileTypeMostlyQ4_0_4_4-25] _ = x[GGUFFileTypeMostlyQ4_0_4_8-26] _ = x[GGUFFileTypeMostlyQ4_0_8_8-27] - _ = x[_GGUFFileTypeCount-28] + _ = x[GGUFFileTypeMostlyTQ1_0-28] + _ = x[GGUFFileTypeMostlyTQ2_0-29] + _ = x[_GGUFFileTypeCount-30] } -const _GGUFFileType_name = "F32F16Q4_0Q4_1Q4_1_F16Q4_2Q4_3Q8_0Q5_0Q5_1Q2_KQ3_K/Q3_K_SQ4_K/Q3_K_MQ5_K/Q3_K_LQ6_K/Q4_K_SIQ2_XXS/Q4_K_MIQ2_XS/Q5_K_SIQ3_XXS/Q5_K_MIQ1_S/Q6_KIQ4_NLIQ3_SIQ2_SIQ4_XSIQ1_MBF16Q4_0_4x4Q4_0_4x8Q4_0_8x8Unknown" +const _GGUFFileType_name = "F32F16Q4_0Q4_1Q4_1_F16Q4_2Q4_3Q8_0Q5_0Q5_1Q2_KQ3_K/Q3_K_SQ4_K/Q3_K_MQ5_K/Q3_K_LQ6_K/Q4_K_SIQ2_XXS/Q4_K_MIQ2_XS/Q5_K_SIQ3_XXS/Q5_K_MIQ1_S/Q6_KIQ4_NLIQ3_SIQ2_SIQ4_XSIQ1_MBF16Q4_0_4x4Q4_0_4x8Q4_0_8x8TQ1_0TQ2_0Unknown" -var _GGUFFileType_index = [...]uint8{0, 3, 6, 10, 14, 22, 26, 30, 34, 38, 42, 46, 57, 68, 79, 90, 104, 117, 131, 141, 147, 152, 157, 163, 168, 172, 180, 188, 196, 203} +var _GGUFFileType_index = [...]uint8{0, 3, 6, 10, 14, 22, 26, 30, 34, 38, 42, 46, 57, 68, 79, 90, 104, 117, 131, 141, 147, 152, 157, 163, 168, 172, 180, 188, 196, 201, 206, 213} func (i GGUFFileType) String() string { if i >= GGUFFileType(len(_GGUFFileType_index)-1) {