Skip to content

Commit

Permalink
feat: introduce new ggml type
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <[email protected]>
  • Loading branch information
thxCode committed Dec 4, 2024
1 parent ddf5b51 commit ef0fbd8
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 73 deletions.
67 changes: 36 additions & 31 deletions file_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,37 +80,38 @@ type GGUFFileType uint32
// GGUFFileTypeMostlyQ4_1_F16 is a special case where the majority of the tensors are Q4_1,
// but 'token_embd.weight' and 'output.weight' tensors are F16.
const (
// Enumerates the GGUFFileType values. Values are assigned with iota in
// declaration order, starting at GGUFFileTypeAllF32. The trailing line
// comment on each constant is its short display name.
// NOTE(review): those line comments presumably drive the generated
// zz_generated.gguffiletype.stringer.go (stringer -linecomment); confirm
// before rewording any of them.
//
// _GGUFFileTypeCount is the trailing sentinel (display name "Unknown");
// GetFileType falls through to it when no case matches. New file types
// must be added directly above it so existing numeric values do not shift.
//
// NOTE(review): this is a diff rendering. The first run of constants is the
// pre-change list; the second run is the post-change list, which adds
// GGUFFileTypeMostlyIQ4_NL_4_4. Only one run exists in the real file.
GGUFFileTypeAllF32 GGUFFileType = iota // F32
GGUFFileTypeMostlyF16 // F16
GGUFFileTypeMostlyQ4_0 // Q4_0
GGUFFileTypeMostlyQ4_1 // Q4_1
GGUFFileTypeMostlyQ4_1_F16 // Q4_1_F16
GGUFFileTypeMostlyQ4_2 // Q4_2
GGUFFileTypeMostlyQ4_3 // Q4_3
GGUFFileTypeMostlyQ8_0 // Q8_0
GGUFFileTypeMostlyQ5_0 // Q5_0
GGUFFileTypeMostlyQ5_1 // Q5_1
GGUFFileTypeMostlyQ2_K // Q2_K
GGUFFileTypeMostlyQ3_K // Q3_K/Q3_K_S
GGUFFileTypeMostlyQ4_K // Q4_K/Q3_K_M
GGUFFileTypeMostlyQ5_K // Q5_K/Q3_K_L
GGUFFileTypeMostlyQ6_K // Q6_K/Q4_K_S
GGUFFileTypeMostlyIQ2_XXS // IQ2_XXS/Q4_K_M
GGUFFileTypeMostlyIQ2_XS // IQ2_XS/Q5_K_S
GGUFFileTypeMostlyIQ3_XXS // IQ3_XXS/Q5_K_M
GGUFFileTypeMostlyIQ1_S // IQ1_S/Q6_K
GGUFFileTypeMostlyIQ4_NL // IQ4_NL
GGUFFileTypeMostlyIQ3_S // IQ3_S
GGUFFileTypeMostlyIQ2_S // IQ2_S
GGUFFileTypeMostlyIQ4_XS // IQ4_XS
GGUFFileTypeMostlyIQ1_M // IQ1_M
GGUFFileTypeMostlyBF16 // BF16
GGUFFileTypeMostlyQ4_0_4_4 // Q4_0_4x4
GGUFFileTypeMostlyQ4_0_4_8 // Q4_0_4x8
GGUFFileTypeMostlyQ4_0_8_8 // Q4_0_8x8
GGUFFileTypeMostlyTQ1_0 // TQ1_0
GGUFFileTypeMostlyTQ2_0 // TQ2_0
_GGUFFileTypeCount // Unknown
// NOTE(review): post-change list starts here.
GGUFFileTypeAllF32 GGUFFileType = iota // F32
GGUFFileTypeMostlyF16 // F16
GGUFFileTypeMostlyQ4_0 // Q4_0
GGUFFileTypeMostlyQ4_1 // Q4_1
GGUFFileTypeMostlyQ4_1_F16 // Q4_1_F16
GGUFFileTypeMostlyQ4_2 // Q4_2
GGUFFileTypeMostlyQ4_3 // Q4_3
GGUFFileTypeMostlyQ8_0 // Q8_0
GGUFFileTypeMostlyQ5_0 // Q5_0
GGUFFileTypeMostlyQ5_1 // Q5_1
GGUFFileTypeMostlyQ2_K // Q2_K
GGUFFileTypeMostlyQ3_K // Q3_K/Q3_K_S
GGUFFileTypeMostlyQ4_K // Q4_K/Q3_K_M
GGUFFileTypeMostlyQ5_K // Q5_K/Q3_K_L
GGUFFileTypeMostlyQ6_K // Q6_K/Q4_K_S
GGUFFileTypeMostlyIQ2_XXS // IQ2_XXS/Q4_K_M
GGUFFileTypeMostlyIQ2_XS // IQ2_XS/Q5_K_S
GGUFFileTypeMostlyIQ3_XXS // IQ3_XXS/Q5_K_M
GGUFFileTypeMostlyIQ1_S // IQ1_S/Q6_K
GGUFFileTypeMostlyIQ4_NL // IQ4_NL
GGUFFileTypeMostlyIQ3_S // IQ3_S
GGUFFileTypeMostlyIQ2_S // IQ2_S
GGUFFileTypeMostlyIQ4_XS // IQ4_XS
GGUFFileTypeMostlyIQ1_M // IQ1_M
GGUFFileTypeMostlyBF16 // BF16
GGUFFileTypeMostlyQ4_0_4_4 // Q4_0_4x4
GGUFFileTypeMostlyQ4_0_4_8 // Q4_0_4x8
GGUFFileTypeMostlyQ4_0_8_8 // Q4_0_8x8
GGUFFileTypeMostlyTQ1_0 // TQ1_0
GGUFFileTypeMostlyTQ2_0 // TQ2_0
GGUFFileTypeMostlyIQ4_NL_4_4 // IQ4_NL_4x4
_GGUFFileTypeCount // Unknown
)

// Metadata returns the metadata of the GGUF file.
Expand Down Expand Up @@ -271,6 +272,8 @@ func (t GGUFFileType) GGMLType() GGMLType {
return GGMLTypeTQ1_0
case GGUFFileTypeMostlyTQ2_0:
return GGMLTypeTQ2_0
case GGUFFileTypeMostlyIQ4_NL_4_4:
return GGMLTypeIQ4_NL_4_4
default:
}
return _GGMLTypeCount
Expand Down Expand Up @@ -391,6 +394,8 @@ func GetFileType(cm map[GGMLType]int) GGUFFileType {
return GGUFFileTypeMostlyTQ1_0
case GGMLTypeTQ2_0:
return GGUFFileTypeMostlyTQ2_0
case GGMLTypeIQ4_NL_4_4:
return GGUFFileTypeMostlyIQ4_NL_4_4
default:
}
return _GGUFFileTypeCount
Expand Down
74 changes: 38 additions & 36 deletions ggml.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,47 +61,49 @@ const (
GGMLTypeQ4_0_8_8
GGMLTypeTQ1_0
GGMLTypeTQ2_0
GGMLTypeIQ4_NL_4_4
_GGMLTypeCount // Unknown
)

// _GGMLTypeTraits is a table of GGMLTypeTrait for GGMLType.
//
// Each entry sets BlockSize and TypeSize, plus Quantized: true for the
// quantized formats; plain float and integer types use BlockSize 1 and leave
// Quantized at its zero value. GGMLTypeQ4_2 and GGMLTypeQ4_3 are deprecated
// and carry zeroed traits.
// NOTE(review): BlockSize is presumably the number of elements per block and
// TypeSize the number of bytes per block; confirm against the GGMLTypeTrait
// definition.
//
// NOTE(review): this is a diff rendering. The first run of entries is the
// pre-change table; the second run is the post-change table, which adds
// GGMLTypeIQ4_NL_4_4. Only one run exists in the real file.
var _GGMLTypeTraits = map[GGMLType]GGMLTypeTrait{
GGMLTypeF32: {BlockSize: 1, TypeSize: 4},
GGMLTypeF16: {BlockSize: 1, TypeSize: 2},
GGMLTypeQ4_0: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeQ4_1: {BlockSize: 32, TypeSize: 20, Quantized: true},
GGMLTypeQ4_2: {BlockSize: 0, TypeSize: 0}, // Deprecated
GGMLTypeQ4_3: {BlockSize: 0, TypeSize: 0}, // Deprecated
GGMLTypeQ5_0: {BlockSize: 32, TypeSize: 22, Quantized: true},
GGMLTypeQ5_1: {BlockSize: 32, TypeSize: 24, Quantized: true},
GGMLTypeQ8_0: {BlockSize: 32, TypeSize: 34, Quantized: true},
GGMLTypeQ8_1: {BlockSize: 32, TypeSize: 36, Quantized: true},
GGMLTypeQ2_K: {BlockSize: 256, TypeSize: 84, Quantized: true},
GGMLTypeQ3_K: {BlockSize: 256, TypeSize: 110, Quantized: true},
GGMLTypeQ4_K: {BlockSize: 256, TypeSize: 144, Quantized: true},
GGMLTypeQ5_K: {BlockSize: 256, TypeSize: 176, Quantized: true},
GGMLTypeQ6_K: {BlockSize: 256, TypeSize: 210, Quantized: true},
GGMLTypeQ8_K: {BlockSize: 256, TypeSize: 292, Quantized: true},
GGMLTypeIQ2_XXS: {BlockSize: 256, TypeSize: 66, Quantized: true},
GGMLTypeIQ2_XS: {BlockSize: 256, TypeSize: 74, Quantized: true},
GGMLTypeIQ3_XXS: {BlockSize: 256, TypeSize: 98, Quantized: true},
GGMLTypeIQ1_S: {BlockSize: 256, TypeSize: 50, Quantized: true},
GGMLTypeIQ4_NL: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeIQ3_S: {BlockSize: 256, TypeSize: 110, Quantized: true},
GGMLTypeIQ2_S: {BlockSize: 256, TypeSize: 82, Quantized: true},
GGMLTypeIQ4_XS: {BlockSize: 256, TypeSize: 136, Quantized: true},
GGMLTypeI8: {BlockSize: 1, TypeSize: 1},
GGMLTypeI16: {BlockSize: 1, TypeSize: 2},
GGMLTypeI32: {BlockSize: 1, TypeSize: 4},
GGMLTypeI64: {BlockSize: 1, TypeSize: 8},
GGMLTypeF64: {BlockSize: 1, TypeSize: 8},
GGMLTypeIQ1_M: {BlockSize: 256, TypeSize: 56, Quantized: true},
GGMLTypeBF16: {BlockSize: 1, TypeSize: 2},
GGMLTypeQ4_0_4_4: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeQ4_0_4_8: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeQ4_0_8_8: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeTQ1_0: {BlockSize: 256, TypeSize: 54, Quantized: true},
GGMLTypeTQ2_0: {BlockSize: 256, TypeSize: 66, Quantized: true},
// NOTE(review): post-change table starts here.
GGMLTypeF32: {BlockSize: 1, TypeSize: 4},
GGMLTypeF16: {BlockSize: 1, TypeSize: 2},
GGMLTypeQ4_0: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeQ4_1: {BlockSize: 32, TypeSize: 20, Quantized: true},
GGMLTypeQ4_2: {BlockSize: 0, TypeSize: 0}, // Deprecated
GGMLTypeQ4_3: {BlockSize: 0, TypeSize: 0}, // Deprecated
GGMLTypeQ5_0: {BlockSize: 32, TypeSize: 22, Quantized: true},
GGMLTypeQ5_1: {BlockSize: 32, TypeSize: 24, Quantized: true},
GGMLTypeQ8_0: {BlockSize: 32, TypeSize: 34, Quantized: true},
GGMLTypeQ8_1: {BlockSize: 32, TypeSize: 36, Quantized: true},
GGMLTypeQ2_K: {BlockSize: 256, TypeSize: 84, Quantized: true},
GGMLTypeQ3_K: {BlockSize: 256, TypeSize: 110, Quantized: true},
GGMLTypeQ4_K: {BlockSize: 256, TypeSize: 144, Quantized: true},
GGMLTypeQ5_K: {BlockSize: 256, TypeSize: 176, Quantized: true},
GGMLTypeQ6_K: {BlockSize: 256, TypeSize: 210, Quantized: true},
GGMLTypeQ8_K: {BlockSize: 256, TypeSize: 292, Quantized: true},
GGMLTypeIQ2_XXS: {BlockSize: 256, TypeSize: 66, Quantized: true},
GGMLTypeIQ2_XS: {BlockSize: 256, TypeSize: 74, Quantized: true},
GGMLTypeIQ3_XXS: {BlockSize: 256, TypeSize: 98, Quantized: true},
GGMLTypeIQ1_S: {BlockSize: 256, TypeSize: 50, Quantized: true},
GGMLTypeIQ4_NL: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeIQ3_S: {BlockSize: 256, TypeSize: 110, Quantized: true},
GGMLTypeIQ2_S: {BlockSize: 256, TypeSize: 82, Quantized: true},
GGMLTypeIQ4_XS: {BlockSize: 256, TypeSize: 136, Quantized: true},
GGMLTypeI8: {BlockSize: 1, TypeSize: 1},
GGMLTypeI16: {BlockSize: 1, TypeSize: 2},
GGMLTypeI32: {BlockSize: 1, TypeSize: 4},
GGMLTypeI64: {BlockSize: 1, TypeSize: 8},
GGMLTypeF64: {BlockSize: 1, TypeSize: 8},
GGMLTypeIQ1_M: {BlockSize: 256, TypeSize: 56, Quantized: true},
GGMLTypeBF16: {BlockSize: 1, TypeSize: 2},
GGMLTypeQ4_0_4_4: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeQ4_0_4_8: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeQ4_0_8_8: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeTQ1_0: {BlockSize: 256, TypeSize: 54, Quantized: true},
GGMLTypeTQ2_0: {BlockSize: 256, TypeSize: 66, Quantized: true},
// New in this commit; uses the same trait values as GGMLTypeIQ4_NL.
GGMLTypeIQ4_NL_4_4: {BlockSize: 32, TypeSize: 18, Quantized: true},
}

// Trait returns the GGMLTypeTrait of the GGMLType.
Expand Down
7 changes: 4 additions & 3 deletions zz_generated.ggmltype.stringer.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions zz_generated.gguffiletype.stringer.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit ef0fbd8

Please sign in to comment.