Skip to content

Commit

Permalink
feat: support tq1_0 tq2_0
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <[email protected]>
  • Loading branch information
thxCode committed Sep 11, 2024
1 parent 4f7ec8a commit ee6d366
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 8 deletions.
10 changes: 10 additions & 0 deletions file_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ const (
GGUFFileTypeMostlyQ4_0_4_4 // Q4_0_4x4
GGUFFileTypeMostlyQ4_0_4_8 // Q4_0_4x8
GGUFFileTypeMostlyQ4_0_8_8 // Q4_0_8x8
GGUFFileTypeMostlyTQ1_0 // TQ1_0
GGUFFileTypeMostlyTQ2_0 // TQ2_0
_GGUFFileTypeCount // Unknown
)

Expand Down Expand Up @@ -257,6 +259,10 @@ func (t GGUFFileType) GGMLType() GGMLType {
return GGMLTypeQ4_0_4_8
case GGUFFileTypeMostlyQ4_0_8_8:
return GGMLTypeQ4_0_8_8
case GGUFFileTypeMostlyTQ1_0:
return GGMLTypeTQ1_0
case GGUFFileTypeMostlyTQ2_0:
return GGMLTypeTQ2_0
default:
}
return _GGMLTypeCount
Expand Down Expand Up @@ -364,6 +370,10 @@ func (gf *GGUFFile) guessFileType() GGUFFileType {
return GGUFFileTypeMostlyQ4_0_4_8
case GGMLTypeQ4_0_8_8:
return GGUFFileTypeMostlyQ4_0_8_8
case GGMLTypeTQ1_0:
return GGUFFileTypeMostlyTQ1_0
case GGMLTypeTQ2_0:
return GGUFFileTypeMostlyTQ2_0
default:
}
return _GGUFFileTypeCount
Expand Down
8 changes: 6 additions & 2 deletions ggml.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ import (
// Types for GGMLType.
type (
// GGMLType is a type of GGML tensor,
// see https://github.com/ggerganov/llama.cpp/blob/278d0e18469aacf505be18ce790a63c7cc31be26/ggml/include/ggml.h#L354-L390.
// see https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/ggml/include/ggml.h#L363-L401.
GGMLType uint32

// GGMLTypeTrait holds the trait of a GGMLType,
// see https://github.com/ggerganov/llama.cpp/blob/278d0e18469aacf505be18ce790a63c7cc31be26/ggml/src/ggml.c#L547-L942.
// see https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/ggml/src/ggml.c#L663-L1082.
GGMLTypeTrait struct {
BlockSize uint64 // Original is int, in order to reduce conversion, here we use uint64.
TypeSize uint64 // Original is uint32, in order to reduce conversion, here we use uint64.
Expand Down Expand Up @@ -59,6 +59,8 @@ const (
GGMLTypeQ4_0_4_4
GGMLTypeQ4_0_4_8
GGMLTypeQ4_0_8_8
GGMLTypeTQ1_0
GGMLTypeTQ2_0
_GGMLTypeCount // Unknown
)

Expand Down Expand Up @@ -98,6 +100,8 @@ var _GGMLTypeTraits = map[GGMLType]GGMLTypeTrait{
GGMLTypeQ4_0_4_4: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeQ4_0_4_8: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeQ4_0_8_8: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeTQ1_0: {BlockSize: 256, TypeSize: 54, Quantized: true},
GGMLTypeTQ2_0: {BlockSize: 256, TypeSize: 66, Quantized: true},
}

// Trait returns the GGMLTypeTrait of the GGMLType.
Expand Down
8 changes: 5 additions & 3 deletions zz_generated.ggmltype.stringer.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions zz_generated.gguffiletype.stringer.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit ee6d366

Please sign in to comment.