Skip to content

Commit

Permalink
refactor: simplify estimate
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <[email protected]>
  • Loading branch information
thxCode committed Jun 7, 2024
1 parent 4133969 commit b3d65a4
Show file tree
Hide file tree
Showing 11 changed files with 363 additions and 285 deletions.
6 changes: 0 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,6 @@ spew.Dump(f.Estimate(WithContextSize(4096) /* 4K */))

```

#### Estimate with specific offload layers

```go
spew.Dump(f.Estimate(WithOffloadLayers(10)))
```

## License

MIT
54 changes: 18 additions & 36 deletions cmd/gguf-parser/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ func main() {
skipProxy bool
skipTLS bool
// estimate options
ctxSize = 512
kvType = "f16"
offloadLayers uint64
ctxSize = 512
kvType = "f16"
// output options
version bool
skipModel bool
Expand Down Expand Up @@ -65,7 +64,6 @@ func main() {
fs.BoolVar(&skipTLS, "skip-tls", skipTLS, "Skip TLS verification when reading from a remote URL")
fs.IntVar(&ctxSize, "ctx-size", ctxSize, "Context size to estimate memory usage")
fs.StringVar(&kvType, "kv-type", kvType, "Key-Value cache type, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]")
fs.Uint64Var(&offloadLayers, "offload-layers", offloadLayers, "Specify how many layers to offload, default is fully offloading")
fs.BoolVar(&version, "version", version, "Show version")
fs.BoolVar(&skipModel, "skip-model", skipModel, "Skip model metadata")
fs.BoolVar(&skipArchitecture, "skip-architecture", skipArchitecture, "Skip architecture metadata")
Expand Down Expand Up @@ -129,9 +127,6 @@ func main() {
}
eopts = append(eopts, WithCacheKeyType(kv), WithCacheValueType(kv))
}
if offloadLayers > 0 {
eopts = append(eopts, WithOffloadLayers(offloadLayers))
}

// Parse GGUF file.

Expand Down Expand Up @@ -206,23 +201,23 @@ func main() {
if !skipModel {
tprintf(
"MODEL",
[]string{"Name", "Architecture", "Quantization Version", "File Type", "Little Endian", "Size", "Parameters", "BPW"},
[]string{"Name", "Arch", "Quantization Version", "File Type", "Little Endian", "Size", "Parameters", "BPW"},
[]string{
m.Name,
m.Architecture,
sprintf(m.QuantizationVersion),
sprintf(m.FileType),
sprintf(m.LittleEndian),
m.Size.String(),
m.Parameters.String(),
m.BitsPerWeight.String(),
sprintf(m.Size),
sprintf(m.Parameters),
sprintf(m.BitsPerWeight),
})
}

if !skipArchitecture {
tprintf(
"ARCHITECTURE",
[]string{"Max Context Length", "Embedding Length", "Layers", "Feed Forward Length", "Expert Count", "Vocabulary Length"},
[]string{"Max Context Len", "Embedding Len", "Layers", "Feed Forward Len", "Expert Cnt", "Vocabulary Len"},
[]string{
sprintf(a.MaximumContextLength),
sprintf(a.EmbeddingLength),
Expand All @@ -242,7 +237,7 @@ func main() {
}
tprintf(
"TOKENIZER",
[]string{"Model", "Tokens Length", "Added Tokens Length", "BOS Token", "EOS Token", "Unknown Token", "Separator Token", "Padding Token"},
[]string{"Model", "Tokens Len", "Added Tokens Len", "BOS Token", "EOS Token", "Unknown Token", "Separator Token", "Padding Token"},
[]string{
t.Model,
sprintf(t.TokensLength),
Expand All @@ -256,30 +251,17 @@ func main() {
}

if !skipEstimate {
bs := [][]string{
{
"TOTAL",
sprintf(ctxSize),
e.Total.KVCache.Sum().String(),
e.Total.Compute.String(),
e.Total.IO.String(),
e.Total.Sum().String(),
},
}
if e.Offload != nil {
bs = append(bs, []string{
"OFFLOAD",
sprintf(ctxSize),
e.Offload.KVCache.Sum().String(),
e.Offload.Compute.String(),
e.Offload.IO.String(),
e.Offload.Sum().String(),
})
}
tprintf(
"ESTIMATE",
[]string{"/", "Context Length", "KV Cache", "Compute Memory", "IO Memory", "Sum"},
bs...)
[]string{"Context Size", "Model Weight", "KV Cache", "Computation Graph Overhead", "Others", "Usage (w/o MMap)"},
[]string{
sprintf(ctxSize),
sprintf(e.ModelWeight),
sprintf(e.KVCache.Sum()),
sprintf(e.ComputationGraphOverhead),
sprintf(e.Others),
sprintf(e.Sum()) + " (" + sprintf(e.Sum()+e.ModelWeight) + ")",
})
}
}

Expand Down Expand Up @@ -323,7 +305,7 @@ func tprintf(title string, header []string, body ...[]string) {
tb.SetAlignment(tablewriter.ALIGN_CENTER)
tb.SetHeaderLine(true)
tb.SetRowLine(true)
tb.SetAutoMergeCells(true)
tb.SetAutoMergeCellsByColumnIndex([]int{0, 1, 2, 3})
tb.Append(append([]string{title}, header...))
for i := range body {
tb.Append(append([]string{title}, body[i]...))
Expand Down
162 changes: 37 additions & 125 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,59 +165,6 @@ type (
GGUFMetadataKVs []GGUFMetadataKV
)

// Types for GGMLType.

// GGMLType identifies the data type of a GGML tensor,
// see https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure.
type GGMLType uint32

// GGMLTypeTrait describes the storage trait of a GGMLType,
// see https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/src/ggml.c#L564-L918.
type GGMLTypeTrait struct {
	// BlockSize is an int in the original definition,
	// uint64 is used here to reduce conversions.
	BlockSize uint64
	// TypeSize is widened to uint64 here to reduce conversions.
	TypeSize uint64
	// Quantized is true when the type is a quantized one.
	Quantized bool
}

// GGMLType constants.
//
// The values are assigned sequentially by iota, starting at 0 with GGMLTypeF32,
// so the declaration order below determines the numeric value of every constant.
//
// GGMLTypeQ4_2, GGMLTypeQ4_3 are deprecated.
const (
GGMLTypeF32 GGMLType = iota
GGMLTypeF16
GGMLTypeQ4_0
GGMLTypeQ4_1
GGMLTypeQ4_2
GGMLTypeQ4_3
GGMLTypeQ5_0
GGMLTypeQ5_1
GGMLTypeQ8_0
GGMLTypeQ8_1
GGMLTypeQ2_K
GGMLTypeQ3_K
GGMLTypeQ4_K
GGMLTypeQ5_K
GGMLTypeQ6_K
GGMLTypeQ8_K
GGMLTypeIQ2_XXS
GGMLTypeIQ2_XS
GGMLTypeIQ3_XXS
GGMLTypeIQ1_S
GGMLTypeIQ4_NL
GGMLTypeIQ3_S
GGMLTypeIQ2_S
GGMLTypeIQ4_XS
GGMLTypeI8
GGMLTypeI16
GGMLTypeI32
GGMLTypeI64
GGMLTypeF64
GGMLTypeIQ1_M
GGMLTypeBF16
_GGMLTypeCount // Unknown, its value equals the number of constants declared above.
)

// Types for GGUFTensorInfo.
type (
// GGUFTensorInfo represents a tensor info in a GGUF file.
Expand Down Expand Up @@ -458,7 +405,8 @@ func parseGGUFFile(s int64, f io.ReadSeeker, o _GGUFReadOptions) (_ *GGUFFile, e

// Types for GGUF hierarchical tensors.
type (
// IGGUFTensorInfos is an interface for GGUFTensorInfos.
// IGGUFTensorInfos is an interface for GGUF tensor infos,
// which includes basic operations.
IGGUFTensorInfos interface {
// Get returns the GGUFTensorInfo with the given name,
// and true if found, and false otherwise.
Expand All @@ -468,10 +416,12 @@ type (
// Index returns a map value to the GGUFTensorInfo with the given names,
// and the number of names found.
Index(names []string) (infos map[string]GGUFTensorInfo, found int)
// Elements returns the number of elements of the GGUFTensorInfo.
// Elements returns the number of elements (parameters).
Elements() uint64
// Bytes returns the number of bytes of the GGUFTensorInfo.
// Bytes returns the number of bytes.
Bytes() uint64
// Count returns the number of tensors.
Count() uint64
}

// GGUFLayerTensorInfos represents hierarchical tensor infos of a GGUF file,
Expand All @@ -496,7 +446,16 @@ type (
)

// Layers converts the GGUFTensorInfos to GGUFLayerTensorInfos.
func (gf *GGUFFile) Layers() GGUFLayerTensorInfos {
// Layers converts the GGUFTensorInfos to GGUFLayerTensorInfos,
// leaving out the items matched by the given ignores, if any.
func (gf *GGUFFile) Layers(ignores ...string) GGUFLayerTensorInfos {
	tis := gf.layers()
	if len(ignores) == 0 {
		return tis
	}

	// Cut yields the matched part at first and the remainder at second,
	// only the remainder is kept.
	_, tis, _ = tis.Cut(ignores)
	return tis
}

func (gf *GGUFFile) layers() GGUFLayerTensorInfos {
var ret GGUFLayerTensorInfos

pm := make(map[string]any)
Expand Down Expand Up @@ -921,73 +880,6 @@ func (kvs GGUFMetadataKVs) Index(keys []string) (values map[string]GGUFMetadataK
return values, found
}

// _GGMLTypeTraits is a table of GGMLTypeTrait for GGMLType,
// keyed by the GGMLType constant.
//
// The deprecated GGMLTypeQ4_2 and GGMLTypeQ4_3 are registered with zero sizes,
// and _GGMLTypeCount has no entry.
var _GGMLTypeTraits = map[GGMLType]GGMLTypeTrait{
GGMLTypeF32: {BlockSize: 1, TypeSize: 4},
GGMLTypeF16: {BlockSize: 1, TypeSize: 2},
GGMLTypeQ4_0: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeQ4_1: {BlockSize: 32, TypeSize: 20, Quantized: true},
GGMLTypeQ4_2: {BlockSize: 0, TypeSize: 0}, // Deprecated
GGMLTypeQ4_3: {BlockSize: 0, TypeSize: 0}, // Deprecated
GGMLTypeQ5_0: {BlockSize: 32, TypeSize: 22, Quantized: true},
GGMLTypeQ5_1: {BlockSize: 32, TypeSize: 24, Quantized: true},
GGMLTypeQ8_0: {BlockSize: 32, TypeSize: 34, Quantized: true},
GGMLTypeQ8_1: {BlockSize: 32, TypeSize: 36, Quantized: true},
GGMLTypeQ2_K: {BlockSize: 256, TypeSize: 84, Quantized: true},
GGMLTypeQ3_K: {BlockSize: 256, TypeSize: 110, Quantized: true},
GGMLTypeQ4_K: {BlockSize: 256, TypeSize: 144, Quantized: true},
GGMLTypeQ5_K: {BlockSize: 256, TypeSize: 176, Quantized: true},
GGMLTypeQ6_K: {BlockSize: 256, TypeSize: 210, Quantized: true},
GGMLTypeQ8_K: {BlockSize: 256, TypeSize: 292, Quantized: true},
GGMLTypeIQ2_XXS: {BlockSize: 256, TypeSize: 66, Quantized: true},
GGMLTypeIQ2_XS: {BlockSize: 256, TypeSize: 74, Quantized: true},
GGMLTypeIQ3_XXS: {BlockSize: 256, TypeSize: 98, Quantized: true},
GGMLTypeIQ1_S: {BlockSize: 256, TypeSize: 50, Quantized: true},
GGMLTypeIQ4_NL: {BlockSize: 32, TypeSize: 18, Quantized: true},
GGMLTypeIQ3_S: {BlockSize: 256, TypeSize: 110, Quantized: true},
GGMLTypeIQ2_S: {BlockSize: 256, TypeSize: 82, Quantized: true},
GGMLTypeIQ4_XS: {BlockSize: 256, TypeSize: 136, Quantized: true},
GGMLTypeI8: {BlockSize: 1, TypeSize: 1},
GGMLTypeI16: {BlockSize: 1, TypeSize: 2},
GGMLTypeI32: {BlockSize: 1, TypeSize: 4},
GGMLTypeI64: {BlockSize: 1, TypeSize: 8},
GGMLTypeF64: {BlockSize: 1, TypeSize: 8},
GGMLTypeIQ1_M: {BlockSize: 256, TypeSize: 56, Quantized: true},
GGMLTypeBF16: {BlockSize: 1, TypeSize: 2},
}

// Trait looks up the GGMLTypeTrait registered for the GGMLType,
// and reports whether such a trait exists.
func (t GGMLType) Trait() (GGMLTypeTrait, bool) {
	trait, found := _GGMLTypeTraits[t]
	return trait, found
}

// RowSizeOf returns the size of the given dimensions according to the GGMLType's GGMLTypeTrait,
// which is inspired by
// https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/src/ggml.c#L3142-L3145.
//
// The index of the given dimensions means the number of dimension,
// i.e. 0 is the first dimension, 1 is the second dimension, and so on.
//
// The value of the item is the number of elements in the corresponding dimension.
//
// RowSizeOf panics if the given dimensions is empty,
// if the GGMLType has no registered GGMLTypeTrait,
// or if the registered GGMLTypeTrait has a zero block size,
// e.g. the deprecated GGMLTypeQ4_2 and GGMLTypeQ4_3.
func (t GGMLType) RowSizeOf(dimensions []uint64) uint64 {
	if len(dimensions) == 0 {
		panic(errors.New("no dimensions"))
	}

	tt, ok := t.Trait()
	if !ok {
		panic(fmt.Errorf("invalid type: %v", t))
	}
	// Deprecated types are registered with a zero block size,
	// fail with a descriptive error instead of an opaque integer divide-by-zero.
	if tt.BlockSize == 0 {
		panic(fmt.Errorf("unsupported type: %v", t))
	}

	// https://github.com/ggerganov/ggml/blob/a10a8b880c059b3b29356eb9a9f8df72f03cdb6a/src/ggml.c#L2640-L2643
	ds := tt.TypeSize * dimensions[0] / tt.BlockSize // Row size
	for _, d := range dimensions[1:] {
		ds *= d
	}
	return ds
}

// Get returns the GGUFTensorInfo with the given name,
// and true if found, and false otherwise.
func (ti GGUFTensorInfo) Get(name string) (info GGUFTensorInfo, found bool) {
Expand Down Expand Up @@ -1071,6 +963,12 @@ func (ti GGUFTensorInfo) Bytes() uint64 {
return ret
}

// Count returns the number of GGUF tensors of the GGUFTensorInfo,
// which is always 1, as a GGUFTensorInfo stands for a single tensor info.
func (ti GGUFTensorInfo) Count() uint64 {
return 1
}

// Get returns the GGUFTensorInfo with the given name,
// and true if found, and false otherwise.
func (tis GGUFTensorInfos) Get(name string) (info GGUFTensorInfo, found bool) {
Expand Down Expand Up @@ -1130,7 +1028,12 @@ func (tis GGUFTensorInfos) Bytes() uint64 {
return ret
}

// Get returns the GGUFTensorInfo with the given name,
// Count reports how many GGUF tensors the GGUFTensorInfos holds,
// which is its length.
func (tis GGUFTensorInfos) Count() uint64 {
	n := len(tis)
	return uint64(n)
}

// Get returns the GGUFTensorInfo with the given name,
// and true if found, and false otherwise.
func (ltis GGUFLayerTensorInfos) Get(name string) (info GGUFTensorInfo, found bool) {
for i := range ltis {
Expand Down Expand Up @@ -1211,6 +1114,15 @@ func (ltis GGUFLayerTensorInfos) Bytes() uint64 {
return ret
}

// Count reports the total number of GGUF tensors of the GGUFLayerTensorInfos,
// accumulated from the Count of each item.
func (ltis GGUFLayerTensorInfos) Count() uint64 {
	var total uint64
	for _, item := range ltis {
		total += item.Count()
	}
	return total
}

// Cut splits the GGUFLayerTensorInfos into two parts,
// and returns the GGUFLayerTensorInfos with the names that match the given names at first,
// and the GGUFLayerTensorInfos without the names at second,
Expand Down
6 changes: 6 additions & 0 deletions file_architecture.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ package gguf_parser

// GGUFArchitectureMetadata represents the architecture metadata of a GGUF file.
type GGUFArchitectureMetadata struct {
// Architecture describes what architecture this model implements.
//
// All lowercase ASCII, with only [a-z0-9]+ characters allowed.
Architecture string `json:"architecture"`
// MaximumContextLength(n_ctx_train) is the maximum context length of the model.
//
// For most architectures, this is the hard limit on the length of the input.
Expand Down Expand Up @@ -114,6 +118,8 @@ func (gf *GGUFFile) Architecture() (ga GGUFArchitectureMetadata) {
tokenizerGGMLTokensKey = "tokenizer.ggml.tokens"
)

ga.Architecture = arch

m, _ := gf.Header.MetadataKV.Index([]string{
contextLengthKey,
embeddingLengthKey,
Expand Down
2 changes: 1 addition & 1 deletion file_architecture_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func TestGGUFFile_Architecture(t *testing.T) {
f, err := ParseGGUFFileFromHuggingFace(
ctx,
"NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
"Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf",
"Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf",
SkipLargeMetadata())
if err != nil {
t.Fatal(err)
Expand Down
Loading

0 comments on commit b3d65a4

Please sign in to comment.