From 67ab8015510483ea2a62b61ea8e0e812d73ea3cb Mon Sep 17 00:00:00 2001
From: thxCode <thxcode0824@gmail.com>
Date: Wed, 29 May 2024 13:18:52 +0800
Subject: [PATCH] refactor: add skip-* output flags to cmd; rename ContextLength to MaximumContextLength

Signed-off-by: thxCode <thxcode0824@gmail.com>
---
 cmd/gguf-parser/main.go | 138 ++++++++++++++++++++++++++++------------
 file.go                 |   2 +-
 file_architecture.go    |   6 +-
 file_estimate.go        |   2 +-
 file_model.go           |   1 -
 5 files changed, 101 insertions(+), 48 deletions(-)

diff --git a/cmd/gguf-parser/main.go b/cmd/gguf-parser/main.go
index ecbd577..86c63f9 100644
--- a/cmd/gguf-parser/main.go
+++ b/cmd/gguf-parser/main.go
@@ -34,9 +34,13 @@ func main() {
 		// estimate options
 		ctxSize = 512
 		kvType  = "f16"
-		// output
-		json       bool
-		jsonPretty = true
+		// output options
+		skipModel        bool
+		skipArchitecture bool
+		skipTokenizer    bool
+		skipEstimate     bool
+		json             bool
+		jsonPretty       = true
 	)
 	fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
 	fs.Usage = func() {
@@ -58,8 +62,12 @@ func main() {
 	fs.BoolVar(&mmap, "mmap", mmap, "Use mmap to read the local file")
 	fs.BoolVar(&skipProxy, "skip-proxy", skipProxy, "Skip using proxy when reading from a remote URL")
 	fs.BoolVar(&skipTLS, "skip-tls", skipTLS, "Skip TLS verification when reading from a remote URL")
-	fs.IntVar(&ctxSize, "ctx-size", ctxSize, "Maximum context size to estimate memory usage")
+	fs.IntVar(&ctxSize, "ctx-size", ctxSize, "Context size to estimate memory usage")
 	fs.StringVar(&kvType, "kv-type", kvType, "Key-Value cache type, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]")
+	fs.BoolVar(&skipModel, "skip-model", skipModel, "Skip model metadata")
+	fs.BoolVar(&skipArchitecture, "skip-architecture", skipArchitecture, "Skip architecture metadata")
+	fs.BoolVar(&skipTokenizer, "skip-tokenizer", skipTokenizer, "Skip tokenizer metadata")
+	fs.BoolVar(&skipEstimate, "skip-estimate", skipEstimate, "Skip estimate")
 	fs.BoolVar(&json, "json", json, "Output as JSON")
 	fs.BoolVar(&jsonPretty, "json-pretty", jsonPretty, "Output as pretty JSON")
 	if err := fs.Parse(os.Args[1:]); err != nil {
@@ -132,15 +140,40 @@ func main() {
 		}
 	}
 
-	m, a, e := gf.Model(), gf.Architecture(), gf.Estimate(eopts...)
+	var (
+		m GGUFModelMetadata
+		a GGUFArchitectureMetadata
+		t GGUFTokenizerMetadata
+		e GGUFEstimate
+	)
+	if !skipModel {
+		m = gf.Model()
+	}
+	if !skipArchitecture {
+		a = gf.Architecture()
+	}
+	if !skipTokenizer {
+		t = gf.Tokenizer()
+	}
+	if !skipEstimate {
+		e = gf.Estimate(eopts...)
+	}
 
 	// Output
 
 	if json {
-		o := map[string]any{
-			"model":        m,
-			"architecture": a,
-			"estimate":     e,
+		o := map[string]any{}
+		if !skipModel {
+			o["model"] = m
+		}
+		if !skipArchitecture {
+			o["architecture"] = a
+		}
+		if !skipTokenizer {
+			o["tokenizer"] = t
+		}
+		if !skipEstimate {
+			o["estimate"] = e
 		}
 
 		enc := stdjson.NewEncoder(os.Stdout)
@@ -155,39 +188,60 @@ func main() {
 		return
 	}
 
-	tprintf(
-		[]string{"Name", "Architecture", "Quantization Version", "File Type", "Little Endian", "Size", "Parameters", "BPW"},
-		[]string{
-			m.Name,
-			m.Architecture,
-			sprintf(m.QuantizationVersion),
-			sprintf(m.FileType),
-			sprintf(m.LittleEndian),
-			m.Size.String(),
-			m.Parameters.String(),
-			m.BitsPerWeight.String(),
-		})
-
-	tprintf(
-		[]string{"Context Length", "Embedding Length", "Layers", "Feed Forward Length", "Expert Count", "Vocabulary Length"},
-		[]string{
-			sprintf(a.ContextLength),
-			sprintf(a.EmbeddingLength),
-			fmt.Sprintf("%d + 1 = %d",
-				a.BlockCount,
-				a.BlockCount+1),
-			sprintf(a.FeedForwardLength),
-			sprintf(a.ExpertCount),
-			sprintf(a.VocabularyLength),
-		})
-
-	tprintf(
-		[]string{"Load Memory", "KVCache Memory", "Total Memory"},
-		[]string{
-			e.MemoryLoad.String(),
-			e.KVCache.MemoryTotal.String(),
-			e.MemoryTotal.String(),
-		})
+	if !skipModel {
+		tprintf(
+			[]string{"Name", "Architecture", "Quantization Version", "File Type", "Little Endian", "Size", "Parameters", "BPW"},
+			[]string{
+				m.Name,
+				m.Architecture,
+				sprintf(m.QuantizationVersion),
+				sprintf(m.FileType),
+				sprintf(m.LittleEndian),
+				m.Size.String(),
+				m.Parameters.String(),
+				m.BitsPerWeight.String(),
+			})
+	}
+
+	if !skipArchitecture {
+		tprintf(
+			[]string{"Maximum Context Length", "Embedding Length", "Layers", "Feed Forward Length", "Expert Count", "Vocabulary Length"},
+			[]string{
+				sprintf(a.MaximumContextLength),
+				sprintf(a.EmbeddingLength),
+				fmt.Sprintf("%d + 1 = %d",
+					a.BlockCount,
+					a.BlockCount+1),
+				sprintf(a.FeedForwardLength),
+				sprintf(a.ExpertCount),
+				sprintf(a.VocabularyLength),
+			})
+	}
+
+	if !skipTokenizer {
+		tprintf(
+			[]string{"Tokenizer Model", "Tokens Length", "Added Tokens Length", "BOS", "EOS", "Unknown", "Separator", "Padding"},
+			[]string{
+				t.Model,
+				sprintf(t.TokensLength),
+				sprintf(t.AddedTokensLength),
+				sprintf(t.BOSTokenID),
+				sprintf(t.EOSTokenID),
+				sprintf(t.UnknownTokenID),
+				sprintf(t.SeparatorTokenID),
+				sprintf(t.PaddingTokenID),
+			})
+	}
+
+	if !skipEstimate {
+		tprintf(
+			[]string{"Load Memory", "KVCache Memory", "Total Memory"},
+			[]string{
+				e.MemoryLoad.String(),
+				e.KVCache.MemoryTotal.String(),
+				e.MemoryTotal.String(),
+			})
+	}
 }
 
 func sprintf(a any) string {
diff --git a/file.go b/file.go
index 8db5c18..8ef076a 100644
--- a/file.go
+++ b/file.go
@@ -939,7 +939,7 @@ func (gf *GGUFFile) guessParameters() GGUFParametersScalar {
 	//            = BlockCount * (12 * EmbeddingLength * EmbeddingLength + 13 * EmbeddingLength) + VocabularyLength * EmbeddingLength
 
 	ret := blockCount*(12*embeddingLength*embeddingLength+13*embeddingLength) + vocabularyLength*embeddingLength
-	// TODO MoE
+	// TODO: account for MoE / SSM / RoPE in this estimate.
 	return GGUFParametersScalar(ret)
 }
 
diff --git a/file_architecture.go b/file_architecture.go
index dc94058..8e1dd31 100644
--- a/file_architecture.go
+++ b/file_architecture.go
@@ -2,13 +2,13 @@ package gguf_parser
 
 // GGUFArchitectureMetadata represents the architecture metadata of a GGUF file.
 type GGUFArchitectureMetadata struct {
-	// ContextLength(n_ctx_train) is the context length of the model.
+	// MaximumContextLength(n_ctx_train) is the maximum context length of the model.
 	//
 	// For most architectures, this is the hard limit on the length of the input.
 	// Architectures, like RWKV,
 	// that are not reliant on transformer-style attention may be able to handle larger inputs,
 	// but this is not guaranteed.
-	ContextLength uint64 `json:"contextLength"`
+	MaximumContextLength uint64 `json:"maximumContextLength"`
 	// EmbeddingLength(n_embd) is the length of the embedding layer.
 	EmbeddingLength uint64 `json:"embeddingLength"`
 	// BlockCount(n_layer) is the number of blocks of attention and feed-forward layers,
@@ -147,7 +147,7 @@ func (gf *GGUFFile) Architecture() (ga GGUFArchitectureMetadata) {
 	})
 
 	if v, ok := m[contextLengthKey]; ok {
-		ga.ContextLength = ValueNumeric[uint64](v)
+		ga.MaximumContextLength = ValueNumeric[uint64](v)
 	}
 	if v, ok := m[embeddingLengthKey]; ok {
 		ga.EmbeddingLength = ValueNumeric[uint64](v)
diff --git a/file_estimate.go b/file_estimate.go
index 64bd663..e3398a6 100644
--- a/file_estimate.go
+++ b/file_estimate.go
@@ -49,7 +49,7 @@ func (gf *GGUFFile) estimateKVCache(a GGUFArchitectureMetadata, o _GGUFEstimateO
 	var (
 		embedKeyGQA = uint64(a.AttentionKeyLength) * a.AttentionHeadCountKV
 		embedValGQA = uint64(a.AttentionValueLength) * a.AttentionHeadCountKV
-		kvSize      = a.ContextLength
+		kvSize      = a.MaximumContextLength
 	)
 	{
 		// Correct.
diff --git a/file_model.go b/file_model.go
index bed34c5..42da58b 100644
--- a/file_model.go
+++ b/file_model.go
@@ -18,7 +18,6 @@ type GGUFModelMetadata struct {
 	// Not required if the model is not quantized (i.e. no tensors are quantized).
 	// If any tensors are quantized, this must be present.
 	// This is separate to the quantization scheme of the tensors itself,
-	//
 	// the quantization version may change without changing the scheme's name,
 	// e.g. the quantization scheme is Q5_K, and the QuantizationVersion is 4.
 	QuantizationVersion uint32 `json:"quantizationVersion,omitempty"`