Skip to content

Commit

Permalink
refactor: mark ollama crawl as deprecated
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <[email protected]>
  • Loading branch information
thxCode committed Jul 15, 2024
1 parent 9d24318 commit 902a8c5
Show file tree
Hide file tree
Showing 11 changed files with 10 additions and 335 deletions.
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,13 @@ If you need one-shot command-line, try [gguf-parser](./cmd/gguf-parser) from [re
flowchart
parseGGUFFileRemote[/parseGGUFFileRemote/]
parseGGUFFile[/parseGGUFFile/]
parseGGUFFileFromDistroMetadata[/parseGGUFFileFromDistroMetadata/]
ParseGGUFFile -.-> parseGGUFFile
ParseGGUFFileFromHuggingFace -.-> ParseGGUFFileRemote
ParseGGUFFileFromModelScope -.-> ParseGGUFFileRemote
ParseGGUFFileRemote -.-> parseGGUFFileRemote
parseGGUFFileRemote -.-> parseGGUFFile
ParseGGUFFileFromOllama -.-> parseGGUFFileRemote
ParseGGUFFileFromOllama -..->|crawl| parseGGUFFileFromDistroMetadata
ParseGGUFFileFromOllama -.-> ParseGGUFFileFromOllamaModel
ParseGGUFFileFromOllamaModel -.-> parseGGUFFileRemote
```

## Examples
Expand Down
27 changes: 1 addition & 26 deletions cmd/gguf-parser/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Usage of gguf-parser ...:
-no-mmap
Specify disabling Memory-Mapped using, which is used to estimate the usage. Memory-Mapped can avoid loading the entire model weights into RAM.
-ol-crawl
Crawl the Ollama model instead of blobs fetching, works with --ol-model, which will be more efficient and faster, but lossy.
Crawl the Ollama model instead of blobs fetching, works with --ol-model, which will be more efficient and faster, but lossy. [Deprecated, as Ollama Model layer page has changed, will be removed in v0.4.0.]
-ol-model string
Model name of Ollama, e.g. gemma2.
-ol-usage
Expand Down Expand Up @@ -236,31 +236,6 @@ $ gguf-parser --ol-model="gemma2"
| ESTIMATE | gemma2 | 8192 | false | true | 43 (42 + 1) | Yes | 65.97 MiB + 6.99 GiB = 7.05 GiB | 215.97 MiB | 8.43 GiB |
+--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+
$ gguf-parser --ol-model="gemma2" --ol-crawl
+--------------+--------+--------+--------------+---------------+----------+------------+----------+
| \ | Name | Arch | Quantization | Little Endian | Size | Parameters | BPW |
+--------------+--------+--------+--------------+---------------+----------+------------+----------+
| MODEL | gemma2 | gemma2 | Q4_0 | true | 5.06 GiB | 9.24 B | 4.71 bpw |
+--------------+--------+--------+--------------+---------------+----------+------------+----------+
+--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
| \ | Max Context Len | Embedding Len | Embedding GQA | Attention Head Cnt | Layers | Feed Forward Len | Expert Cnt | Vocabulary Len |
+--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
| ARCHITECTURE | 8192 | 3584 | 2 | 16 | 42 | 14336 | 0 | 256000 |
+--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
+--------------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
| \ | Model | Tokens Size | Tokens Len | Added Tokens Len | BOS Token | EOS Token | Unknown Token | Separator Token | Padding Token |
+--------------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
| TOKENIZER | llama | N/A | 256000 | N/A | 2 | 1 | 3 | N/A | 0 |
+--------------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
+--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+
| \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM |
+--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+
| ESTIMATE | gemma2 | 8192 | false | true | 43 (42 + 1) | Yes | 65.99 MiB + 6.99 GiB = 7.05 GiB | 215.99 MiB | 8.43 GiB |
+--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+
```
#### Parse Clip model
Expand Down
1 change: 0 additions & 1 deletion cmd/gguf-parser/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ require (
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
golang.org/x/exp v0.0.0-20240707233637-46b078467d37 // indirect
golang.org/x/mod v0.19.0 // indirect
golang.org/x/net v0.27.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.22.0 // indirect
golang.org/x/tools v0.23.0 // indirect
Expand Down
2 changes: 0 additions & 2 deletions cmd/gguf-parser/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions cmd/gguf-parser/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func main() {
"gemma2.")
fs.BoolVar(&olCrawl, "ol-crawl", olCrawl, "Crawl the Ollama model instead of blobs fetching, "+
"works with --ol-model, "+
"which will be more efficient and faster, but lossy.")
"which will be more efficient and faster, but lossy. [Deprecated, as Ollama Model layer page has changed, will be removed in v0.4.0.]")
fs.BoolVar(&olUsage, "ol-usage", olUsage, "Specify respecting the extending layers introduced by Ollama, "+
"works with --ol-model, "+
"which affects the usage estimation.")
Expand Down Expand Up @@ -273,7 +273,7 @@ func main() {
gf, err = ParseGGUFFileFromModelScope(ctx, msRepo, msFile, ropts...)
case olModel != "":
om := ParseOllamaModel(olModel)
gf, err = ParseGGUFFileFromOllamaModel(ctx, om, olCrawl, ropts...)
gf, err = ParseGGUFFileFromOllamaModel(ctx, om, ropts...)
if om != nil && olUsage {
// Parameters override.
{
Expand Down
12 changes: 0 additions & 12 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,10 @@ type GGUFFile struct {
TensorInfos GGUFTensorInfos `json:"tensorInfos"`
// Padding is the padding size of the GGUF file,
// which is used to split Header and TensorInfos from tensor data.
//
// This might be empty if parse from crawler.
Padding int64 `json:"padding"`
// TensorDataStartOffset is the offset in bytes of the tensor data in this file.
//
// The offset is the start of the file.
//
// This might be lossy if parse from crawler.
TensorDataStartOffset int64 `json:"tensorDataStartOffset"`

/* Appendix */
Expand Down Expand Up @@ -151,22 +147,16 @@ type (
// Len is the length of the array.
Len uint64 `json:"len"`
// Array holds all array items.
//
// This might be empty if skipping or parse from crawler.
Array []any `json:"array,omitempty"`

/* Appendix */

// StartOffset is the offset in bytes of the GGUFMetadataKVArrayValue in the GGUFFile file.
//
// The offset is the start of the file.
//
// This might be empty if parse from crawler.
StartOffset int64 `json:"startOffset"`

// Size is the size of the array in bytes.
//
// This might be empty if parse from crawler.
Size int64 `json:"endOffset"`
}

Expand Down Expand Up @@ -200,8 +190,6 @@ type (
// StartOffset is the offset in bytes of the GGUFTensorInfo in the GGUFFile file.
//
// The offset is the start of the file.
//
// This might be empty if parse from crawler.
StartOffset int64 `json:"startOffset"`
}

Expand Down
173 changes: 3 additions & 170 deletions file_from_distro.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,27 @@ import (
"fmt"
"net/http"
"path/filepath"
"regexp"
"sort"
"strconv"
"time"

"golang.org/x/exp/maps"

"github.com/thxcode/gguf-parser-go/util/funcx"
"github.com/thxcode/gguf-parser-go/util/httpx"
"github.com/thxcode/gguf-parser-go/util/json"
"github.com/thxcode/gguf-parser-go/util/stringx"
)

var (
ErrOllamaInvalidModel = errors.New("ollama invalid model")
ErrOllamaBaseLayerNotFound = errors.New("ollama base layer not found")
ErrOllamaIllegalMetadata = errors.New("ollama illegal metadata")
)

// ParseGGUFFileFromOllama parses a GGUF file from Ollama model's base layer,
// and returns a GGUFFile, or an error if any.
//
// If the crawl is true, it will try to crawl the metadata from Ollama website instead of blobs fetching,
// which will be more efficient and faster, but lossy.
// If the crawling fails, it will fall back to the default behavior.
func ParseGGUFFileFromOllama(ctx context.Context, model string, crawl bool, opts ...GGUFReadOption) (*GGUFFile, error) {
return ParseGGUFFileFromOllamaModel(ctx, ParseOllamaModel(model), crawl, opts...)
func ParseGGUFFileFromOllama(ctx context.Context, model string, opts ...GGUFReadOption) (*GGUFFile, error) {
return ParseGGUFFileFromOllamaModel(ctx, ParseOllamaModel(model), opts...)
}

// ParseGGUFFileFromOllamaModel is similar to ParseGGUFFileFromOllama,
// but inputs an OllamaModel instead of a string.
//
// The given OllamaModel will be completed(fetching MediaType, Config and Layers) after calling this function.
// If the crawl is true, it will try to crawl the metadata from Ollama website instead of blobs fetching,
// which will be more efficient and faster, but lossy.
// If the crawling fails, it will fall back to the default behavior.
func ParseGGUFFileFromOllamaModel(ctx context.Context, model *OllamaModel, crawl bool, opts ...GGUFReadOption) (gf *GGUFFile, err error) {
func ParseGGUFFileFromOllamaModel(ctx context.Context, model *OllamaModel, opts ...GGUFReadOption) (gf *GGUFFile, err error) {
if model == nil {
return nil, ErrOllamaInvalidModel
}
Expand All @@ -56,9 +40,6 @@ func ParseGGUFFileFromOllamaModel(ctx context.Context, model *OllamaModel, crawl
{
if o.CachePath != "" {
o.CachePath = filepath.Join(o.CachePath, "distro", "ollama")
if crawl {
o.CachePath = filepath.Join(o.CachePath, "brief")
}
}
c := GGUFFileCache(o.CachePath)

Expand Down Expand Up @@ -115,153 +96,5 @@ func ParseGGUFFileFromOllamaModel(ctx context.Context, model *OllamaModel, crawl
}
}

if crawl {
r, err := ml.FetchWebPage(ctx, cli)
if err == nil {
gf, err = parseGGUFFileFromDistroMetadata("ollama", r, ml.Size)
if err == nil {
return gf, nil
}
}

// Fallback to the default behavior.
}

return parseGGUFFileFromRemote(ctx, cli, ml.BlobURL().String(), o)
}

// _OllamaMetadata mirrors the brief JSON metadata document crawled from the
// Ollama website for a model's base layer. It is the crawl-mode stand-in for
// parsing the GGUF blob itself, and is therefore lossy by construction.
type _OllamaMetadata struct {
	// Metadata holds the GGUF metadata key/value pairs.
	// Long array values may be elided by the website as a
	// "... (N values)" placeholder string — TODO confirm against the page format.
	Metadata map[string]any `json:"metadata"`
	// NumParams is the total number of model parameters.
	NumParams uint64 `json:"num_params"`
	// Tensors describes each tensor recorded in the GGUF file.
	Tensors []struct {
		// Name is the tensor's name.
		Name string `json:"name"`
		// Shape lists the tensor's dimensions.
		Shape []uint64 `json:"shape"`
		// Offset is the tensor's byte offset within the tensor data section.
		Offset uint64 `json:"offset"`
		// Type is the raw GGML tensor type code.
		Type uint32 `json:"type"`
	} `json:"tensors"`
	// Version is the GGUF format version.
	Version uint32 `json:"version"`
}

// parseGGUFFileFromDistroMetadata reconstructs a GGUFFile from the brief
// metadata document crawled from a distro website, without fetching the GGUF
// blob itself.
//
// source must be "ollama" (the only supported distro), data is the raw JSON
// payload, and size is the byte size of the underlying GGUF layer blob.
//
// The reconstruction is lossy: elided array contents, per-item offsets and
// paddings that only the real file carries are approximated or left zero.
func parseGGUFFileFromDistroMetadata(source, data string, size uint64) (*GGUFFile, error) {
	if source != "ollama" {
		return nil, fmt.Errorf("invalid source %q", source)
	}

	var m _OllamaMetadata
	{
		if err := json.Unmarshal([]byte(data), &m); err != nil {
			return nil, fmt.Errorf("unmarshal metadata: %w", err)
		}
		// A document without metadata or tensors cannot describe a model.
		if len(m.Metadata) == 0 || len(m.Tensors) == 0 {
			return nil, ErrOllamaIllegalMetadata
		}
	}

	// Convert.

	var (
		// Matches the placeholder the website substitutes for long arrays,
		// e.g. "... (32000 values)"; only the element count survives the crawl.
		arrayMetadataValueRegex = regexp.MustCompile(`^\.{3} \((?P<len>\d+) values\)$`)

		gf GGUFFile
	)

	gf.Header.Magic = GGUFMagicGGUFLe
	gf.Header.Version = GGUFVersion(m.Version)
	gf.Header.TensorCount = uint64(len(m.Tensors))
	// The crawled document omits tokenizer.chat_template, so account for it
	// explicitly on top of the listed metadata entries.
	gf.Header.MetadataKVCount = uint64(1 /* tokenizer.chat_template */ + len(m.Metadata))
	gf.Size = GGUFBytesScalar(size)
	gf.ModelParameters = GGUFParametersScalar(m.NumParams)

	gf.Header.MetadataKV = make([]GGUFMetadataKV, 0, len(m.Metadata))
	// Iterate the keys in sorted order for a deterministic result, injecting
	// the synthetic tokenizer.chat_template key alongside the crawled ones.
	for _, k := range func() []string {
		ks := maps.Keys(m.Metadata)
		ks = append(ks, "tokenizer.chat_template")
		sort.Strings(ks)
		return ks
	}() {
		if k == "tokenizer.chat_template" {
			// The real template is not available from the crawl;
			// store a recognizable marker string instead.
			gf.Header.MetadataKV = append(gf.Header.MetadataKV, GGUFMetadataKV{
				Key:       k,
				ValueType: GGUFMetadataValueTypeString,
				Value:     "!!! tokenizer.chat_template !!!",
			})
			continue
		}

		var (
			vt GGUFMetadataValueType
			v  = m.Metadata[k]
		)
		// JSON decoding collapses the GGUF value types; map them back from the
		// dynamic Go type of the decoded value.
		switch vv := v.(type) {
		case bool:
			vt = GGUFMetadataValueTypeBool
		case float64:
			vt = GGUFMetadataValueTypeFloat32
			v = float32(vv)
		case int64:
			vt = GGUFMetadataValueTypeUint32
			v = uint32(vv)
		case string:
			vt = GGUFMetadataValueTypeString
			// Elided arrays appear as a "... (N values)" placeholder string;
			// recover the length and guess the element type from the key's
			// last dotted component. The items themselves are lost.
			if r := arrayMetadataValueRegex.FindStringSubmatch(vv); len(r) == 2 {
				vt = GGUFMetadataValueTypeArray
				av := GGUFMetadataKVArrayValue{
					Type: GGUFMetadataValueTypeString,
					Len:  funcx.MustNoError(strconv.ParseUint(r[1], 10, 64)),
				}
				switch _, d, _ := stringx.CutFromRight(k, "."); d {
				case "scores":
					av.Type = GGUFMetadataValueTypeFloat32
				case "token_type":
					av.Type = GGUFMetadataValueTypeInt32
				}
				v = av
			}
		case []any:
			vt = GGUFMetadataValueTypeArray
			av := GGUFMetadataKVArrayValue{
				Type: GGUFMetadataValueTypeString,
				Len:  uint64(len(vv)),
			}
			if av.Len > 0 {
				av.Array = vv
				// Infer the element type from the first item; assumes the
				// array is homogeneous — TODO confirm for crawled payloads.
				switch vv[0].(type) {
				case bool:
					av.Type = GGUFMetadataValueTypeBool
				case float64:
					av.Type = GGUFMetadataValueTypeFloat32
				case int64:
					av.Type = GGUFMetadataValueTypeUint32
				}
			}
			v = av
		}
		gf.Header.MetadataKV = append(gf.Header.MetadataKV, GGUFMetadataKV{
			Key:       k,
			ValueType: vt,
			Value:     v,
		})
	}

	gf.TensorInfos = make([]GGUFTensorInfo, 0, len(m.Tensors))
	for i := range m.Tensors {
		t := m.Tensors[i]
		ti := GGUFTensorInfo{
			Name:        t.Name,
			NDimensions: uint32(len(t.Shape)),
			Dimensions:  t.Shape,
			Offset:      t.Offset,
			Type:        GGMLType(t.Type),
		}
		gf.TensorInfos = append(gf.TensorInfos, ti)
		// Accumulate the total weight size from each tensor's byte size.
		gf.ModelSize += GGUFBytesScalar(ti.Bytes())
	}

	// Approximate the tensor-data start offset as everything before the
	// weights: total blob size minus the accumulated model size. This is
	// lossy compared with reading the real header.
	gf.TensorDataStartOffset = int64(gf.Size - gf.ModelSize)

	// Bits-per-weight = model bits / parameter count; guard against a
	// zero parameter count to avoid dividing by zero.
	if gf.ModelParameters != 0 {
		gf.ModelBitsPerWeight = GGUFBitsPerWeightScalar(float64(gf.ModelSize) * 8 / float64(gf.ModelParameters))
	}

	return &gf, nil
}
Loading

0 comments on commit 902a8c5

Please sign in to comment.