Skip to content

Commit

Permalink
refactor: mark ollama crawl as deprecated
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <[email protected]>
  • Loading branch information
thxCode committed Jul 15, 2024
1 parent 9d24318 commit 902a8c5
Show file tree
Hide file tree
Showing 11 changed files with 10 additions and 335 deletions.
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,13 @@ If you need one-shot command-line, try [gguf-parser](./cmd/gguf-parser) from [re
flowchart
parseGGUFFileRemote[/parseGGUFFileRemote/]
parseGGUFFile[/parseGGUFFile/]
parseGGUFFileFromDistroMetadata[/parseGGUFFileFromDistroMetadata/]
ParseGGUFFile -.-> parseGGUFFile
ParseGGUFFileFromHuggingFace -.-> ParseGGUFFileRemote
ParseGGUFFileFromModelScope -.-> ParseGGUFFileRemote
ParseGGUFFileRemote -.-> parseGGUFFileRemote
parseGGUFFileRemote -.-> parseGGUFFile
ParseGGUFFileFromOllama -.-> parseGGUFFileRemote
ParseGGUFFileFromOllama -..->|crawl| parseGGUFFileFromDistroMetadata
ParseGGUFFileFromOllama -.-> ParseGGUFFileFromOllamaModel
ParseGGUFFileFromOllamaModel -.-> parseGGUFFileRemote
```

## Examples
Expand Down
27 changes: 1 addition & 26 deletions cmd/gguf-parser/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Usage of gguf-parser ...:
-no-mmap
Specify disabling Memory-Mapped using, which is used to estimate the usage. Memory-Mapped can avoid loading the entire model weights into RAM.
-ol-crawl
Crawl the Ollama model instead of blobs fetching, works with --ol-model, which will be more efficient and faster, but lossy.
Crawl the Ollama model instead of blobs fetching, works with --ol-model, which will be more efficient and faster, but lossy. [Deprecated, as Ollama Model layer page has changed, will be removed in v0.4.0.]
-ol-model string
Model name of Ollama, e.g. gemma2.
-ol-usage
Expand Down Expand Up @@ -236,31 +236,6 @@ $ gguf-parser --ol-model="gemma2"
| ESTIMATE | gemma2 | 8192 | false | true | 43 (42 + 1) | Yes | 65.97 MiB + 6.99 GiB = 7.05 GiB | 215.97 MiB | 8.43 GiB |
+--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+
$ gguf-parser --ol-model="gemma2" --ol-crawl
+--------------+--------+--------+--------------+---------------+----------+------------+----------+
| \ | Name | Arch | Quantization | Little Endian | Size | Parameters | BPW |
+--------------+--------+--------+--------------+---------------+----------+------------+----------+
| MODEL | gemma2 | gemma2 | Q4_0 | true | 5.06 GiB | 9.24 B | 4.71 bpw |
+--------------+--------+--------+--------------+---------------+----------+------------+----------+
+--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
| \ | Max Context Len | Embedding Len | Embedding GQA | Attention Head Cnt | Layers | Feed Forward Len | Expert Cnt | Vocabulary Len |
+--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
| ARCHITECTURE | 8192 | 3584 | 2 | 16 | 42 | 14336 | 0 | 256000 |
+--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
+--------------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
| \ | Model | Tokens Size | Tokens Len | Added Tokens Len | BOS Token | EOS Token | Unknown Token | Separator Token | Padding Token |
+--------------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
| TOKENIZER | llama | N/A | 256000 | N/A | 2 | 1 | 3 | N/A | 0 |
+--------------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
+--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+
| \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM |
+--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+
| ESTIMATE | gemma2 | 8192 | false | true | 43 (42 + 1) | Yes | 65.99 MiB + 6.99 GiB = 7.05 GiB | 215.99 MiB | 8.43 GiB |
+--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+
```
#### Parse Clip model
Expand Down
1 change: 0 additions & 1 deletion cmd/gguf-parser/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ require (
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
golang.org/x/exp v0.0.0-20240707233637-46b078467d37 // indirect
golang.org/x/mod v0.19.0 // indirect
golang.org/x/net v0.27.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.22.0 // indirect
golang.org/x/tools v0.23.0 // indirect
Expand Down
2 changes: 0 additions & 2 deletions cmd/gguf-parser/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions cmd/gguf-parser/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func main() {
"gemma2.")
fs.BoolVar(&olCrawl, "ol-crawl", olCrawl, "Crawl the Ollama model instead of blobs fetching, "+
"works with --ol-model, "+
"which will be more efficient and faster, but lossy.")
"which will be more efficient and faster, but lossy. [Deprecated, as Ollama Model layer page has changed, will be removed in v0.4.0.]")
fs.BoolVar(&olUsage, "ol-usage", olUsage, "Specify respecting the extending layers introduced by Ollama, "+
"works with --ol-model, "+
"which affects the usage estimation.")
Expand Down Expand Up @@ -273,7 +273,7 @@ func main() {
gf, err = ParseGGUFFileFromModelScope(ctx, msRepo, msFile, ropts...)
case olModel != "":
om := ParseOllamaModel(olModel)
gf, err = ParseGGUFFileFromOllamaModel(ctx, om, olCrawl, ropts...)
gf, err = ParseGGUFFileFromOllamaModel(ctx, om, ropts...)
if om != nil && olUsage {
// Parameters override.
{
Expand Down
12 changes: 0 additions & 12 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,10 @@ type GGUFFile struct {
TensorInfos GGUFTensorInfos `json:"tensorInfos"`
// Padding is the padding size of the GGUF file,
// which is used to split Header and TensorInfos from tensor data.
//
// This might be empty if parse from crawler.
Padding int64 `json:"padding"`
// TensorDataStartOffset is the offset in bytes of the tensor data in this file.
//
// The offset is the start of the file.
//
// This might be lossy if parse from crawler.
TensorDataStartOffset int64 `json:"tensorDataStartOffset"`

/* Appendix */
Expand Down Expand Up @@ -151,22 +147,16 @@ type (
// Len is the length of the array.
Len uint64 `json:"len"`
// Array holds all array items.
//
// This might be empty if skipping or parse from crawler.
Array []any `json:"array,omitempty"`

/* Appendix */

// StartOffset is the offset in bytes of the GGUFMetadataKVArrayValue in the GGUFFile file.
//
// The offset is the start of the file.
//
// This might be empty if parse from crawler.
StartOffset int64 `json:"startOffset"`

// Size is the size of the array in bytes.
//
// This might be empty if parse from crawler.
Size int64 `json:"endOffset"`
}

Expand Down Expand Up @@ -200,8 +190,6 @@ type (
// StartOffset is the offset in bytes of the GGUFTensorInfo in the GGUFFile file.
//
// The offset is the start of the file.
//
// This might be empty if parse from crawler.
StartOffset int64 `json:"startOffset"`
}

Expand Down
173 changes: 3 additions & 170 deletions file_from_distro.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,27 @@ import (
"fmt"
"net/http"
"path/filepath"
"regexp"
"sort"
"strconv"
"time"

"golang.org/x/exp/maps"

"github.com/thxcode/gguf-parser-go/util/funcx"
"github.com/thxcode/gguf-parser-go/util/httpx"
"github.com/thxcode/gguf-parser-go/util/json"
"github.com/thxcode/gguf-parser-go/util/stringx"
)

var (
ErrOllamaInvalidModel = errors.New("ollama invalid model")
ErrOllamaBaseLayerNotFound = errors.New("ollama base layer not found")
ErrOllamaIllegalMetadata = errors.New("ollama illegal metadata")
)

// ParseGGUFFileFromOllama parses a GGUF file from Ollama model's base layer,
// and returns a GGUFFile, or an error if any.
//
// If the crawl is true, it will try to crawl the metadata from Ollama website instead of blobs fetching,
// which will be more efficient and faster, but lossy.
// If the crawling fails, it will fall back to the default behavior.
func ParseGGUFFileFromOllama(ctx context.Context, model string, crawl bool, opts ...GGUFReadOption) (*GGUFFile, error) {
return ParseGGUFFileFromOllamaModel(ctx, ParseOllamaModel(model), crawl, opts...)
func ParseGGUFFileFromOllama(ctx context.Context, model string, opts ...GGUFReadOption) (*GGUFFile, error) {
return ParseGGUFFileFromOllamaModel(ctx, ParseOllamaModel(model), opts...)
}

// ParseGGUFFileFromOllamaModel is similar to ParseGGUFFileFromOllama,
// but inputs an OllamaModel instead of a string.
//
// The given OllamaModel will be completed(fetching MediaType, Config and Layers) after calling this function.
// If the crawl is true, it will try to crawl the metadata from Ollama website instead of blobs fetching,
// which will be more efficient and faster, but lossy.
// If the crawling fails, it will fall back to the default behavior.
func ParseGGUFFileFromOllamaModel(ctx context.Context, model *OllamaModel, crawl bool, opts ...GGUFReadOption) (gf *GGUFFile, err error) {
func ParseGGUFFileFromOllamaModel(ctx context.Context, model *OllamaModel, opts ...GGUFReadOption) (gf *GGUFFile, err error) {
if model == nil {
return nil, ErrOllamaInvalidModel
}
Expand All @@ -56,9 +40,6 @@ func ParseGGUFFileFromOllamaModel(ctx context.Context, model *OllamaModel, crawl
{
if o.CachePath != "" {
o.CachePath = filepath.Join(o.CachePath, "distro", "ollama")
if crawl {
o.CachePath = filepath.Join(o.CachePath, "brief")
}
}
c := GGUFFileCache(o.CachePath)

Expand Down Expand Up @@ -115,153 +96,5 @@ func ParseGGUFFileFromOllamaModel(ctx context.Context, model *OllamaModel, crawl
}
}

if crawl {
r, err := ml.FetchWebPage(ctx, cli)
if err == nil {
gf, err = parseGGUFFileFromDistroMetadata("ollama", r, ml.Size)
if err == nil {
return gf, nil
}
}

// Fallback to the default behavior.
}

return parseGGUFFileFromRemote(ctx, cli, ml.BlobURL().String(), o)
}

// _OllamaMetadata mirrors the brief JSON metadata document crawled from the
// Ollama website for a model's base layer. It is the crawl-mode stand-in for
// parsing the GGUF blob itself, and is therefore lossy by construction.
type _OllamaMetadata struct {
	// Metadata holds the GGUF metadata key/value pairs.
	// Long array values may be elided by the website as a
	// "... (N values)" placeholder string — TODO confirm against the page format.
	Metadata map[string]any `json:"metadata"`
	// NumParams is the total number of model parameters.
	NumParams uint64 `json:"num_params"`
	// Tensors describes each tensor recorded in the GGUF file.
	Tensors []struct {
		// Name is the tensor's name.
		Name string `json:"name"`
		// Shape lists the tensor's dimensions.
		Shape []uint64 `json:"shape"`
		// Offset is the tensor's byte offset within the tensor data section.
		Offset uint64 `json:"offset"`
		// Type is the raw GGML tensor type code.
		Type uint32 `json:"type"`
	} `json:"tensors"`
	// Version is the GGUF format version.
	Version uint32 `json:"version"`
}

// parseGGUFFileFromDistroMetadata reconstructs a GGUFFile from the brief
// metadata document crawled from a distro website, without fetching the GGUF
// blob itself.
//
// source must be "ollama" (the only supported distro), data is the raw JSON
// payload, and size is the byte size of the underlying GGUF layer blob.
//
// The reconstruction is lossy: elided array contents, per-item offsets and
// paddings that only the real file carries are approximated or left zero.
func parseGGUFFileFromDistroMetadata(source, data string, size uint64) (*GGUFFile, error) {
	if source != "ollama" {
		return nil, fmt.Errorf("invalid source %q", source)
	}

	var m _OllamaMetadata
	{
		if err := json.Unmarshal([]byte(data), &m); err != nil {
			return nil, fmt.Errorf("unmarshal metadata: %w", err)
		}
		// A document without metadata or tensors cannot describe a model.
		if len(m.Metadata) == 0 || len(m.Tensors) == 0 {
			return nil, ErrOllamaIllegalMetadata
		}
	}

	// Convert.

	var (
		// Matches the placeholder the website substitutes for long arrays,
		// e.g. "... (32000 values)"; only the element count survives the crawl.
		arrayMetadataValueRegex = regexp.MustCompile(`^\.{3} \((?P<len>\d+) values\)$`)

		gf GGUFFile
	)

	gf.Header.Magic = GGUFMagicGGUFLe
	gf.Header.Version = GGUFVersion(m.Version)
	gf.Header.TensorCount = uint64(len(m.Tensors))
	// The crawled document omits tokenizer.chat_template, so account for it
	// explicitly on top of the listed metadata entries.
	gf.Header.MetadataKVCount = uint64(1 /* tokenizer.chat_template */ + len(m.Metadata))
	gf.Size = GGUFBytesScalar(size)
	gf.ModelParameters = GGUFParametersScalar(m.NumParams)

	gf.Header.MetadataKV = make([]GGUFMetadataKV, 0, len(m.Metadata))
	// Iterate the keys in sorted order for a deterministic result, injecting
	// the synthetic tokenizer.chat_template key alongside the crawled ones.
	for _, k := range func() []string {
		ks := maps.Keys(m.Metadata)
		ks = append(ks, "tokenizer.chat_template")
		sort.Strings(ks)
		return ks
	}() {
		if k == "tokenizer.chat_template" {
			// The real template is not available from the crawl;
			// store a recognizable marker string instead.
			gf.Header.MetadataKV = append(gf.Header.MetadataKV, GGUFMetadataKV{
				Key:       k,
				ValueType: GGUFMetadataValueTypeString,
				Value:     "!!! tokenizer.chat_template !!!",
			})
			continue
		}

		var (
			vt GGUFMetadataValueType
			v  = m.Metadata[k]
		)
		// JSON decoding collapses the GGUF value types; map them back from the
		// dynamic Go type of the decoded value.
		switch vv := v.(type) {
		case bool:
			vt = GGUFMetadataValueTypeBool
		case float64:
			vt = GGUFMetadataValueTypeFloat32
			v = float32(vv)
		case int64:
			vt = GGUFMetadataValueTypeUint32
			v = uint32(vv)
		case string:
			vt = GGUFMetadataValueTypeString
			// Elided arrays appear as a "... (N values)" placeholder string;
			// recover the length and guess the element type from the key's
			// last dotted component. The items themselves are lost.
			if r := arrayMetadataValueRegex.FindStringSubmatch(vv); len(r) == 2 {
				vt = GGUFMetadataValueTypeArray
				av := GGUFMetadataKVArrayValue{
					Type: GGUFMetadataValueTypeString,
					Len:  funcx.MustNoError(strconv.ParseUint(r[1], 10, 64)),
				}
				switch _, d, _ := stringx.CutFromRight(k, "."); d {
				case "scores":
					av.Type = GGUFMetadataValueTypeFloat32
				case "token_type":
					av.Type = GGUFMetadataValueTypeInt32
				}
				v = av
			}
		case []any:
			vt = GGUFMetadataValueTypeArray
			av := GGUFMetadataKVArrayValue{
				Type: GGUFMetadataValueTypeString,
				Len:  uint64(len(vv)),
			}
			if av.Len > 0 {
				av.Array = vv
				// Infer the element type from the first item; assumes the
				// array is homogeneous — TODO confirm for crawled payloads.
				switch vv[0].(type) {
				case bool:
					av.Type = GGUFMetadataValueTypeBool
				case float64:
					av.Type = GGUFMetadataValueTypeFloat32
				case int64:
					av.Type = GGUFMetadataValueTypeUint32
				}
			}
			v = av
		}
		gf.Header.MetadataKV = append(gf.Header.MetadataKV, GGUFMetadataKV{
			Key:       k,
			ValueType: vt,
			Value:     v,
		})
	}

	gf.TensorInfos = make([]GGUFTensorInfo, 0, len(m.Tensors))
	for i := range m.Tensors {
		t := m.Tensors[i]
		ti := GGUFTensorInfo{
			Name:        t.Name,
			NDimensions: uint32(len(t.Shape)),
			Dimensions:  t.Shape,
			Offset:      t.Offset,
			Type:        GGMLType(t.Type),
		}
		gf.TensorInfos = append(gf.TensorInfos, ti)
		// Accumulate the total weight size from each tensor's byte size.
		gf.ModelSize += GGUFBytesScalar(ti.Bytes())
	}

	// Approximate the tensor-data start offset as everything before the
	// weights: total blob size minus the accumulated model size. This is
	// lossy compared with reading the real header.
	gf.TensorDataStartOffset = int64(gf.Size - gf.ModelSize)

	// Bits-per-weight = model bits / parameter count; guard against a
	// zero parameter count to avoid dividing by zero.
	if gf.ModelParameters != 0 {
		gf.ModelBitsPerWeight = GGUFBitsPerWeightScalar(float64(gf.ModelSize) * 8 / float64(gf.ModelParameters))
	}

	return &gf, nil
}
Loading

0 comments on commit 902a8c5

Please sign in to comment.