Skip to content

Commit

Permalink
feat: parse ollama model
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <[email protected]>
  • Loading branch information
thxCode committed Jul 4, 2024
1 parent b541748 commit c293abd
Show file tree
Hide file tree
Showing 21 changed files with 1,166 additions and 107 deletions.
61 changes: 61 additions & 0 deletions cmd/gguf-parser/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ Usage of gguf-parser ...:
Specify how many layers to offload, which is used to estimate the usage, default is full offloaded. [Deprecated, use --gpu-layers instead] (default -1)
-offload-layers-step uint
Specify the step of layers to offload, works with --offload-layers. [Deprecated, use --gpu-layers-step instead]
-ol-crawl
Crawl the Ollama model metadata instead of fetching blobs, which is more efficient and faster, but lossy.
-ol-model string
Model name of Ollama, e.g. gemma2.
-parallel-size int
Specify the number of parallel sequences to decode, which is used to estimate the usage, default is 1. (default 1)
-path string
Expand All @@ -61,6 +65,7 @@ Usage of gguf-parser ...:
URL from which to load the GGUF file, e.g. https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf. Note that gguf-parser does not need to download the entire GGUF file.
-version
Show gguf-parser version.
```
### Parse
Expand Down Expand Up @@ -155,6 +160,62 @@ $ gguf-parser --hf-repo="openbmb/MiniCPM-Llama3-V-2_5-gguf" --hf-file="ggml-mode
```
#### Parse Ollama model
```shell
$ gguf-parser --ol-model="gemma2"
+-------+--------+--------+----------------------+-----------+---------------+----------+------------+----------+
| MODEL | NAME | ARCH | QUANTIZATION VERSION | FILE TYPE | LITTLE ENDIAN | SIZE | PARAMETERS | BPW |
+ +--------+--------+----------------------+-----------+---------------+----------+------------+----------+
| | gemma2 | gemma2 | 2 | Q4_0 | true | 5.06 GiB | 9.24 B | 4.71 bpw |
+-------+--------+--------+----------------------+-----------+---------------+----------+------------+----------+
+--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
| ARCHITECTURE | MAX CONTEXT LEN | EMBEDDING LEN | EMBEDDING GQA | ATTENTION HEAD CNT | LAYERS | FEED FORWARD LEN | EXPERT CNT | VOCABULARY LEN |
+ +-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
| | 8192 | 3584 | 2048 | 16 | 42 | 14336 | 0 | 256000 |
+--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
+-----------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
| TOKENIZER | MODEL | TOKENS SIZE | TOKENS LEN | ADDED TOKENS LEN | BOS TOKEN | EOS TOKEN | UNKNOWN TOKEN | SEPARATOR TOKEN | PADDING TOKEN |
+ +-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
| | llama | 3.80 MiB | 256000 | 0 | 2 | 1 | 3 | N/A | 0 |
+-----------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
+----------+--------+--------------+-----------------+--------------+----------------+----------------+----------+------------+-------------+
| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | FULL OFFLOADED | UMA RAM | NONUMA RAM | NONUMA VRAM |
+ +--------+--------------+-----------------+--------------+----------------+----------------+----------+------------+-------------+
| | gemma2 | 8192 | false | true | 43 (42 + 1) | Yes | 2.69 GiB | 215.97 MiB | 8.43 GiB |
+----------+--------+--------------+-----------------+--------------+----------------+----------------+----------+------------+-------------+
$ gguf-parser --ol-model="gemma2" --ol-crawl
+-------+--------+--------+----------------------+-----------+---------------+----------+------------+----------+
| MODEL | NAME | ARCH | QUANTIZATION VERSION | FILE TYPE | LITTLE ENDIAN | SIZE | PARAMETERS | BPW |
+ +--------+--------+----------------------+-----------+---------------+----------+------------+----------+
| | gemma2 | gemma2 | 2 | Q4_0 | true | 5.06 GiB | 9.24 B | 4.71 bpw |
+-------+--------+--------+----------------------+-----------+---------------+----------+------------+----------+
+--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
| ARCHITECTURE | MAX CONTEXT LEN | EMBEDDING LEN | EMBEDDING GQA | ATTENTION HEAD CNT | LAYERS | FEED FORWARD LEN | EXPERT CNT | VOCABULARY LEN |
+ +-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
| | 8192 | 3584 | 2048 | 16 | 42 | 14336 | 0 | 256000 |
+--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
+-----------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
| TOKENIZER | MODEL | TOKENS SIZE | TOKENS LEN | ADDED TOKENS LEN | BOS TOKEN | EOS TOKEN | UNKNOWN TOKEN | SEPARATOR TOKEN | PADDING TOKEN |
+ +-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
| | llama | 0 B | 256000 | 0 | 2 | 1 | 3 | N/A | 0 |
+-----------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
+----------+--------+--------------+-----------------+--------------+----------------+----------------+----------+------------+-------------+
| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | FULL OFFLOADED | UMA RAM | NONUMA RAM | NONUMA VRAM |
+ +--------+--------------+-----------------+--------------+----------------+----------------+----------+------------+-------------+
| | gemma2 | 8192 | false | true | 43 (42 + 1) | Yes | 2.69 GiB | 215.99 MiB | 8.12 GiB |
+----------+--------+--------------+-----------------+--------------+----------------+----------------+----------+------------+-------------+
```
### Estimate
#### Estimate with zero layers offload
Expand Down
5 changes: 5 additions & 0 deletions cmd/gguf-parser/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@ require (

require (
github.com/henvic/httpretty v0.1.3 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mattn/go-runewidth v0.0.9 // indirect
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
golang.org/x/mod v0.17.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/tools v0.21.0 // indirect
Expand Down
15 changes: 15 additions & 0 deletions cmd/gguf-parser/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 27 additions & 17 deletions cmd/gguf-parser/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ import (
"strconv"
"strings"
"sync"
stdjson "encoding/json"

"github.com/olekukonko/tablewriter"

"github.com/thxcode/gguf-parser-go/util/json"

. "github.com/thxcode/gguf-parser-go"
)

Expand All @@ -24,9 +25,12 @@ func main() {

var (
// model options
path string
url string
repo, file string
path string
url string
hfRepo string
hfFile string
olModel string
olCrawl bool
// read options
debug bool
skipTLSVerify bool
Expand All @@ -47,8 +51,8 @@ func main() {
skipTokenizer bool
skipEstimate bool
inMib bool
json bool
jsonPretty = true
inJson bool
inPrettyJson = true
)
fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
fs.Usage = func() {
Expand All @@ -62,14 +66,18 @@ func main() {
"https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF"+
"/resolve/main/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf. "+
"Note that gguf-parser does not need to download the entire GGUF file.")
fs.StringVar(&repo, "repo", repo, "Repository of HuggingFace which the GGUF file store, e.g. "+
fs.StringVar(&hfRepo, "repo", hfRepo, "Repository of HuggingFace which the GGUF file store, e.g. "+
"NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF, works with --file. [Deprecated, use --hf-repo instead]")
fs.StringVar(&file, "file", file, "Model file below the --repo, e.g. "+
fs.StringVar(&hfFile, "file", hfFile, "Model file below the --repo, e.g. "+
"Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf. [Deprecated, use --hf-file instead]") // Deprecated.
fs.StringVar(&repo, "hf-repo", repo, "Repository of HuggingFace which the GGUF file store, e.g. "+
fs.StringVar(&hfRepo, "hf-repo", hfRepo, "Repository of HuggingFace which the GGUF file store, e.g. "+
"NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF, works with --hf-file.") // Deprecated.
fs.StringVar(&file, "hf-file", file, "Model file below the --repo, e.g. "+
fs.StringVar(&hfFile, "hf-file", hfFile, "Model file below the --repo, e.g. "+
"Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf.")
fs.StringVar(&olModel, "ol-model", olModel, "Model name of Ollama, e.g. "+
"gemma2.")
fs.BoolVar(&olCrawl, "ol-crawl", olCrawl, "Crawl the Ollama model instead of blobs fetching, "+
"which will be more efficient and faster, but lossy.")
fs.BoolVar(&debug, "debug", debug, "Enable debugging, verbosity.")
fs.BoolVar(&skipTLSVerify, "skip-tls-verify", skipTLSVerify, "Skip TLS verification, works with --url.")
fs.IntVar(&ctxSize, "ctx-size", ctxSize, "Specify the size of prompt context, "+
Expand Down Expand Up @@ -113,8 +121,8 @@ func main() {
fs.BoolVar(&skipTokenizer, "skip-tokenizer", skipTokenizer, "Skip to display tokenizer metadata")
fs.BoolVar(&skipEstimate, "skip-estimate", skipEstimate, "Skip to estimate.")
fs.BoolVar(&inMib, "in-mib", inMib, "Display the estimated result in table with MiB.")
fs.BoolVar(&json, "json", json, "Output as JSON.")
fs.BoolVar(&jsonPretty, "json-pretty", jsonPretty, "Output as pretty JSON.")
fs.BoolVar(&inJson, "json", inJson, "Output as JSON.")
fs.BoolVar(&inPrettyJson, "json-pretty", inPrettyJson, "Output as pretty JSON.")
if err := fs.Parse(os.Args[1:]); err != nil {
fmt.Println(err.Error())
os.Exit(1)
Expand Down Expand Up @@ -192,8 +200,10 @@ func main() {
gf, err = ParseGGUFFile(path, ropts...)
case url != "":
gf, err = ParseGGUFFileRemote(ctx, url, ropts...)
case repo != "" && file != "":
gf, err = ParseGGUFFileFromHuggingFace(ctx, repo, file, ropts...)
case hfRepo != "" && hfFile != "":
gf, err = ParseGGUFFileFromHuggingFace(ctx, hfRepo, hfFile, ropts...)
case olModel != "":
gf, err = ParseGGUFFileFromOllama(ctx, olModel, olCrawl, ropts...)
}
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "failed to parse GGUF file: %s\n", err.Error())
Expand Down Expand Up @@ -244,7 +254,7 @@ func main() {
}
}

if json {
if inJson {
o := map[string]any{}
if !skipModel {
o["model"] = m
Expand Down Expand Up @@ -286,8 +296,8 @@ func main() {
o["estimate"] = es
}

enc := stdjson.NewEncoder(os.Stdout)
if jsonPretty {
enc := json.NewEncoder(os.Stdout)
if inPrettyJson {
enc.SetIndent("", " ")
}
if err := enc.Encode(o); err != nil {
Expand Down
80 changes: 11 additions & 69 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,18 @@ package gguf_parser

import (
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"net/http"
"regexp"
"strconv"
"strings"
"time"

"golang.org/x/exp/constraints"

"github.com/thxcode/gguf-parser-go/util/bytex"
"github.com/thxcode/gguf-parser-go/util/funcx"
"github.com/thxcode/gguf-parser-go/util/httpx"
"github.com/thxcode/gguf-parser-go/util/osx"
)

Expand All @@ -36,10 +32,14 @@ type GGUFFile struct {
TensorInfos GGUFTensorInfos `json:"tensorInfos"`
// Padding is the padding size of the GGUF file,
// which is used to split Header and TensorInfos from tensor data.
//
// This might be empty if parsed from a crawler.
Padding int64 `json:"padding"`
// TensorDataStartOffset is the offset in bytes of the tensor data in this file.
//
// The offset is the start of the file.
//
// This might be lossy if parsed from a crawler.
TensorDataStartOffset int64 `json:"tensorDataStartOffset"`

/* Appendix */
Expand Down Expand Up @@ -151,17 +151,21 @@ type (
Len uint64 `json:"len"`
// Array holds all array items.
//
// Array may be empty if skipping.
// This might be empty if skipped or parsed from a crawler.
Array []any `json:"array,omitempty"`

/* Appendix */

// StartOffset is the offset in bytes of the GGUFMetadataKVArrayValue in the GGUFFile file.
//
// The offset is the start of the file.
//
// This might be empty if parsed from a crawler.
StartOffset int64 `json:"startOffset"`

// Size is the size of the array in bytes.
//
// This might be empty if parsed from a crawler.
Size int64 `json:"endOffset"`
}

Expand Down Expand Up @@ -195,6 +199,8 @@ type (
// StartOffset is the offset in bytes of the GGUFTensorInfo in the GGUFFile file.
//
// The offset is the start of the file.
//
// This might be empty if parsed from a crawler.
StartOffset int64 `json:"startOffset"`
}

Expand Down Expand Up @@ -237,70 +243,6 @@ func ParseGGUFFile(path string, opts ...GGUFReadOption) (*GGUFFile, error) {
return parseGGUFFile(s, f, o)
}

// ParseGGUFFileRemote parses a GGUF file from a remote URL,
// and returns a GGUFFile, or an error if any.
func ParseGGUFFileRemote(ctx context.Context, url string, opts ...GGUFReadOption) (*GGUFFile, error) {
var o _GGUFReadOptions
for _, opt := range opts {
opt(&o)
}

cli := httpx.Client(
httpx.ClientOptions().
WithUserAgent("gguf-parser-go").
If(o.Debug, func(x *httpx.ClientOption) *httpx.ClientOption {
return x.WithDebug()
}).
WithTimeout(0).
WithTransport(
httpx.TransportOptions().
WithoutKeepalive().
TimeoutForDial(5*time.Second).
TimeoutForTLSHandshake(5*time.Second).
TimeoutForResponseHeader(5*time.Second).
If(o.SkipProxy, func(x *httpx.TransportOption) *httpx.TransportOption {
return x.WithoutProxy()
}).
If(o.ProxyURL != nil, func(x *httpx.TransportOption) *httpx.TransportOption {
return x.WithProxy(http.ProxyURL(o.ProxyURL))
}).
If(o.SkipTLSVerification, func(x *httpx.TransportOption) *httpx.TransportOption {
return x.WithoutInsecureVerify()
})))

var (
f io.ReadSeeker
s int64
)
{
req, err := httpx.NewGetRequestWithContext(ctx, url)
if err != nil {
return nil, fmt.Errorf("new request: %w", err)
}

var sf *httpx.SeekerFile
if o.BufferSize > 0 {
sf, err = httpx.OpenSeekerFileWithSize(cli, req, o.BufferSize, 0)
} else {
sf, err = httpx.OpenSeekerFile(cli, req)
}
if err != nil {
return nil, fmt.Errorf("open http file: %w", err)
}
defer osx.Close(sf)
f = io.NewSectionReader(sf, 0, sf.Len())
s = sf.Len()
}

return parseGGUFFile(s, f, o)
}

// ParseGGUFFileFromHuggingFace parses a GGUF file from Hugging Face,
// and returns a GGUFFile, or an error if any.
func ParseGGUFFileFromHuggingFace(ctx context.Context, repo, file string, opts ...GGUFReadOption) (*GGUFFile, error) {
return ParseGGUFFileRemote(ctx, fmt.Sprintf("https://huggingface.co/%s/resolve/main/%s", repo, file), opts...)
}

func parseGGUFFile(s int64, f io.ReadSeeker, o _GGUFReadOptions) (_ *GGUFFile, err error) {
var gf GGUFFile
var bo binary.ByteOrder = binary.LittleEndian
Expand Down
Loading

0 comments on commit c293abd

Please sign in to comment.