Skip to content

Commit

Permalink
refactor: estimate
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <[email protected]>
  • Loading branch information
thxCode committed Jun 12, 2024
1 parent 761eb4d commit 0f7733d
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 179 deletions.
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,20 +107,26 @@ spew.Dump(f.Tokenizer())

```

### Estimate usage
### Estimate usage in [llama.cpp](https://github.com/ggerganov/llama.cpp)

```go
spew.Dump(f.Estimate())
spew.Dump(f.EstimateLLaMACppUsage())

```

#### Estimate with a larger context size

```go
spew.Dump(f.Estimate(WithContextSize(4096) /* 4K */))
spew.Dump(f.EstimateLLaMACppUsage(WithContextSize(4096) /* 4K */))

```

#### Estimate with a specific number of offload layers

```go
spew.Dump(f.EstimateLLaMACppUsage(WithOffloadLayers(10)))
```

## License

MIT
18 changes: 8 additions & 10 deletions cmd/gguf-parser/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func main() {
ropts = append(ropts, SkipTLSVerification())
}

eopts := []GGUFEstimateOption{
eopts := []LLaMACppUsageEstimateOption{
WithCacheValueType(GGMLTypeF16),
WithCacheKeyType(GGMLTypeF16),
}
Expand Down Expand Up @@ -168,7 +168,7 @@ func main() {
m GGUFModelMetadata
a GGUFArchitectureMetadata
t GGUFTokenizerMetadata
e GGUFEstimate
e LLaMACppUsageEstimate
)
if !skipModel {
m = gf.Model()
Expand All @@ -180,7 +180,7 @@ func main() {
t = gf.Tokenizer()
}
if !skipEstimate {
e = gf.Estimate(eopts...)
e = gf.EstimateLLaMACppUsage(eopts...)
}

// Output
Expand All @@ -197,7 +197,7 @@ func main() {
o["tokenizer"] = t
}
if !skipEstimate {
es := e.Sum(!noMMap)
es := e.Summarize(!noMMap)
o["estimate"] = es
}

Expand Down Expand Up @@ -267,7 +267,7 @@ func main() {
}

if !skipEstimate {
es := e.Sum(!noMMap)
es := e.Summarize(!noMMap)
if ctxSize <= 0 {
if a.MaximumContextLength == 0 {
a = gf.Architecture()
Expand All @@ -276,20 +276,18 @@ func main() {
}
tprintf(
"ESTIMATE",
[]string{"Mem. Arch", "MMap", "Context Size", "(CPU) RAM", "(GPU) VRAM"},
[]string{"Mem. Arch", "MMap", "Context Size", "Usage"},
[]string{
"UMA",
sprintf(!noMMap),
sprintf(ctxSize),
sprintf(es.UMA.RAM),
sprintf(es.UMA.VRAM),
sprintf(es.UMA),
},
[]string{
"NonUMA",
sprintf(!noMMap),
sprintf(ctxSize),
sprintf(es.NonUMA.RAM),
sprintf(es.NonUMA.VRAM),
fmt.Sprintf("%s(RAM) + %s (VRAM)", es.NonUMA.RAM, es.NonUMA.VRAM),
})
}
}
Expand Down
Loading

0 comments on commit 0f7733d

Please sign in to comment.