Skip to content

Commit

Permalink
feat: introduce in-short arg
Browse files: browse the repository at this point in the history
Signed-off-by: thxCode <[email protected]>
  • Loading branch information
thxCode committed Aug 28, 2024
1 parent 12699ba commit b06962d
Showing 1 changed file with 30 additions and 30 deletions.
60 changes: 30 additions & 30 deletions cmd/gguf-parser/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,13 @@ func main() {
Usage: "Skip to estimate. " +
"By default, gguf-parser always estimates the file which types with \"model\".",
},
&cli.BoolFlag{
Destination: &inShort,
Value: inShort,
Category: "Output",
Name: "in-short",
Usage: "Display the estimated result in table in short form.",
},
&cli.BoolFlag{
Destination: &inMib,
Value: inMib,
Expand Down Expand Up @@ -675,6 +682,7 @@ var (
// output options
raw bool
rawOutput string
inShort bool
skipMetadata bool
skipArchitecture bool
skipTokenizer bool
Expand Down Expand Up @@ -1253,41 +1261,30 @@ func mainAction(c *cli.Context) error {
}

if !skipEstimate && e.Type == "model" {
var (
hds [][]any
bds [][]any
)
hds := make([][]any, 2)
es := e.Summarize(mmap, platformRAM, platformVRAM)
hds = [][]any{
{
if !inShort {
hds[0] = []any{
"Arch",
"Context Size",
"Batch Size (L / P)",
"Flash Attention",
"MMap Load",
"Embedding Only",
"Distributable",
"Offload Layers",
"Full Offloaded",
"RAM",
"RAM",
"RAM",
},
{
}
hds[1] = []any{
"Arch",
"Context Size",
"Batch Size (L / P)",
"Flash Attention",
"MMap Load",
"Embedding Only",
"Distributable",
"Offload Layers",
"Full Offloaded",
"Layers",
"UMA",
"NonUMA",
},
}
}
hds[0] = append(hds[0], "Offload Layers", "Full Offloaded", "RAM", "RAM", "RAM")
hds[1] = append(hds[1], "Offload Layers", "Full Offloaded", "Layers", "UMA", "NonUMA")
for i := range es.Memory[0].VRAMs {
hds[0] = append(hds[0], fmt.Sprintf("VRAM %d", i), fmt.Sprintf("VRAM %d", i), fmt.Sprintf("VRAM %d", i))
hds[1] = append(hds[1], "Layers", "UMA", "NonUMA")
Expand Down Expand Up @@ -1320,23 +1317,26 @@ func mainAction(c *cli.Context) error {
es.Memory = ess
}

bds = make([][]any, len(es.Memory))
bds := make([][]any, len(es.Memory))
for i := range es.Memory {
bds[i] = []any{
sprintf(es.Architecture),
sprintf(es.ContextSize),
sprintf("%d / %d", es.LogicalBatchSize, es.PhysicalBatchSize),
sprintf(tenary(flashAttention, tenary(es.FlashAttention, "Enabled", "Not Supported"), "Disabled")),
sprintf(tenary(mmap, tenary(!es.NoMMap, "Enabled", "Not Supported"), "Disabled")),
sprintf(tenary(es.EmbeddingOnly, "Yes", "No")),
sprintf(tenary(es.Distributable, "Supported", "Not Supported")),
if !inShort {
bds[i] = []any{
sprintf(es.Architecture),
sprintf(es.ContextSize),
sprintf("%d / %d", es.LogicalBatchSize, es.PhysicalBatchSize),
sprintf(tenary(flashAttention, tenary(es.FlashAttention, "Enabled", "Not Supported"), "Disabled")),
sprintf(tenary(mmap, tenary(!es.NoMMap, "Enabled", "Not Supported"), "Disabled")),
sprintf(tenary(es.EmbeddingOnly, "Yes", "No")),
sprintf(tenary(es.Distributable, "Supported", "Not Supported")),
}
}
bds[i] = append(bds[i],
sprintf(tenary(es.Memory[i].FullOffloaded, sprintf("%d (%d + 1)",
es.Memory[i].OffloadLayers, es.Memory[i].OffloadLayers-1), es.Memory[i].OffloadLayers)),
sprintf(tenary(es.Memory[i].FullOffloaded, "Yes", "No")),
sprintf(tenary(!es.Memory[i].RAM.HandleOutputLayer, es.Memory[i].RAM.HandleLayers, sprintf("%d + 1", es.Memory[i].RAM.HandleLayers))),
sprintf(es.Memory[i].RAM.UMA),
sprintf(es.Memory[i].RAM.NonUMA),
}
sprintf(es.Memory[i].RAM.NonUMA))
for _, v := range es.Memory[i].VRAMs {
bds[i] = append(bds[i],
sprintf(tenary(!v.HandleOutputLayer, v.HandleLayers, sprintf("%d + 1", v.HandleLayers))),
Expand Down

0 comments on commit b06962d

Please sign in to comment.