diff --git a/.github/workflows/cmd.yml b/.github/workflows/cmd.yml index 01b3303..d35a539 100644 --- a/.github/workflows/cmd.yml +++ b/.github/workflows/cmd.yml @@ -40,7 +40,7 @@ jobs: with: go-version: "1.22.9" cache-dependency-path: | - **/go.sum + cmd/**/go.sum - name: Make run: make build env: diff --git a/Makefile b/Makefile index e9a0430..1eea47d 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ lint: if [[ "$(LINT_DIRTY)" == "true" ]]; then \ if [[ -n $$(git status --porcelain) ]]; then \ echo "Code tree is dirty."; \ - exit 1; \ + git diff --exit-code; \ fi; \ fi diff --git a/README.md b/README.md index eb2d651..c552bd8 100644 --- a/README.md +++ b/README.md @@ -508,6 +508,7 @@ $ gguf-parser --hf-repo="gpustack/FLUX.1-dev-GGUF" --hf-file="FLUX.1-dev-FP16.gg +--------+-----------------+-------------+---------------+----------------+-----------+-----------+-----------+-----------+ $ # Parse FLUX.1-dev Model with Autoencoder tiling +$ gguf-parser --hf-repo="gpustack/FLUX.1-dev-GGUF" --hf-file="FLUX.1-dev-FP16.gguf" --vae-tiling +----------------------------------------------------------------------------------------------+ | METADATA | +-------+------+-----------+--------------+---------------+-----------+------------+-----------+ diff --git a/gen.regression.go b/gen.regression.go index 483d713..2024a4e 100644 --- a/gen.regression.go +++ b/gen.regression.go @@ -13,6 +13,8 @@ import ( "go/format" "gonum.org/v1/gonum/mat" + "golang.org/x/exp/maps" + "sort" ) type LinearRegression struct { @@ -100,13 +102,13 @@ import "math" // {{ .Name }} returns the memory usage in bytes for the given width and height, // which is calculated by linear regression or polynomial regression. func {{ .Name }}(width, height uint32, flashAttention bool) uint64 { - coefficients := []float64{ {{ range .PolynomialRegression.Coefficients }}{{ . 
}}, {{ end }} } + coefficients := []float64{ {{ range $i, $c := .PolynomialRegression.Coefficients }}{{ if eq $i 0 }}{{ printf "%.5f" $c }}{{ else }}{{ printf "%.10f" $c }}{{ end }}, {{ end }} } degree := {{ .PolynomialRegression.Degree }} x := float64(width * height) {{ if .LinearRegression -}} if flashAttention { - coefficients = []float64{ {{ .LinearRegression.Intercept }}, {{ .LinearRegression.Slope }} } + coefficients = []float64{ {{ printf "%.5f" .LinearRegression.Intercept }}, {{ printf "%.10f" .LinearRegression.Slope }} } degree = 1 } {{- end }} @@ -407,10 +409,11 @@ func {{ .Name }}(width, height uint32, flashAttention bool) uint64 { Degree: 2, } - xs, ys := make([]float64, 0, len(t.x2y)), make([]float64, 0, len(t.x2y)) - for x, y := range t.x2y { - xs = append(xs, x) - ys = append(ys, y*1024*1024) // MB to B + xs := maps.Keys(t.x2y) + sort.Float64s(xs) + ys := make([]float64, len(xs)) + for j, x := range xs { + ys[j] = t.x2y[x] * 1024 * 1024 // MB to B } pr.Fit(xs, ys) @@ -440,10 +443,11 @@ func {{ .Name }}(width, height uint32, flashAttention bool) uint64 { lr := LinearRegression{} - xs, ys := make([]float64, 0, len(t.fax2y)), make([]float64, 0, len(t.fax2y)) - for x, y := range t.fax2y { - xs = append(xs, x) - ys = append(ys, y*1024*1024) // MB to B + xs := maps.Keys(t.fax2y) + sort.Float64s(xs) + ys := make([]float64, len(xs)) + for j, x := range xs { + ys[j] = t.fax2y[x] * 1024 * 1024 // MB to B } lr.Fit(xs, ys) diff --git a/zz_generated.diffusion_model_memory_usage.regression.go b/zz_generated.diffusion_model_memory_usage.regression.go index 5f870f1..5a17e83 100644 --- a/zz_generated.diffusion_model_memory_usage.regression.go +++ b/zz_generated.diffusion_model_memory_usage.regression.go @@ -5,7 +5,7 @@ import "math" // GuessSD1DiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height, // which is calculated by linear regression or polynomial regression. 
func GuessSD1DiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 { - coefficients := []float64{7.8763685671743e+06, 161.42301986333496, 0.007812489338703485} + coefficients := []float64{7876368.56718, 161.4230198633, 0.0078124893} degree := 2 x := float64(width * height) @@ -19,12 +19,12 @@ func GuessSD1DiffusionModelMemoryUsage(width, height uint32, flashAttention bool // GuessSD2DiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height, // which is calculated by linear regression or polynomial regression. func GuessSD2DiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 { - coefficients := []float64{-3.5504397905618614e+08, -1193.3271458642232, 0.005402381760522009} + coefficients := []float64{-355043979.05619, -1193.3271458642, 0.0054023818} degree := 2 x := float64(width * height) if flashAttention { - coefficients = []float64{3.78068128077788e+06, 513.2102510934714} + coefficients = []float64{3780681.28078, 513.2102510935} degree = 1 } @@ -38,12 +38,12 @@ func GuessSD2DiffusionModelMemoryUsage(width, height uint32, flashAttention bool // GuessSDXLDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height, // which is calculated by linear regression or polynomial regression. 
func GuessSDXLDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 { - coefficients := []float64{5.554129038929968e+07, 138.31961166554433, 0.0006109454572342757} + coefficients := []float64{55541290.38930, 138.3196116655, 0.0006109455} degree := 2 x := float64(width * height) if flashAttention { - coefficients = []float64{-5.95880278052181e+06, 500.0687898914631} + coefficients = []float64{-5958802.78052, 500.0687898915} degree = 1 } @@ -57,12 +57,12 @@ func GuessSDXLDiffusionModelMemoryUsage(width, height uint32, flashAttention boo // GuessSDXLRefinerDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height, // which is calculated by linear regression or polynomial regression. func GuessSDXLRefinerDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 { - coefficients := []float64{4.939599234485548e+07, 155.2477810191175, 0.0007351735797614931} + coefficients := []float64{49395992.34486, 155.2477810191, 0.0007351736} degree := 2 x := float64(width * height) if flashAttention { - coefficients = []float64{7.0313433199802125e+06, 599.4137437226634} + coefficients = []float64{7031343.31998, 599.4137437227} degree = 1 } @@ -76,7 +76,7 @@ func GuessSDXLRefinerDiffusionModelMemoryUsage(width, height uint32, flashAttent // GuessSD3MediumDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height, // which is calculated by linear regression or polynomial regression. 
func GuessSD3MediumDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 { - coefficients := []float64{1.6529921370035086e+07, 234.66562477184195, 0.0014648995324747492} + coefficients := []float64{16529921.37003, 234.6656247718, 0.0014648995} degree := 2 x := float64(width * height) @@ -90,7 +90,7 @@ func GuessSD3MediumDiffusionModelMemoryUsage(width, height uint32, flashAttentio // GuessSD35MediumDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height, // which is calculated by linear regression or polynomial regression. func GuessSD35MediumDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 { - coefficients := []float64{1.7441103472644456e+07, 281.695681980568, 0.0014651233076620938} + coefficients := []float64{17441103.47264, 281.6956819806, 0.0014651233} degree := 2 x := float64(width * height) @@ -104,7 +104,7 @@ func GuessSD35MediumDiffusionModelMemoryUsage(width, height uint32, flashAttenti // GuessSD35LargeDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height, // which is calculated by linear regression or polynomial regression. func GuessSD35LargeDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 { - coefficients := []float64{2.320436920291992e+07, 410.3731196298318, 0.002319594715894278} + coefficients := []float64{23204369.20292, 410.3731196298, 0.0023195947} degree := 2 x := float64(width * height) @@ -118,7 +118,7 @@ func GuessSD35LargeDiffusionModelMemoryUsage(width, height uint32, flashAttentio // GuessFLUXDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height, // which is calculated by linear regression or polynomial regression. 
func GuessFLUXDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 { - coefficients := []float64{4.651166867423782e+07, 997.7758807792155, 0.001457339256095295} + coefficients := []float64{46511668.67424, 997.7758807792, 0.0014573393} degree := 2 x := float64(width * height)