diff --git a/cmd/gguf-parser/README.md b/cmd/gguf-parser/README.md
new file mode 100644
index 0000000..a1f38f3
--- /dev/null
+++ b/cmd/gguf-parser/README.md
@@ -0,0 +1,234 @@
+# GGUF Parser
+
+Review/Check/Estimate [GGUF](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) files.
+
+## Usage
+
+```shell
+$ gguf-parser --help
+Usage of gguf-parser ...:
+  -batch-size int
+    	Specify the physical maximum batch size, which is used to estimate the usage, default is 512. (default 512)
+  -ctx-size int
+    	Specify the size of the prompt context, which is used to estimate the usage, default is equal to the model's maximum context size. (default -1)
+  -debug
+    	Enable debugging with verbose output.
+  -file string
+    	Model file below the --repo, e.g. Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf.
+  -flash-attention
+    	Specify enabling Flash Attention, which is used to estimate the usage. Flash Attention can reduce the usage of RAM/VRAM.
+  -json
+    	Output as JSON.
+  -json-pretty
+    	Output as pretty JSON. (default true)
+  -kv-type string
+    	Specify the type of Key-Value cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1], default is f16. Using a quantization type means enabling --flash-attention as well. (default "f16")
+  -no-mmap
+    	Specify disabling memory-mapped loading, which is used to estimate the usage. Memory-mapped loading can avoid loading all of the model weights into RAM.
+  -offload-layers int
+    	Specify how many layers to offload, which is used to estimate the usage, default is fully offloaded. (default -1)
+  -offload-layers-step uint
+    	Specify the step of layers to offload, works with --offload-layers.
+  -parallel-size int
+    	Specify the number of parallel sequences to decode, which is used to estimate the usage, default is 1. (default 1)
+  -path string
+    	Path of the GGUF file to load, e.g. ~/.cache/lm-studio/models/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf.
+  -repo string
+    	HuggingFace repository where the GGUF file is stored, e.g. NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF, works with --file.
+  -skip-architecture
+    	Skip displaying architecture metadata.
+  -skip-estimate
+    	Skip estimating the usage.
+  -skip-model
+    	Skip displaying model metadata.
+  -skip-tls-verify
+    	Skip TLS verification, works with --url.
+  -skip-tokenizer
+    	Skip displaying tokenizer metadata.
+  -url string
+    	URL of the GGUF file to load, e.g. https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf. Note that gguf-parser does not need to download the entire GGUF file.
+  -version
+    	Show gguf-parser version.
+```
+
+### Parse
+
+#### Parse local GGUF file
+
+```shell
+$ gguf-parser --path="~/.cache/lm-studio/models/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf"
++-------+-------+-------+----------------------+----------------+---------------+----------+------------+----------+
+| MODEL | NAME | ARCH | QUANTIZATION VERSION | FILE TYPE | LITTLE ENDIAN | SIZE | PARAMETERS | BPW |
++ +-------+-------+----------------------+----------------+---------------+----------+------------+----------+
+| | jeffq | llama | 2 | IQ3_XXS/Q5_K_M | true | 4.78 GiB | 7.24 B | 5.67 bpw |
++-------+-------+-------+----------------------+----------------+---------------+----------+------------+----------+
+
++--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
+| ARCHITECTURE | MAX CONTEXT LEN | EMBEDDING LEN | EMBEDDING GQA | ATTENTION HEAD CNT | LAYERS | FEED FORWARD LEN | EXPERT CNT | VOCABULARY LEN |
++ +-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
+| | 32768 | 4096 | 1024 | 32 | 32 | 14336 | 0 | 32032 |
++--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
+
++-----------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
+| TOKENIZER | MODEL | TOKENS SIZE | TOKENS LEN | ADDED TOKENS LEN | BOS TOKEN | EOS TOKEN | UNKNOWN TOKEN | SEPARATOR TOKEN | PADDING TOKEN |
++ +-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
+| | llama | 450.50 KiB | 32032 | 0 | 1 | 32000 | N/A | N/A | N/A |
++-----------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+
+
++----------+-------+--------------+-----------------+--------------+----------------+----------+------------+-------------+
+| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | UMA RAM | NONUMA RAM | NONUMA VRAM |
++ +-------+--------------+-----------------+--------------+----------------+----------+------------+-------------+
+| | llama | 32768 | false | true | 32 | 4.09 GiB | 238.39 MiB | 10.70 GiB |
++----------+-------+--------------+-----------------+--------------+----------------+----------+------------+-------------+
+
+```
+
+#### Parse remote GGUF file
+
+```shell
+$ gguf-parser --url="https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF/resolve/main/Nous-Hermes-2-Mixtral-8x7B-DPO.Q3_K_M.gguf"
++-------+----------+-------+----------------------+-------------+---------------+--------+------------+----------+
+| MODEL | NAME | ARCH | QUANTIZATION VERSION | FILE TYPE | LITTLE ENDIAN | SIZE | PARAMETERS | BPW |
++ +----------+-------+----------------------+-------------+---------------+--------+------------+----------+
+| | emozilla | llama | 2 | Q4_K/Q3_K_M | true | 21 GiB | 46.70 B | 3.86 bpw |
++-------+----------+-------+----------------------+-------------+---------------+--------+------------+----------+
+
++--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+
+| ARCHITECTURE | MAX CONTEXT LEN | EMBEDDING LEN | EMBEDDING GQA | ATTENTION HEAD CNT | LAYERS | FEED FORWARD LEN | EXPERT CNT | 
VOCABULARY LEN | ++ +-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+ +| | 32768 | 4096 | 1024 | 32 | 32 | 14336 | 8 | 32002 | ++--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+ + ++-----------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+ +| TOKENIZER | MODEL | TOKENS SIZE | TOKENS LEN | ADDED TOKENS LEN | BOS TOKEN | EOS TOKEN | UNKNOWN TOKEN | SEPARATOR TOKEN | PADDING TOKEN | ++ +-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+ +| | llama | 449.91 KiB | 32002 | 0 | 1 | 32000 | 0 | N/A | 2 | ++-----------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+ + ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | UMA RAM | NONUMA RAM | NONUMA VRAM | ++ +-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| | llama | 32768 | false | false | 32 | 25.08 GiB | 395.24 MiB | 26.94 GiB | ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ + +``` + +#### Parse HuggingFace GGUF file + +```shell +$ gguf-parser --repo="openbmb/MiniCPM-Llama3-V-2_5-gguf" --file="ggml-model-Q5_K_M.gguf" ++-------+-------+-------+----------------------+----------------+---------------+----------+------------+----------+ +| MODEL | NAME | ARCH | QUANTIZATION VERSION | FILE TYPE | LITTLE ENDIAN | SIZE | PARAMETERS | BPW | ++ +-------+-------+----------------------+----------------+---------------+----------+------------+----------+ +| | model | llama | 2 | IQ3_XXS/Q5_K_M | true | 5.33 GiB | 8.03 B | 5.70 bpw | ++-------+-------+-------+----------------------+----------------+---------------+----------+------------+----------+ + ++--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+ +| ARCHITECTURE | MAX CONTEXT LEN | EMBEDDING LEN | EMBEDDING GQA | ATTENTION HEAD CNT | LAYERS | FEED FORWARD LEN | EXPERT CNT | VOCABULARY LEN | ++ +-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+ +| | 8192 | 4096 | 1024 | 32 | 32 | 14336 | 0 | 128256 | ++--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+ + ++-----------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+ +| TOKENIZER | MODEL | TOKENS SIZE | TOKENS LEN | ADDED TOKENS LEN | BOS TOKEN | EOS TOKEN | UNKNOWN TOKEN | SEPARATOR TOKEN | PADDING TOKEN | ++ +-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+ +| | gpt2 | 2 MiB | 128256 | 0 | 128000 | 128001 | 128002 | N/A | 0 | ++-----------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+ + 
++----------+-------+--------------+-----------------+--------------+----------------+----------+------------+-------------+ +| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | UMA RAM | NONUMA RAM | NONUMA VRAM | ++ +-------+--------------+-----------------+--------------+----------------+----------+------------+-------------+ +| | llama | 8192 | false | true | 32 | 1.08 GiB | 234.61 MiB | 6.25 GiB | ++----------+-------+--------------+-----------------+--------------+----------------+----------+------------+-------------+ + +``` + +### Estimate + +#### Estimate with zero layers offload + +```shell +$ gguf-parser --repo="mradermacher/Falcon2-8B-Dutch-GGUF" --file="Falcon2-8B-Dutch.Q5_K_M.gguf" --skip-model --skip-architecture --skip-tokenizer --offload-layers=0 ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | UMA RAM | NONUMA RAM | NONUMA VRAM | ++ +-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| | llama | 32768 | false | false | 0 | 25.08 GiB | 25.23 GiB | 2.10 GiB | ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ + +``` + +#### Estimate with specific layers offload + +```shell +$ gguf-parser --repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --file="Nous-Hermes-2-Mixtral-8x7B-DPO.Q3_K_M.gguf" --skip-model --skip-architecture --skip-tokenizer --offload-layers=10 ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | UMA RAM | NONUMA RAM | NONUMA VRAM | ++ +-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| | llama | 32768 | false | false | 10 | 25.08 GiB | 17.50 GiB | 9.83 GiB | ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ + +``` + +#### Estimate with specific context size + +```shell +$ gguf-parser --repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --file="Nous-Hermes-2-Mixtral-8x7B-DPO.Q3_K_M.gguf" --skip-model --skip-architecture --skip-tokenizer --ctx-size=4096 ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | UMA RAM | NONUMA RAM | NONUMA VRAM | ++ +-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| | llama | 4096 | false | false | 32 | 21.53 GiB | 339.24 MiB | 21.64 GiB | ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ + +``` + +#### Estimate with Flash Attention + +```shell +$ gguf-parser --repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --file="Nous-Hermes-2-Mixtral-8x7B-DPO.Q3_K_M.gguf" --skip-model --skip-architecture --skip-tokenizer --flash-attention ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | UMA RAM | NONUMA RAM | NONUMA VRAM | ++ 
+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| | llama | 32768 | true | false | 32 | 25.08 GiB | 395.24 MiB | 25.08 GiB | ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ + +``` + +#### Estimate with No MMap + +```shell +$ gguf-parser --repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --file="Nous-Hermes-2-Mixtral-8x7B-DPO.Q3_K_M.gguf" --skip-model --skip-architecture --skip-tokenizer --offload-layers=10 --no-mmap ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | UMA RAM | NONUMA RAM | NONUMA VRAM | ++ +-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| | llama | 32768 | false | false | 10 | 25.08 GiB | 17.50 GiB | 9.83 GiB | ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ + +``` + +#### Estimate step-by-step offload layers + +```shell +$ gguf-parser --repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --file="Nous-Hermes-2-Mixtral-8x7B-DPO.Q3_K_M.gguf" --skip-model --skip-architecture --skip-tokenizer --offload-layers=10 --offload-layers-step=5 ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| ESTIMATE | ARCH | CONTEXT SIZE | FLASH ATTENTION | MMAP SUPPORT | OFFLOAD LAYERS | UMA RAM | NONUMA RAM | NONUMA VRAM | ++ +-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ +| | llama | 32768 | false | false | 0 | 25.08 GiB | 25.23 GiB | 2.10 GiB | ++ + + + + +----------------+ +------------+-------------+ +| | | | | | 5 | | 21.36 GiB | 5.97 GiB | ++ + + + + +----------------+ +------------+-------------+ +| | | | | | 10 | | 17.50 GiB | 9.83 GiB | ++ + + + + +----------------+ +------------+-------------+ +| | | | | | 15 | | 13.63 GiB | 13.70 GiB | ++ + + + + +----------------+ +------------+-------------+ +| | | | | | 20 | | 9.77 GiB | 17.56 GiB | ++ + + + + +----------------+ +------------+-------------+ +| | | | | | 25 | | 5.91 GiB | 21.42 GiB | ++ + + + + +----------------+ +------------+-------------+ +| | | | | | 32 | | 395.24 MiB | 26.94 GiB | ++----------+-------+--------------+-----------------+--------------+----------------+-----------+------------+-------------+ + +``` + +## License + +MIT diff --git a/cmd/gguf-parser/go.mod b/cmd/gguf-parser/go.mod index aa4cdde..f6441fe 100644 --- a/cmd/gguf-parser/go.mod +++ b/cmd/gguf-parser/go.mod @@ -10,7 +10,6 @@ require ( ) require ( - github.com/dustin/go-humanize v1.0.1 // indirect github.com/henvic/httpretty v0.1.3 // indirect github.com/mattn/go-runewidth v0.0.9 // indirect github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect diff --git a/cmd/gguf-parser/go.sum b/cmd/gguf-parser/go.sum index 59b81c1..2d428fb 100644 --- a/cmd/gguf-parser/go.sum +++ b/cmd/gguf-parser/go.sum @@ -1,7 +1,5 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= -github.com/dustin/go-humanize v1.0.1/go.mod 
h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
 github.com/henvic/httpretty v0.1.3 h1:4A6vigjz6Q/+yAfTD4wqipCv+Px69C7Th/NhT0ApuU8=
 github.com/henvic/httpretty v0.1.3/go.mod h1:UUEv7c2kHZ5SPQ51uS3wBpzPDibg2U3Y+IaXyHy5GBg=
 github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
diff --git a/cmd/gguf-parser/main.go b/cmd/gguf-parser/main.go
index 3a94237..cca74ea 100644
--- a/cmd/gguf-parser/main.go
+++ b/cmd/gguf-parser/main.go
@@ -7,6 +7,7 @@ import (
 	"context"
 	"strconv"
 	"strings"
+	"sync"
 
 	stdjson "encoding/json"
 
 	"github.com/olekukonko/tablewriter"
@@ -22,22 +23,22 @@ func main() {
 	// Parse arguments.
 	var (
-		// model
-		path        string
-		url         string
-		repo, model string
+		// model options
+		path       string
+		url        string
+		repo, file string
 		// read options
-		debug     bool
-		skipProxy bool
-		skipTLS   bool
+		debug         bool
+		skipTLSVerify bool
 		// estimate options
-		ctxSize        = -1
-		batchSize      = 512
-		parallelSize   = 1
-		kvType         = "f16"
-		offloadLayers  = -1
-		flashAttention bool
-		noMMap         bool
+		ctxSize           = -1
+		batchSize         = 512
+		parallelSize      = 1
+		kvType            = "f16"
+		flashAttention    bool
+		noMMap            bool
+		offloadLayers     = -1
+		offloadLayersStep uint64
 		// output options
 		version   bool
 		skipModel bool
@@ -52,34 +53,50 @@
 		_, _ = fmt.Fprintf(fs.Output(), "Usage of gguf-parser %v:\n", Version)
 		fs.PrintDefaults()
 	}
-	fs.StringVar(&path, "path", path, "Path to load model, e.g. ~/.cache"+
+	fs.StringVar(&path, "path", path, "Path of the GGUF file to load, e.g. ~/.cache"+
 		"/lm-studio/models/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF/"+
-		"Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf")
-	fs.StringVar(&url, "url", url, "Url to load model, e.g. "+
+		"Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf.")
+	fs.StringVar(&url, "url", url, "URL of the GGUF file to load, e.g. "+
 		"https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF"+
-		"/resolve/main/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf")
-	fs.StringVar(&repo, "repo", repo, "Repo of HuggingFace, e.g. "+
-		"NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF")
-	fs.StringVar(&model, "model", model, "Model below the --repo, e.g. "+
-		"Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf")
-	fs.BoolVar(&debug, "debug", debug, "Debug mode")
-	fs.BoolVar(&skipProxy, "skip-proxy", skipProxy, "Skip using proxy when reading from a remote URL")
-	fs.BoolVar(&skipTLS, "skip-tls", skipTLS, "Skip TLS verification when reading from a remote URL")
-	fs.IntVar(&ctxSize, "ctx-size", ctxSize, "Context size to estimate memory usage, default is equal to the model's maximum context size")
-	fs.IntVar(&batchSize, "batch-size", batchSize, "Physical maximum batch size")
-	fs.IntVar(&parallelSize, "parallel", parallelSize, "Number of parallel sequences to decode")
-	fs.StringVar(&kvType, "kv-type", kvType, "Key-Value cache type, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1], "+
-		"using quantization type means enabling Flash Attention as well")
-	fs.IntVar(&offloadLayers, "offload-layers", offloadLayers, "Specify how many layers to offload, default is fully offloading")
-	fs.BoolVar(&flashAttention, "flash-attention", flashAttention, "Enable Flash Attention to reduce the memory usage, which influences the estimate result")
-	fs.BoolVar(&noMMap, "no-mmap", noMMap, "Disable using memory-mapped model(file) loading, which influences the estimate result")
-	fs.BoolVar(&version, "version", version, "Show version")
-	fs.BoolVar(&skipModel, "skip-model", skipModel, "Skip model metadata")
-	fs.BoolVar(&skipArchitecture, "skip-architecture", skipArchitecture, "Skip architecture metadata")
-	fs.BoolVar(&skipTokenizer, "skip-tokenizer", skipTokenizer, "Skip tokenizer metadata")
-	fs.BoolVar(&skipEstimate, "skip-estimate", skipEstimate, "Skip estimate")
-	fs.BoolVar(&json, "json", json, "Output as JSON")
-	fs.BoolVar(&jsonPretty, "json-pretty", jsonPretty, "Output as pretty JSON")
+		"/resolve/main/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf. "+
+		"Note that gguf-parser does not need to download the entire GGUF file.")
+	fs.StringVar(&repo, "repo", repo, "HuggingFace repository where the GGUF file is stored, e.g. "+
+		"NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF, works with --file.")
+	fs.StringVar(&file, "file", file, "Model file below the --repo, e.g. "+
+		"Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf.")
+	fs.BoolVar(&debug, "debug", debug, "Enable debugging with verbose output.")
+	fs.BoolVar(&skipTLSVerify, "skip-tls-verify", skipTLSVerify, "Skip TLS verification, works with --url.")
+	fs.IntVar(&ctxSize, "ctx-size", ctxSize, "Specify the size of the prompt context, "+
+		"which is used to estimate the usage, "+
+		"default is equal to the model's maximum context size.")
+	fs.IntVar(&batchSize, "batch-size", batchSize, "Specify the physical maximum batch size, "+
+		"which is used to estimate the usage, "+
+		"default is 512.")
+	fs.IntVar(&parallelSize, "parallel-size", parallelSize, "Specify the number of parallel sequences to decode, "+
+		"which is used to estimate the usage, "+
+		"default is 1.")
+	fs.StringVar(&kvType, "kv-type", kvType, "Specify the type of Key-Value cache, "+
+		"which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1], "+
+		"default is f16. "+
+		"Using a quantization type means enabling --flash-attention as well.")
+	fs.BoolVar(&flashAttention, "flash-attention", flashAttention, "Specify enabling Flash Attention, "+
+		"which is used to estimate the usage. "+
+		"Flash Attention can reduce the usage of RAM/VRAM.")
+	fs.BoolVar(&noMMap, "no-mmap", noMMap, "Specify disabling memory-mapped loading, "+
+		"which is used to estimate the usage. "+
+		"Memory-mapped loading can avoid loading all of the model weights into RAM.")
+	fs.IntVar(&offloadLayers, "offload-layers", offloadLayers, "Specify how many layers to offload, "+
+		"which is used to estimate the usage, "+
+		"default is fully offloaded.")
+	fs.Uint64Var(&offloadLayersStep, "offload-layers-step", offloadLayersStep, "Specify the step of layers to offload, "+
+		"works with --offload-layers.")
+	fs.BoolVar(&version, "version", version, "Show gguf-parser version.")
+	fs.BoolVar(&skipModel, "skip-model", skipModel, "Skip displaying model metadata.")
+	fs.BoolVar(&skipArchitecture, "skip-architecture", skipArchitecture, "Skip displaying architecture metadata.")
+	fs.BoolVar(&skipTokenizer, "skip-tokenizer", skipTokenizer, "Skip displaying tokenizer metadata.")
+	fs.BoolVar(&skipEstimate, "skip-estimate", skipEstimate, "Skip estimating the usage.")
+	fs.BoolVar(&json, "json", json, "Output as JSON.")
+	fs.BoolVar(&jsonPretty, "json-pretty", jsonPretty, "Output as pretty JSON.")
 	if err := fs.Parse(os.Args[1:]); err != nil {
 		fmt.Println(err.Error())
 		os.Exit(1)
@@ -99,10 +116,7 @@ func main() {
 	if debug {
 		ropts = append(ropts, UseDebug())
 	}
-	if skipProxy {
-		ropts = append(ropts, SkipProxy())
-	}
-	if skipTLS {
+	if skipTLSVerify {
 		ropts = append(ropts, SkipTLSVerification())
 	}
 
@@ -141,9 +155,6 @@ func main() {
 		}
 		eopts = append(eopts, WithCacheKeyType(kv), WithCacheValueType(kv))
 	}
-	if offloadLayers >= 0 {
-		eopts = append(eopts, WithOffloadLayers(uint64(offloadLayers)))
-	}
 	if flashAttention {
 		eopts = append(eopts, WithFlashAttention())
 	}
@@ -152,6 +163,8 @@ func main() {
 
 	var gf *GGUFFile
 	{
+		ropts := ropts[:len(ropts):len(ropts)]
+
 		var err error
 		switch {
 		default:
@@ -161,8 +174,8 @@ func main() {
 			gf, err = ParseGGUFFile(path, ropts...)
 		case url != "":
 			gf, err = ParseGGUFFileRemote(ctx, url, ropts...)
-		case repo != "" && model != "":
-			gf, err = ParseGGUFFileFromHuggingFace(ctx, repo, model, ropts...)
+		case repo != "" && file != "":
+			gf, err = ParseGGUFFileFromHuggingFace(ctx, repo, file, ropts...)
 		}
 		if err != nil {
 			_, _ = fmt.Fprintf(os.Stderr, "failed to parse GGUF file: %s\n", err.Error())
 			os.Exit(1)
@@ -179,13 +192,18 @@ func main() {
 	if !skipModel {
 		m = gf.Model()
 	}
-	if !skipArchitecture {
+	if !skipArchitecture || !skipEstimate {
 		a = gf.Architecture()
 	}
-	if !skipTokenizer {
+	if !skipTokenizer || !skipEstimate {
 		t = gf.Tokenizer()
 	}
 	if !skipEstimate {
+		eopts := eopts[:len(eopts):len(eopts)]
+
+		if offloadLayers >= 0 {
+			eopts = append(eopts, WithOffloadLayers(uint64(offloadLayers)))
+		}
 		e = gf.EstimateLLaMACppUsage(eopts...)
 	}
 
@@ -204,6 +222,28 @@ func main() {
 	}
 	if !skipEstimate {
 		es := e.Summarize(!noMMap)
+		switch {
+		case offloadLayersStep > e.OffloadLayers:
+			offloadLayersStep = e.OffloadLayers
+		case offloadLayersStep <= 0:
+			offloadLayersStep = e.OffloadLayers
+		}
+		if offloadLayersStep < e.OffloadLayers {
+			ess := make([]LLaMACppUsageEstimateMemorySummary, e.OffloadLayers/offloadLayersStep+1)
+			var wg sync.WaitGroup
+			for i := 0; i < cap(ess); i++ {
+				wg.Add(1)
+				go func(i int) {
+					defer wg.Done()
+					eopts := eopts[:len(eopts):len(eopts)]
+					eopts = append(eopts, WithOffloadLayers(uint64(i)*offloadLayersStep))
+					ess[i] = gf.EstimateLLaMACppUsage(eopts...).SummarizeMemory(!noMMap)
+				}(i)
+			}
+			wg.Wait()
+			ess[cap(ess)-1] = es.Memory[0]
+			es.Memory = ess
+		}
 		o["estimate"] = es
 	}
 
@@ -270,27 +310,45 @@
 
 	if !skipEstimate {
 		es := e.Summarize(!noMMap)
-		tprint(
-			"ESTIMATE",
-			[]string{"Arch", "Context Size", "Full Offload", "Flash Attention", "MMap Support", "Mem. 
Arch", "Usage"}, - []string{ - sprintf(es.Architecture), - sprintf(es.ContextSize), - sprintf(es.FullOffload), - sprintf(es.FlashAttention), - sprintf(!es.NoMMap), - "UMA", - sprintf(es.UMA), - }, - []string{ + switch { + case offloadLayersStep > e.OffloadLayers: + offloadLayersStep = e.OffloadLayers + case offloadLayersStep <= 0: + offloadLayersStep = e.OffloadLayers + } + if offloadLayersStep < e.OffloadLayers { + ess := make([]LLaMACppUsageEstimateMemorySummary, e.OffloadLayers/offloadLayersStep+1) + var wg sync.WaitGroup + for i := 0; i < cap(ess); i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + eopts := eopts[:len(eopts):len(eopts)] + eopts = append(eopts, WithOffloadLayers(uint64(i)*offloadLayersStep)) + ess[i] = gf.EstimateLLaMACppUsage(eopts...).SummarizeMemory(!noMMap) + }(i) + } + wg.Wait() + ess[cap(ess)-1] = es.Memory[0] + es.Memory = ess + } + bd := make([][]string, len(es.Memory)) + for i := range es.Memory { + bd[i] = []string{ sprintf(es.Architecture), sprintf(es.ContextSize), - sprintf(es.FullOffload), sprintf(es.FlashAttention), sprintf(!es.NoMMap), - "NonUMA", - sprintf("%s (RAM) + %s (VRAM)", es.NonUMA.RAM, es.NonUMA.VRAM), - }) + sprintf(es.Memory[i].OffloadLayers), + sprintf(es.Memory[i].UMA), + sprintf(es.Memory[i].NonUMA.RAM), + sprintf(es.Memory[i].NonUMA.VRAM), + } + } + tprint( + "ESTIMATE", + []string{"Arch", "Context Size", "Flash Attention", "MMap Support", "Offload Layers", "UMA RAM", "NonUMA RAM", "NonUMA VRAM"}, + bd...) } } @@ -337,7 +395,7 @@ func tprint(title string, header []string, body ...[]string) { tb.SetAlignment(tablewriter.ALIGN_CENTER) tb.SetHeaderLine(true) tb.SetRowLine(true) - tb.SetAutoMergeCellsByColumnIndex([]int{0, 1, 2, 3, 4}) + tb.SetAutoMergeCells(true) tb.Append(append([]string{title}, header...)) for i := range body { tb.Append(append([]string{title}, body[i]...)) diff --git a/file.go b/file.go index f7ce060..569f8e4 100644 --- a/file.go +++ b/file.go @@ -13,7 +13,6 @@ import ( "strings" "time" - "github.com/dustin/go-humanize" "golang.org/x/exp/constraints" "github.com/thxcode/gguf-parser-go/util/bytex" @@ -298,8 +297,8 @@ func ParseGGUFFileRemote(ctx context.Context, url string, opts ...GGUFReadOption // ParseGGUFFileFromHuggingFace parses a GGUF file from Hugging Face, // and returns a GGUFFile, or an error if any. -func ParseGGUFFileFromHuggingFace(ctx context.Context, repo, model string, opts ...GGUFReadOption) (*GGUFFile, error) { - return ParseGGUFFileRemote(ctx, fmt.Sprintf("https://huggingface.co/%s/resolve/main/%s", repo, model), opts...) +func ParseGGUFFileFromHuggingFace(ctx context.Context, repo, file string, opts ...GGUFReadOption) (*GGUFFile, error) { + return ParseGGUFFileRemote(ctx, fmt.Sprintf("https://huggingface.co/%s/resolve/main/%s", repo, file), opts...) 
}
 
 func parseGGUFFile(s int64, f io.ReadSeeker, o _GGUFReadOptions) (_ *GGUFFile, err error) {
@@ -507,31 +506,78 @@ func (gf *GGUFFile) layers() GGUFLayerTensorInfos {
 	return ret
 }
 
+// IEC binary byte sizes.
+const (
+	_KiBytes = 1 << ((iota + 1) * 10)
+	_MiBytes
+	_GiBytes
+	_TiBytes
+	_PiBytes
+)
+
 func (s GGUFBytesScalar) String() string {
 	if s == 0 {
 		return "0 B"
 	}
-	return humanize.IBytes(uint64(s))
+	b, u := float64(1), "B"
+	switch {
+	case s >= _PiBytes:
+		b = _PiBytes
+		u = "PiB"
+	case s >= _TiBytes:
+		b = _TiBytes
+		u = "TiB"
+	case s >= _GiBytes:
+		b = _GiBytes
+		u = "GiB"
+	case s >= _MiBytes:
+		b = _MiBytes
+		u = "MiB"
+	case s >= _KiBytes:
+		b = _KiBytes
+		u = "KiB"
+	}
+	f := strconv.FormatFloat(float64(s)/b, 'f', 2, 64)
+	return strings.TrimSuffix(f, ".00") + " " + u
 }
 
+// SI decimal scales for parameter counts.
+const (
+	_Thousand    = 1e3
+	_Million     = 1e6
+	_Billion     = 1e9
+	_Trillion    = 1e12
+	_Quadrillion = 1e15
+)
+
 func (s GGUFParametersScalar) String() string {
 	if s == 0 {
 		return "0"
 	}
+	b, u := float64(1), ""
 	switch {
-	case s >= 1e15:
-		return humanize.CommafWithDigits(float64(s)/1e15, 1) + " Q"
-	case s >= 1e12:
-		return humanize.CommafWithDigits(float64(s)/1e12, 1) + " T"
-	case s >= 1e9:
-		return humanize.CommafWithDigits(float64(s)/1e9, 1) + " B"
-	case s >= 1e6:
-		return humanize.CommafWithDigits(float64(s)/1e6, 1) + " M"
-	case s >= 1e3:
-		return humanize.CommafWithDigits(float64(s)/1e3, 1) + " K"
-	default:
-		return strconv.Itoa(int(s))
-	}
+	case s >= _Quadrillion:
+		b = _Quadrillion
+		u = "Q"
+	case s >= _Trillion:
+		b = _Trillion
+		u = "T"
+	case s >= _Billion:
+		b = _Billion
+		u = "B"
+	case s >= _Million:
+		b = _Million
+		u = "M"
+	case s >= _Thousand:
+		b = _Thousand
+		u = "K"
+	}
+	f := strconv.FormatFloat(float64(s)/b, 'f', 2, 64)
+	f = strings.TrimSuffix(f, ".00")
+	if u == "" {
+		// Below one thousand there is no unit suffix; avoid a trailing space.
+		return f
+	}
+	return f + " " + u
 }
 
 func (s GGUFBitsPerWeightScalar) String() string {
diff --git a/file_estimate.go b/file_estimate.go
index 96bb9db..8a398d6 100644
--- a/file_estimate.go
+++ b/file_estimate.go
@@ -16,14 +16,16 @@ type (
 		// FlashAttention is the flag to indicate whether enable the flash attention,
 		// true for enable.
 		FlashAttention bool `json:"flashAttention"`
+		// ContextSize is the size of the context.
+		ContextSize uint64 `json:"contextSize"`
 		// FullOffload is the flag to indicate whether the layers are fully offloaded,
 		// false for partial offloaded or zero offloaded.
 		FullOffload bool `json:"fullOffload"`
+		// OffloadLayers is the number of offloaded layers.
+		OffloadLayers uint64 `json:"offloadLayers"`
 		// NoMMap is the flag to indicate whether the file must be loaded without mmap,
 		// true for total loaded.
 		NoMMap bool `json:"noMMap"`
-		// ContextSize is the size of the context.
-		ContextSize uint64 `json:"contextSize"`
 		// Load is the memory usage for running the GGUF file in RAM.
 		Load LLaMACppMemoryUsage `json:"load"`
 		// Offload is the memory usage for loading the GGUF file in VRAM.
@@ -86,7 +88,21 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) (
 		o.CacheValueType = ptr.To(GGMLTypeF16)
 	}
 
-	a, t := gf.Architecture(), gf.Tokenizer()
+	// Architecture and tokenizer metadata.
+	var (
+		a GGUFArchitectureMetadata
+		t GGUFTokenizerMetadata
+	)
+	if o.Architecture != nil {
+		a = *o.Architecture
+	} else {
+		a = gf.Architecture()
+	}
+	if o.Tokenizer != nil {
+		t = *o.Tokenizer
+	} else {
+		t = gf.Tokenizer()
+	}
 	e.Architecture = a.Architecture
 
 	// Flash attention.
@@ -163,6 +179,7 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) (
 		nLoadLayers -= nOffloadLayers
 
 		e.FullOffload = isOffloadOutputLayer && nLoadLayers == 0
+		e.OffloadLayers = nOffloadLayers
 	}
 
 	// Footprint.
@@ -368,47 +385,58 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) (
 	return e
 }
 
-// LLaMACppUsageEstimateSummery represents the summary of the usage for loading the GGUF file in llama.cpp.
-type LLaMACppUsageEstimateSummery struct {
-	/* Basic */
-
-	// UMA represents the usage of Unified Memory Architecture.
-	UMA GGUFBytesScalar `json:"uma"`
-	// NonUMA represents the usage of Non-Unified Memory Architecture.
-	NonUMA struct {
-		// Load is the memory usage for loading the GGUF file in Load.
-		RAM GGUFBytesScalar `json:"ram"`
-		// VRAM is the memory usage for loading the GGUF file in VRAM.
-		VRAM GGUFBytesScalar `json:"vram"`
-	} `json:"nonUMA"`
-
-	/* Appendix */
-
-	// Architecture describes what architecture this model implements.
-	Architecture string `json:"architecture"`
-	// FlashAttention is the flag to indicate whether enable the flash attention,
-	// true for enable.
-	FlashAttention bool `json:"flashAttention"`
-	// FullOffload is the flag to indicate whether the layers are fully offloaded,
-	// false for partial offloaded or zero offloaded.
-	FullOffload bool `json:"fullOffload"`
-	// NoMMap is the flag to indicate whether the file must be loaded without mmap,
-	// true for total loaded.
-	NoMMap bool `json:"noMMap"`
-	// ContextSize is the size of the context.
-	ContextSize uint64 `json:"contextSize"`
-}
+// Types for LLaMACpp estimated summary.
+type (
+	// LLaMACppUsageEstimateSummary represents the summary of the usage for loading the GGUF file in llama.cpp.
+	LLaMACppUsageEstimateSummary struct {
+		/* Basic */
+
+		// Memory is the list of memory usage summaries, one entry per offload-layers choice.
+		Memory []LLaMACppUsageEstimateMemorySummary `json:"memory"`
+
+		/* Appendix */
+
+		// Architecture describes what architecture this model implements.
+		Architecture string `json:"architecture"`
+		// ContextSize is the size of the context.
+		ContextSize uint64 `json:"contextSize"`
+		// FlashAttention is the flag to indicate whether to enable the flash attention,
+		// true for enabled.
+		FlashAttention bool `json:"flashAttention"`
+		// NoMMap is the flag to indicate whether the file must be loaded without mmap,
+		// true for fully loaded.
+		NoMMap bool `json:"noMMap"`
+	}
+
+	// LLaMACppUsageEstimateMemorySummary represents the memory summary of the usage for loading the GGUF file in llama.cpp.
+	LLaMACppUsageEstimateMemorySummary struct {
+		// OffloadLayers is the number of offloaded layers.
+		OffloadLayers uint64 `json:"offloadLayers"`
+		// UMA represents the usage of Unified Memory Architecture.
+		UMA GGUFBytesScalar `json:"uma"`
+		// NonUMA represents the usage of Non-Unified Memory Architecture.
+		NonUMA struct {
+			// RAM is the memory usage for loading the GGUF file in RAM.
+			RAM GGUFBytesScalar `json:"ram"`
+			// VRAM is the memory usage for loading the GGUF file in VRAM.
+			VRAM GGUFBytesScalar `json:"vram"`
+		} `json:"nonUMA"`
+	}
+)
+
+// SummarizeMemory summarizes the memory usage of loading the GGUF file in llama.cpp.
+func (e LLaMACppUsageEstimate) SummarizeMemory(mmap bool) (ems LLaMACppUsageEstimateMemorySummary) {
+	ems.OffloadLayers = e.OffloadLayers
 
-func (e LLaMACppUsageEstimate) Summarize(mmap bool) (es LLaMACppUsageEstimateSummery) {
 	// UMA.
{ fp := e.Load.Footprint + e.Offload.Footprint wg := e.Load.Weight.Sum() + e.Offload.Weight.Sum() kv := e.Load.KVCache.Sum() + e.Offload.KVCache.Sum() cp := e.Load.Computation.Sum() - es.UMA = fp + wg + kv + cp + ems.UMA = fp + wg + kv + cp if !e.NoMMap && mmap { - es.UMA -= wg + ems.UMA -= wg } } @@ -431,24 +457,32 @@ func (e LLaMACppUsageEstimate) Summarize(mmap bool) (es LLaMACppUsageEstimateSum wg := e.Load.Weight.Sum() kv := e.Load.KVCache.Sum() cp := e.Load.Computation.Sum() - es.NonUMA.RAM = fp + wg + kv + cp + ems.NonUMA.RAM = fp + wg + kv + cp if !e.NoMMap && (mmap || e.FullOffload) { - es.NonUMA.RAM -= wg + ems.NonUMA.RAM -= wg } // VRAM. fp = e.Offload.Footprint wg = e.Offload.Weight.Sum() kv = e.Offload.KVCache.Sum() cp = e.Offload.Computation.Sum() - es.NonUMA.VRAM = fp + wg + kv + cp + ems.NonUMA.VRAM = fp + wg + kv + cp + } + + return ems +} + +func (e LLaMACppUsageEstimate) Summarize(mmap bool) (es LLaMACppUsageEstimateSummary) { + // Summarize memory. + es.Memory = []LLaMACppUsageEstimateMemorySummary{ + e.SummarizeMemory(mmap), } // Just copy from the original estimate. es.Architecture = e.Architecture + es.ContextSize = e.ContextSize es.FlashAttention = e.FlashAttention - es.FullOffload = e.FullOffload es.NoMMap = e.NoMMap - es.ContextSize = e.ContextSize return es } diff --git a/file_estimate_option.go b/file_estimate_option.go index 3c48053..813b54b 100644 --- a/file_estimate_option.go +++ b/file_estimate_option.go @@ -6,6 +6,8 @@ import ( type ( _LLaMACppUsageEstimateOptions struct { + Architecture *GGUFArchitectureMetadata + Tokenizer *GGUFTokenizerMetadata ContextSize *int32 BatchSize *int32 ParallelSize *int32 @@ -17,6 +19,24 @@ type ( LLaMACppUsageEstimateOption func(*_LLaMACppUsageEstimateOptions) ) +// WithArchitecture sets the architecture for the estimate. +// +// Allows reusing the same GGUFArchitectureMetadata for multiple estimates. +func WithArchitecture(arch GGUFArchitectureMetadata) LLaMACppUsageEstimateOption { + return func(o *_LLaMACppUsageEstimateOptions) { + o.Architecture = &arch + } +} + +// WithTokenizer sets the tokenizer for the estimate. +// +// Allows reusing the same GGUFTokenizerMetadata for multiple estimates. +func WithTokenizer(tokenizer GGUFTokenizerMetadata) LLaMACppUsageEstimateOption { + return func(o *_LLaMACppUsageEstimateOptions) { + o.Tokenizer = &tokenizer + } +} + // WithContextSize sets the context size for the estimate. 
func WithContextSize(size int32) LLaMACppUsageEstimateOption { return func(o *_LLaMACppUsageEstimateOptions) { diff --git a/go.mod b/go.mod index df64f18..2998fb2 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,6 @@ go 1.22 require ( github.com/davecgh/go-spew v1.1.1 - github.com/dustin/go-humanize v1.0.1 github.com/henvic/httpretty v0.1.3 github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b github.com/stretchr/testify v1.9.0 diff --git a/go.sum b/go.sum index e2f09c1..467e640 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,5 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= -github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/henvic/httpretty v0.1.3 h1:4A6vigjz6Q/+yAfTD4wqipCv+Px69C7Th/NhT0ApuU8= github.com/henvic/httpretty v0.1.3/go.mod h1:UUEv7c2kHZ5SPQ51uS3wBpzPDibg2U3Y+IaXyHy5GBg= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
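
As a usage note for the API reworked above, the sketch below shows one way the new pieces compose: parse a file once, reuse the decoded metadata through the new `WithArchitecture`/`WithTokenizer` options, then sweep `WithOffloadLayers` and collect `SummarizeMemory` rows the way `cmd/gguf-parser` now implements `--offload-layers-step`. This is a minimal sketch, not shipped code: the model path is a placeholder and the `parser` import alias is an assumption; only functions and types introduced or kept by this diff are used.

```go
package main

import (
	"fmt"

	parser "github.com/thxcode/gguf-parser-go"
)

func main() {
	// Placeholder path: point this at any local GGUF file.
	gf, err := parser.ParseGGUFFile("/path/to/model.gguf")
	if err != nil {
		panic(err)
	}

	// Decode the architecture/tokenizer metadata once and reuse it for every
	// estimate below via the new options, instead of re-deriving it per call.
	base := []parser.LLaMACppUsageEstimateOption{
		parser.WithArchitecture(gf.Architecture()),
		parser.WithTokenizer(gf.Tokenizer()),
	}

	// The default estimate is fully offloaded; its layer count bounds the sweep.
	full := gf.EstimateLLaMACppUsage(base...)

	const step uint64 = 5
	for i := uint64(0); i < full.OffloadLayers; i += step {
		opts := append(base[:len(base):len(base)], parser.WithOffloadLayers(i))
		m := gf.EstimateLLaMACppUsage(opts...).SummarizeMemory(true) // true: mmap is usable
		fmt.Printf("offload %3d layers: UMA %s, NonUMA %s (RAM) + %s (VRAM)\n",
			m.OffloadLayers, m.UMA, m.NonUMA.RAM, m.NonUMA.VRAM)
	}

	// Finish with the fully offloaded row, mirroring the CLI when
	// --offload-layers-step does not land exactly on the last layer.
	m := full.SummarizeMemory(true)
	fmt.Printf("offload %3d layers: UMA %s, NonUMA %s (RAM) + %s (VRAM)\n",
		m.OffloadLayers, m.UMA, m.NonUMA.RAM, m.NonUMA.VRAM)
}
```

The CLI runs the same sweep concurrently with a `sync.WaitGroup`; a serial loop keeps the sketch short, and passing `true` to `SummarizeMemory` matches the default `--no-mmap=false` behavior.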