diff --git a/Makefile b/Makefile index f70db18..2f9b858 100644 --- a/Makefile +++ b/Makefile @@ -70,8 +70,10 @@ gguf-parser: [[ -d "$(SRCDIR)/.dist" ]] || mkdir -p "$(SRCDIR)/.dist" cd "$(SRCDIR)/cmd/gguf-parser" && for os in darwin linux windows; do \ + tags="netgo"; \ if [[ $$os == "windows" ]]; then \ suffix=".exe"; \ + tags="netcgo"; \ else \ suffix=""; \ fi; \ @@ -80,7 +82,7 @@ gguf-parser: GOOS="$$os" GOARCH="$$arch" CGO_ENABLED=1 go build \ -trimpath \ -ldflags="-w -s -X main.Version=$(VERSION)" \ - -tags="netgo" \ + -tags="$$tags" \ -o $(SRCDIR)/.dist/gguf-parser-$$os-$$arch$$suffix; \ done; \ if [[ $$os == "darwin" ]]; then \ diff --git a/README.md b/README.md index 1047cd3..b4e5e09 100644 --- a/README.md +++ b/README.md @@ -23,12 +23,7 @@ go get github.com/thxcode/gguf-parser-go ``` -You can also use the command-line package. - -```shell -go install github.com/thxcode/gguf-parser-go/cmd/gguf-parser - -``` +If you need one-shot command-line, try [gguf-parser](./cmd/gguf-parser) please. ## Examples diff --git a/cmd/gguf-parser/README.md b/cmd/gguf-parser/README.md index f3d8174..a205977 100644 --- a/cmd/gguf-parser/README.md +++ b/cmd/gguf-parser/README.md @@ -76,15 +76,15 @@ Usage of gguf-parser ...: ### Parse -#### parse local GGUF file +#### Parse local GGUF file ```shell $ gguf-parser --path="~/.cache/lm-studio/models/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf" -+--------------+-------+-------+----------------------+----------------+---------------+----------+------------+----------+ -| \ | Name | Arch | Quantization Version | File Type | Little Endian | Size | Parameters | BPW | -+--------------+-------+-------+----------------------+----------------+---------------+----------+------------+----------+ -| MODEL | jeffq | llama | 2 | IQ3_XXS/Q5_K_M | true | 4.78 GiB | 7.24 B | 5.67 bpw | -+--------------+-------+-------+----------------------+----------------+---------------+----------+------------+----------+ ++--------------+-------+-------+----------------+---------------+----------+------------+----------+ +| \ | Name | Arch | Quantization | Little Endian | Size | Parameters | BPW | ++--------------+-------+-------+----------------+---------------+----------+------------+----------+ +| MODEL | jeffq | llama | IQ3_XXS/Q5_K_M | true | 4.78 GiB | 7.24 B | 5.67 bpw | ++--------------+-------+-------+----------------+---------------+----------+------------+----------+ +--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+ | \ | Max Context Len | Embedding Len | Embedding GQA | Attention Head Cnt | Layers | Feed Forward Len | Expert Cnt | Vocabulary Len | @@ -98,23 +98,23 @@ $ gguf-parser --path="~/.cache/lm-studio/models/NousResearch/Hermes-2-Pro-Mistra | TOKENIZER | llama | 450.50 KiB | 32032 | 0 | 1 | 32000 | N/A | N/A | N/A | +--------------+-------+-------------+------------+------------------+-----------+-----------+---------------+-----------------+---------------+ -+--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+ -| \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | -+--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+ -| ESTIMATE 
| llama | 32768 | false | true | 33 (32 + 1) | Yes | 88.39 MiB + 11.06 GiB = 8.68 GiB | 238.39 MiB | 11.06 GiB | -+--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+ ++--------------+-------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ +| \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | ++--------------+-------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ +| ESTIMATE | llama | 32768 | false | true | 33 (32 + 1) | Yes | 88.39 MiB + 8.59 GiB = 8.68 GiB | 238.39 MiB | 11.06 GiB | ++--------------+-------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ ``` -#### parse remote GGUF file +#### Parse remote GGUF file ```shell $ gguf-parser --url="https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF/resolve/main/Nous-Hermes-2-Mixtral-8x7B-DPO.Q3_K_M.gguf" -+--------------+----------+-------+----------------------+-------------+---------------+--------+------------+----------+ -| \ | Name | Arch | Quantization Version | File Type | Little Endian | Size | Parameters | BPW | -+--------------+----------+-------+----------------------+-------------+---------------+--------+------------+----------+ -| MODEL | emozilla | llama | 2 | Q4_K/Q3_K_M | true | 21 GiB | 46.70 B | 3.86 bpw | -+--------------+----------+-------+----------------------+-------------+---------------+--------+------------+----------+ ++--------------+----------+-------+--------------+---------------+--------+------------+----------+ +| \ | Name | Arch | Quantization | Little Endian | Size | Parameters | BPW | ++--------------+----------+-------+--------------+---------------+--------+------------+----------+ +| MODEL | emozilla | llama | Q4_K/Q3_K_M | true | 21 GiB | 46.70 B | 3.86 bpw | ++--------------+----------+-------+--------------+---------------+--------+------------+----------+ +--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+ | \ | Max Context Len | Embedding Len | Embedding GQA | Attention Head Cnt | Layers | Feed Forward Len | Expert Cnt | Vocabulary Len | @@ -131,7 +131,7 @@ $ gguf-parser --url="https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8 +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ | \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ -| ESTIMATE | llama | 32768 | false | false | 33 (32 + 1) | Yes | 245.24 MiB + 27.31 GiB = 25.08 GiB | 395.24 MiB | 27.31 GiB | +| ESTIMATE | llama | 32768 | false | false | 33 (32 + 1) | Yes | 245.24 MiB + 24.84 GiB = 25.08 GiB | 395.24 MiB | 27.31 GiB | 
+--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ ``` @@ -140,11 +140,11 @@ $ gguf-parser --url="https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8 ```shell $ gguf-parser --hf-repo="openbmb/MiniCPM-Llama3-V-2_5-gguf" --hf-file="ggml-model-Q5_K_M.gguf" -+--------------+-------+-------+----------------------+----------------+---------------+----------+------------+----------+ -| \ | Name | Arch | Quantization Version | File Type | Little Endian | Size | Parameters | BPW | -+--------------+-------+-------+----------------------+----------------+---------------+----------+------------+----------+ -| MODEL | model | llama | 2 | IQ3_XXS/Q5_K_M | true | 5.33 GiB | 8.03 B | 5.70 bpw | -+--------------+-------+-------+----------------------+----------------+---------------+----------+------------+----------+ ++--------------+-------+-------+----------------+---------------+----------+------------+----------+ +| \ | Name | Arch | Quantization | Little Endian | Size | Parameters | BPW | ++--------------+-------+-------+----------------+---------------+----------+------------+----------+ +| MODEL | model | llama | IQ3_XXS/Q5_K_M | true | 5.33 GiB | 8.03 B | 5.70 bpw | ++--------------+-------+-------+----------------+---------------+----------+------------+----------+ +--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+ | \ | Max Context Len | Embedding Len | Embedding GQA | Attention Head Cnt | Layers | Feed Forward Len | Expert Cnt | Vocabulary Len | @@ -161,7 +161,7 @@ $ gguf-parser --hf-repo="openbmb/MiniCPM-Llama3-V-2_5-gguf" --hf-file="ggml-mode +--------------+-------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ | \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ -| ESTIMATE | llama | 8192 | false | true | 33 (32 + 1) | Yes | 84.61 MiB + 6.49 GiB = 5.68 GiB | 234.61 MiB | 6.49 GiB | +| ESTIMATE | llama | 8192 | false | true | 33 (32 + 1) | Yes | 84.61 MiB + 5.59 GiB = 5.68 GiB | 234.61 MiB | 6.49 GiB | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ ``` @@ -170,11 +170,11 @@ $ gguf-parser --hf-repo="openbmb/MiniCPM-Llama3-V-2_5-gguf" --hf-file="ggml-mode ```shell $ gguf-parser --ol-model="gemma2" -+--------------+--------+--------+----------------------+-----------+---------------+----------+------------+----------+ -| \ | Name | Arch | Quantization Version | File Type | Little Endian | Size | Parameters | BPW | -+--------------+--------+--------+----------------------+-----------+---------------+----------+------------+----------+ -| MODEL | gemma2 | gemma2 | 2 | Q4_0 | true | 5.06 GiB | 9.24 B | 4.71 bpw | -+--------------+--------+--------+----------------------+-----------+---------------+----------+------------+----------+ ++--------------+--------+--------+--------------+---------------+----------+------------+----------+ +| \ | Name | Arch | Quantization | Little Endian | Size | Parameters | BPW | 
++--------------+--------+--------+--------------+---------------+----------+------------+----------+ +| MODEL | gemma2 | gemma2 | Q4_0 | true | 5.06 GiB | 9.24 B | 4.71 bpw | ++--------------+--------+--------+--------------+---------------+----------+------------+----------+ +--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+ | \ | Max Context Len | Embedding Len | Embedding GQA | Attention Head Cnt | Layers | Feed Forward Len | Expert Cnt | Vocabulary Len | @@ -191,15 +191,15 @@ $ gguf-parser --ol-model="gemma2" +--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ | \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | +--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ -| ESTIMATE | gemma2 | 8192 | false | true | 43 (42 + 1) | Yes | 65.97 MiB + 8.43 GiB = 7.05 GiB | 215.97 MiB | 8.43 GiB | +| ESTIMATE | gemma2 | 8192 | false | true | 43 (42 + 1) | Yes | 65.97 MiB + 6.99 GiB = 7.05 GiB | 215.97 MiB | 8.43 GiB | +--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ $ gguf-parser --ol-model="gemma2" --ol-crawl -+--------------+--------+--------+----------------------+-----------+---------------+----------+------------+----------+ -| \ | Name | Arch | Quantization Version | File Type | Little Endian | Size | Parameters | BPW | -+--------------+--------+--------+----------------------+-----------+---------------+----------+------------+----------+ -| MODEL | gemma2 | gemma2 | 2 | Q4_0 | true | 5.06 GiB | 9.24 B | 4.71 bpw | -+--------------+--------+--------+----------------------+-----------+---------------+----------+------------+----------+ ++--------------+--------+--------+--------------+---------------+----------+------------+----------+ +| \ | Name | Arch | Quantization | Little Endian | Size | Parameters | BPW | ++--------------+--------+--------+--------------+---------------+----------+------------+----------+ +| MODEL | gemma2 | gemma2 | Q4_0 | true | 5.06 GiB | 9.24 B | 4.71 bpw | ++--------------+--------+--------+--------------+---------------+----------+------------+----------+ +--------------+-----------------+---------------+---------------+--------------------+--------+------------------+------------+----------------+ | \ | Max Context Len | Embedding Len | Embedding GQA | Attention Head Cnt | Layers | Feed Forward Len | Expert Cnt | Vocabulary Len | @@ -216,11 +216,35 @@ $ gguf-parser --ol-model="gemma2" --ol-crawl +--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ | \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | +--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ -| ESTIMATE | gemma2 | 8192 | false | true | 43 (42 + 1) | Yes | 65.99 MiB + 8.43 GiB = 7.05 GiB | 215.99 MiB | 8.43 GiB | +| ESTIMATE | gemma2 | 8192 | false | true | 43 
(42 + 1) | Yes | 65.99 MiB + 6.99 GiB = 7.05 GiB | 215.99 MiB | 8.43 GiB | +--------------+--------+--------------+-----------------+--------------+----------------+----------------+---------------------------------+------------+-------------+ ``` +#### Parse Clip model + +```shell +$ gguf-parser --hf-repo="xtuner/llava-llama-3-8b-v1_1-gguf" --hf-file="llava-llama-3-8b-v1_1-mmproj-f16.gguf" ++--------------+-----------------------------------+------+--------------+---------------+------------+------------+-----------+ +| \ | Name | Arch | Quantization | Little Endian | Size | Parameters | BPW | ++--------------+-----------------------------------+------+--------------+---------------+------------+------------+-----------+ +| MODEL | openai/clip-vit-large-patch14-336 | clip | F16 | true | 595.49 MiB | 311.89 M | 16.02 bpw | ++--------------+-----------------------------------+------+--------------+---------------+------------+------------+-----------+ + ++--------------+---------------+--------+------------------+---------+-----------------+ +| \ | Embedding Len | Layers | Feed Forward Len | Encoder | LLaVA Projector | ++--------------+---------------+--------+------------------+---------+-----------------+ +| ARCHITECTURE | 1024 | 23 | 4096 | Vision | mlp | ++--------------+---------------+--------+------------------+---------+-----------------+ + ++--------------+------+----------------+----------------+------------+ +| \ | Arch | Offload Layers | Full Offloaded | (V)RAM | ++--------------+------+----------------+----------------+------------+ +| ESTIMATE | clip | 24 | Yes | 595.49 MiB | ++--------------+------+----------------+----------------+------------+ + +``` + ### Estimate #### Estimate with full layers offload (default) @@ -230,7 +254,7 @@ $ gguf-parser --hf-repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --hf- +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ | \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ -| ESTIMATE | llama | 32768 | false | false | 33 (32 + 1) | Yes | 245.24 MiB + 27.31 GiB = 25.08 GiB | 395.24 MiB | 27.31 GiB | +| ESTIMATE | llama | 32768 | false | false | 33 (32 + 1) | Yes | 245.24 MiB + 24.84 GiB = 25.08 GiB | 395.24 MiB | 27.31 GiB | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ ``` @@ -239,11 +263,11 @@ $ gguf-parser --hf-repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --hf- ```shell $ gguf-parser --hf-repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --hf-file="Nous-Hermes-2-Mixtral-8x7B-DPO.Q3_K_M.gguf" --skip-model --skip-architecture --skip-tokenizer --gpu-layers=0 -+--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+ -| \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | 
-+--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+
-| ESTIMATE | llama | 32768 | false | false | 0 | No | 25.09 GiB + 2.46 GiB = 25.09 GiB | 25.24 GiB | 2.46 GiB |
-+--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+
++--------------+-------+--------------+-----------------+--------------+----------------+----------------+-----------------------------+------------+-------------+
+| \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM |
++--------------+-------+--------------+-----------------+--------------+----------------+----------------+-----------------------------+------------+-------------+
+| ESTIMATE | llama | 32768 | false | false | 0 | No | 25.09 GiB + 0 B = 25.09 GiB | 25.24 GiB | 2.46 GiB |
++--------------+-------+--------------+-----------------+--------------+----------------+----------------+-----------------------------+------------+-------------+
```

@@ -251,11 +275,11 @@ $ gguf-parser --hf-repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --hf-file="Nous-Hermes-2-Mixtral-8x7B-DPO.Q3_K_M.gguf" --skip-model --skip-architecture --skip-tokenizer --gpu-layers=10
-+--------------+-------+--------------+-----------------+--------------+----------------+----------------+-----------------------------------+------------+-------------+
-| \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM |
-+--------------+-------+--------------+-----------------+--------------+----------------+----------------+-----------------------------------+------------+-------------+
-| ESTIMATE | llama | 32768 | false | false | 10 | No | 17.36 GiB + 10.19 GiB = 25.09 GiB | 17.51 GiB | 10.19 GiB |
-+--------------+-------+--------------+-----------------+--------------+----------------+----------------+-----------------------------------+------------+-------------+
++--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+
+| \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM |
++--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+
+| ESTIMATE | llama | 32768 | false | false | 10 | No | 17.36 GiB + 7.73 GiB = 25.09 GiB | 17.51 GiB | 10.19 GiB |
++--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+
```

@@ -266,7 +290,7 @@ $ gguf-parser --hf-repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --hf-
+--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+
| \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM |
+--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ -| ESTIMATE | llama | 4096 | false | false | 33 (32 + 1) | Yes | 189.24 MiB + 21.89 GiB = 21.53 GiB | 339.24 MiB | 21.89 GiB | +| ESTIMATE | llama | 4096 | false | false | 33 (32 + 1) | Yes | 189.24 MiB + 21.34 GiB = 21.53 GiB | 339.24 MiB | 21.89 GiB | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ ``` @@ -278,7 +302,7 @@ $ gguf-parser --hf-repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --hf- +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ | \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ -| ESTIMATE | llama | 32768 | true | false | 33 (32 + 1) | Yes | 245.24 MiB + 25.33 GiB = 25.08 GiB | 395.24 MiB | 25.33 GiB | +| ESTIMATE | llama | 32768 | true | false | 33 (32 + 1) | Yes | 245.24 MiB + 24.84 GiB = 25.08 GiB | 395.24 MiB | 25.33 GiB | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ ``` @@ -290,7 +314,7 @@ $ gguf-parser --hf-repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --hf- +--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+ | \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+ -| ESTIMATE | llama | 32768 | false | false | 0 | No | 25.09 GiB + 2.46 GiB = 25.09 GiB | 25.24 GiB | 2.46 GiB | +| ESTIMATE | llama | 32768 | false | false | 10 | No | 17.36 GiB + 7.73 GiB = 25.09 GiB | 17.51 GiB | 10.19 GiB | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+----------------------------------+------------+-------------+ ``` @@ -302,21 +326,21 @@ $ gguf-parser --hf-repo="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF" --hf- +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ | \ | Arch | Context Size | Flash Attention | MMap Support | Offload Layers | Full Offloaded | UMA (RAM + VRAM) | NonUMA RAM | NonUMA VRAM | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ -| ESTIMATE | llama | 32768 | false | false | 0 | No | 25.09 GiB + 2.46 GiB = 25.09 GiB | 25.24 GiB | 2.46 GiB | +| ESTIMATE | llama | 32768 | false | false | 0 | No | 25.09 GiB + 0 B = 25.09 GiB | 25.24 GiB | 2.46 GiB | + + + + + +----------------+ +------------------------------------+------------+-------------+ -| | | | | | 5 | | 21.23 GiB + 6.33 GiB = 25.09 GiB | 
21.37 GiB | 6.33 GiB | +| | | | | | 5 | | 21.23 GiB + 3.86 GiB = 25.09 GiB | 21.37 GiB | 6.33 GiB | + + + + + +----------------+ +------------------------------------+------------+-------------+ -| | | | | | 10 | | 17.36 GiB + 10.19 GiB = 25.09 GiB | 17.51 GiB | 10.19 GiB | +| | | | | | 10 | | 17.36 GiB + 7.73 GiB = 25.09 GiB | 17.51 GiB | 10.19 GiB | + + + + + +----------------+ +------------------------------------+------------+-------------+ -| | | | | | 15 | | 13.50 GiB + 14.06 GiB = 25.09 GiB | 13.64 GiB | 14.06 GiB | +| | | | | | 15 | | 13.50 GiB + 11.59 GiB = 25.09 GiB | 13.64 GiB | 14.06 GiB | + + + + + +----------------+ +------------------------------------+------------+-------------+ -| | | | | | 20 | | 9.63 GiB + 17.92 GiB = 25.09 GiB | 9.78 GiB | 17.92 GiB | +| | | | | | 20 | | 9.63 GiB + 15.46 GiB = 25.09 GiB | 9.78 GiB | 17.92 GiB | + + + + + +----------------+ +------------------------------------+------------+-------------+ -| | | | | | 25 | | 5.77 GiB + 21.79 GiB = 25.09 GiB | 5.91 GiB | 21.79 GiB | +| | | | | | 25 | | 5.77 GiB + 19.32 GiB = 25.09 GiB | 5.91 GiB | 21.79 GiB | + + + + + +----------------+ +------------------------------------+------------+-------------+ -| | | | | | 30 | | 1.90 GiB + 25.65 GiB = 25.09 GiB | 2.05 GiB | 25.65 GiB | +| | | | | | 30 | | 1.90 GiB + 23.19 GiB = 25.09 GiB | 2.05 GiB | 25.65 GiB | + + + + + +----------------+----------------+------------------------------------+------------+-------------+ -| | | | | | 33 (32 + 1) | Yes | 245.24 MiB + 27.31 GiB = 25.08 GiB | 395.24 MiB | 27.31 GiB | +| | | | | | 33 (32 + 1) | Yes | 245.24 MiB + 24.84 GiB = 25.08 GiB | 395.24 MiB | 27.31 GiB | +--------------+-------+--------------+-----------------+--------------+----------------+----------------+------------------------------------+------------+-------------+ ``` diff --git a/cmd/gguf-parser/main.go b/cmd/gguf-parser/main.go index 34dba80..eaaaa74 100644 --- a/cmd/gguf-parser/main.go +++ b/cmd/gguf-parser/main.go @@ -11,9 +11,11 @@ import ( "github.com/olekukonko/tablewriter" + "github.com/thxcode/gguf-parser-go/util/anyx" "github.com/thxcode/gguf-parser-go/util/json" . "github.com/thxcode/gguf-parser-go" + "regexp" ) var Version = "v0.0.0" @@ -31,6 +33,7 @@ func main() { hfFile string olModel string olCrawl bool + olUsage bool // read options debug bool skipProxy bool @@ -49,6 +52,7 @@ func main() { offloadLayersStep uint64 // output options version bool + raw bool skipModel bool skipArchitecture bool skipTokenizer bool @@ -69,18 +73,22 @@ func main() { "https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF"+ "/resolve/main/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf. "+ "Note that gguf-parser does not need to download the entire GGUF file.") - fs.StringVar(&hfRepo, "repo", hfRepo, "Repository of HuggingFace which the GGUF file store, e.g. "+ - "NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF, works with --file. [Deprecated, use --hf-repo instead]") - fs.StringVar(&hfFile, "file", hfFile, "Model file below the --repo, e.g. "+ - "Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf. [Deprecated, use --hf-file instead]") // Deprecated. + fs.StringVar(&hfRepo, "repo", hfRepo, "[DEPRECATED, use --hf-repo instead] "+ // Deprecated, remove when release v0.3.0. + "Repository of HuggingFace which the GGUF file store, e.g. "+ + "NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF, works with --file.") + fs.StringVar(&hfFile, "file", hfFile, "[DEPRECATED, use --hf-file instead] "+ // Deprecated, remove when release v0.3.0. 
+ "Model file below the --repo, e.g. "+ + "Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf.") fs.StringVar(&hfRepo, "hf-repo", hfRepo, "Repository of HuggingFace which the GGUF file store, e.g. "+ - "NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF, works with --hf-file.") // Deprecated. + "NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF, works with --hf-file.") fs.StringVar(&hfFile, "hf-file", hfFile, "Model file below the --repo, e.g. "+ "Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf.") fs.StringVar(&olModel, "ol-model", olModel, "Model name of Ollama, e.g. "+ "gemma2.") fs.BoolVar(&olCrawl, "ol-crawl", olCrawl, "Crawl the Ollama model instead of blobs fetching, "+ "which will be more efficient and faster, but lossy.") + fs.BoolVar(&olUsage, "ol-usage", olUsage, "Specify respecting the extending layers introduced by Ollama, "+ + "which affects the usage estimation.") fs.BoolVar(&debug, "debug", debug, "Enable debugging, verbosity.") fs.BoolVar(&skipProxy, "skip-proxy", skipProxy, "Skip proxy settings, "+ "works with --url/--hf-*/--ol-*, "+ @@ -119,17 +127,20 @@ func main() { fs.BoolVar(&noMMap, "no-mmap", noMMap, "Specify disabling Memory-Mapped using, "+ "which is used to estimate the usage. "+ "Memory-Mapped can avoid loading the entire model weights into RAM.") - fs.IntVar(&offloadLayers, "offload-layers", offloadLayers, "Specify how many layers to offload, "+ + fs.IntVar(&offloadLayers, "offload-layers", offloadLayers, "[DEPRECATED, use --gpu-layers instead] "+ // Deprecated, remove when release v0.3.0. + "Specify how many layers to offload, "+ "which is used to estimate the usage, "+ - "default is full offloaded. [Deprecated, use --gpu-layers instead]") // Deprecated. + "default is full offloaded.") fs.IntVar(&offloadLayers, "gpu-layers", offloadLayers, "Specify how many layers to offload, "+ "which is used to estimate the usage, "+ "default is full offloaded.") - fs.Uint64Var(&offloadLayersStep, "offload-layers-step", offloadLayersStep, "Specify the step of layers to offload, "+ - "works with --offload-layers. [Deprecated, use --gpu-layers-step instead]") // Deprecated. + fs.Uint64Var(&offloadLayersStep, "offload-layers-step", offloadLayersStep, "[DEPRECATED, use --gpu-layers-step instead] "+ // Deprecated, remove when release v0.3.0. + "Specify the step of layers to offload, "+ + "works with --offload-layers.") fs.Uint64Var(&offloadLayersStep, "gpu-layers-step", offloadLayersStep, "Specify the step of layers to offload, "+ "works with --gpu-layers.") fs.BoolVar(&version, "version", version, "Show gguf-parser version.") + fs.BoolVar(&raw, "raw", raw, "Output the file only, skip anything.") fs.BoolVar(&skipModel, "skip-model", skipModel, "Skip to display model metadata.") fs.BoolVar(&skipArchitecture, "skip-architecture", skipArchitecture, "Skip to display architecture metadata.") fs.BoolVar(&skipTokenizer, "skip-tokenizer", skipTokenizer, "Skip to display tokenizer metadata") @@ -226,7 +237,36 @@ func main() { case hfRepo != "" && hfFile != "": gf, err = ParseGGUFFileFromHuggingFace(ctx, hfRepo, hfFile, ropts...) case olModel != "": - gf, err = ParseGGUFFileFromOllama(ctx, olModel, olCrawl, ropts...) + om := ParseOllamaModel(olModel) + gf, err = ParseGGUFFileFromOllamaModel(ctx, om, olCrawl, ropts...) + if om != nil && olUsage { + // Parameters override. 
+ { + ps, _ := om.Params(ctx, nil) + if v, ok := ps["num_ctx"]; ok { + eopts = append(eopts, WithContextSize(anyx.Number[int32](v))) + } else if ctxSize <= 0 { + eopts = append(eopts, WithContextSize(2048)) + } + if v, ok := ps["use_mmap"]; ok && !anyx.Bool(v) { + noMMap = true + } + if v, ok := ps["num_gpu"]; ok { + offloadLayers = anyx.Number[int](v) + } + } + // Projector overlap, + // in here, we just assume the projector is overlapped with its size to VRAM. + { + var sz uint64 + mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.projector$`)) + for i := range mls { + sz += mls[i].Size + } + eopts = append(eopts, WithClipUsage(sz)) + } + + } } if err != nil { _, _ = fmt.Fprintf(os.Stderr, "failed to parse GGUF file: %s\n", err.Error()) @@ -234,6 +274,22 @@ func main() { } } + // Output raw. + + if raw { + enc := json.NewEncoder(os.Stdout) + if inPrettyJson { + enc.SetIndent("", " ") + } + if err := enc.Encode(gf); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "failed to encode JSON: %s\n", err.Error()) + os.Exit(1) + } + return + } + + // Otherwise, display the metadata and estimate the usage. + var ( m GGUFModelMetadata a GGUFArchitectureMetadata @@ -258,7 +314,8 @@ func main() { e = gf.EstimateLLaMACppUsage(eopts...) } - // Output + // Then, output as JSON or table. + var ( mmap = !noMMap platformRAM, platformVRAM uint64 @@ -285,36 +342,38 @@ func main() { if !skipArchitecture { o["architecture"] = a } - if !skipTokenizer { + if !skipTokenizer && t.Model != "" { o["tokenizer"] = t } if !skipEstimate { es := e.Summarize(mmap, platformRAM, platformVRAM) - switch { - case offloadLayersStep > e.OffloadLayers: - offloadLayersStep = e.OffloadLayers - case offloadLayersStep <= 0: - offloadLayersStep = e.OffloadLayers - } - if offloadLayersStep < e.OffloadLayers { - cnt := e.OffloadLayers/offloadLayersStep + 1 - if e.OffloadLayers%offloadLayersStep != 0 || e.FullOffloaded { - cnt++ + if e.Architecture != "clip" { + switch { + case offloadLayersStep > e.OffloadLayers: + offloadLayersStep = e.OffloadLayers + case offloadLayersStep <= 0: + offloadLayersStep = e.OffloadLayers } - ess := make([]LLaMACppUsageEstimateMemorySummary, cnt) - var wg sync.WaitGroup - for i := 0; i < cap(ess); i++ { - wg.Add(1) - go func(i int) { - defer wg.Done() - eopts := eopts[:len(eopts):len(eopts)] - eopts = append(eopts, WithOffloadLayers(uint64(i)*offloadLayersStep)) - ess[i] = gf.EstimateLLaMACppUsage(eopts...).SummarizeMemory(mmap, platformRAM, platformVRAM) - }(i) + if offloadLayersStep < e.OffloadLayers { + cnt := e.OffloadLayers/offloadLayersStep + 1 + if e.OffloadLayers%offloadLayersStep != 0 || e.FullOffloaded { + cnt++ + } + ess := make([]LLaMACppUsageEstimateMemorySummary, cnt) + var wg sync.WaitGroup + for i := 0; i < cap(ess); i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + eopts := eopts[:len(eopts):len(eopts)] + eopts = append(eopts, WithOffloadLayers(uint64(i)*offloadLayersStep)) + ess[i] = gf.EstimateLLaMACppUsage(eopts...).SummarizeMemory(mmap, platformRAM, platformVRAM) + }(i) + } + wg.Wait() + ess[cap(ess)-1] = es.Memory[0] + es.Memory = ess } - wg.Wait() - ess[cap(ess)-1] = es.Memory[0] - es.Memory = ess } o["estimate"] = es } @@ -336,12 +395,11 @@ func main() { if !skipModel { tprint( "MODEL", - []string{"Name", "Arch", "Quantization Version", "File Type", "Little Endian", "Size", "Parameters", "BPW"}, + []string{"Name", "Arch", "Quantization", "Little Endian", "Size", "Parameters", "BPW"}, nil, []string{ m.Name, m.Architecture, - 
sprintf(m.QuantizationVersion), sprintf(m.FileType), sprintf(m.LittleEndian), sprintf(m.Size), @@ -351,11 +409,13 @@ func main() { } if !skipArchitecture { - tprint( - "ARCHITECTURE", - []string{"Max Context Len", "Embedding Len", "Embedding GQA", "Attention Head Cnt", "Layers", "Feed Forward Len", "Expert Cnt", "Vocabulary Len"}, - nil, - []string{ + var ( + hd []string + bd []string + ) + if a.Architecture != "clip" { + hd = []string{"Max Context Len", "Embedding Len", "Embedding GQA", "Attention Head Cnt", "Layers", "Feed Forward Len", "Expert Cnt", "Vocabulary Len"} + bd = []string{ sprintf(a.MaximumContextLength), sprintf(a.EmbeddingLength), sprintf(a.EmbeddingGQA), @@ -364,10 +424,25 @@ func main() { sprintf(a.FeedForwardLength), sprintf(a.ExpertCount), sprintf(a.VocabularyLength), - }) + } + } else { + hd = []string{"Embedding Len", "Layers", "Feed Forward Len", "Encoder", "LLaVA Projector"} + bd = []string{ + sprintf(a.EmbeddingLength), + sprintf(a.BlockCount), + sprintf(a.FeedForwardLength), + sprintf(tenary(a.ClipHasTextEncoder, tenary(a.ClipHasVisionEncoder, "Text & Vision", "Text"), tenary(a.ClipHasVisionEncoder, "Vision", "N/A"))), + sprintf(tenary(a.ClipHasLLaVaProjector, a.ClipProjectorType, "N/A")), + } + } + tprint( + "ARCHITECTURE", + hd, + nil, + bd) } - if !skipTokenizer { + if !skipTokenizer && t.Model != "" { tprint( "TOKENIZER", []string{"Model", "Tokens Size", "Tokens Len", "Added Tokens Len", "BOS Token", "EOS Token", "Unknown Token", "Separator Token", "Padding Token"}, @@ -386,85 +461,84 @@ func main() { } if !skipEstimate { + var ( + hd []string + mg []int + bds [][]string + ) es := e.Summarize(mmap, platformRAM, platformVRAM) - switch { - case offloadLayersStep > e.OffloadLayers: - offloadLayersStep = e.OffloadLayers - case offloadLayersStep <= 0: - offloadLayersStep = e.OffloadLayers - } - if offloadLayersStep < e.OffloadLayers { - cnt := e.OffloadLayers/offloadLayersStep + 1 - if e.OffloadLayers%offloadLayersStep != 0 || e.FullOffloaded { - cnt++ + if e.Architecture != "clip" { + hd = []string{"Arch", "Context Size", "Flash Attention", "MMap Support", "Offload Layers", "Full Offloaded", "UMA (RAM + VRAM)", "NonUMA RAM", "NonUMA VRAM"} + mg = []int{0, 1, 2, 3, 5} + + switch { + case offloadLayersStep > e.OffloadLayers: + offloadLayersStep = e.OffloadLayers + case offloadLayersStep <= 0: + offloadLayersStep = e.OffloadLayers } - ess := make([]LLaMACppUsageEstimateMemorySummary, cnt) - var wg sync.WaitGroup - for i := 0; i < cap(ess); i++ { - wg.Add(1) - go func(i int) { - defer wg.Done() - eopts := eopts[:len(eopts):len(eopts)] - eopts = append(eopts, WithOffloadLayers(uint64(i)*offloadLayersStep)) - ess[i] = gf.EstimateLLaMACppUsage(eopts...).SummarizeMemory(mmap, platformRAM, platformVRAM) - }(i) + if offloadLayersStep < e.OffloadLayers { + cnt := e.OffloadLayers/offloadLayersStep + 1 + if e.OffloadLayers%offloadLayersStep != 0 || e.FullOffloaded { + cnt++ + } + ess := make([]LLaMACppUsageEstimateMemorySummary, cnt) + var wg sync.WaitGroup + for i := 0; i < cap(ess); i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + eopts := eopts[:len(eopts):len(eopts)] + eopts = append(eopts, WithOffloadLayers(uint64(i)*offloadLayersStep)) + ess[i] = gf.EstimateLLaMACppUsage(eopts...).SummarizeMemory(mmap, platformRAM, platformVRAM) + }(i) + } + wg.Wait() + ess[cap(ess)-1] = es.Memory[0] + es.Memory = ess } - wg.Wait() - ess[cap(ess)-1] = es.Memory[0] - es.Memory = ess - } - bd := make([][]string, len(es.Memory)) - for i := range es.Memory { - bd[i] = []string{ - 
sprintf(es.Architecture), - sprintf(es.ContextSize), - sprintf(es.FlashAttention), - sprintf(!es.NoMMap), - sprintf(tenary(es.Memory[i].FullOffloaded, sprintf("%d (%d + 1)", es.Memory[i].OffloadLayers, es.Memory[i].OffloadLayers-1), es.Memory[i].OffloadLayers)), - sprintf(tenary(es.Memory[i].FullOffloaded, "Yes", "No")), - sprintf("%s + %s = %s", es.Memory[i].UMA.RAM, es.Memory[i].NonUMA.VRAM, es.Memory[i].UMA.RAM+es.Memory[i].UMA.VRAM), - sprintf(es.Memory[i].NonUMA.RAM), - sprintf(es.Memory[i].NonUMA.VRAM), + + bds = make([][]string, len(es.Memory)) + for i := range es.Memory { + bds[i] = []string{ + sprintf(es.Architecture), + sprintf(es.ContextSize), + sprintf(es.FlashAttention), + sprintf(!es.NoMMap), + sprintf(tenary(es.Memory[i].FullOffloaded, sprintf("%d (%d + 1)", es.Memory[i].OffloadLayers, es.Memory[i].OffloadLayers-1), es.Memory[i].OffloadLayers)), + sprintf(tenary(es.Memory[i].FullOffloaded, "Yes", "No")), + sprintf("%s + %s = %s", es.Memory[i].UMA.RAM, es.Memory[i].UMA.VRAM, es.Memory[i].UMA.RAM+es.Memory[i].UMA.VRAM), + sprintf(es.Memory[i].NonUMA.RAM), + sprintf(es.Memory[i].NonUMA.VRAM), + } + } + } else { + hd = []string{"Arch", "Offload Layers", "Full Offloaded", "(V)RAM"} + bds = [][]string{ + { + sprintf(es.Architecture), + sprintf(es.Memory[0].OffloadLayers), + sprintf(tenary(es.Memory[0].FullOffloaded, "Yes", "No")), + sprintf(max(es.Memory[0].UMA.RAM, es.Memory[0].UMA.VRAM)), + }, } } tprint( "ESTIMATE", - []string{"Arch", "Context Size", "Flash Attention", "MMap Support", "Offload Layers", "Full Offloaded", "UMA (RAM + VRAM)", "NonUMA RAM", "NonUMA VRAM"}, - []int{0, 1, 2, 3, 5}, - bd...) + hd, + mg, + bds...) } } func sprintf(f any, a ...any) string { - switch v := f.(type) { - case string: + if v, ok := f.(string); ok { if len(a) != 0 { return fmt.Sprintf(v, a...) } return v - case []byte: - return string(v) - case int: - return strconv.Itoa(v) - case int32: - return strconv.Itoa(int(v)) - case int64: - return strconv.Itoa(int(v)) - case uint: - return strconv.Itoa(int(v)) - case uint32: - return strconv.Itoa(int(v)) - case uint64: - return strconv.Itoa(int(v)) - case float32: - return strconv.FormatFloat(float64(v), 'f', -1, 32) - case float64: - return strconv.FormatFloat(v, 'f', -1, 64) - case bool: - return strconv.FormatBool(v) - default: - return fmt.Sprintf("%v", v) } + return anyx.String(f) } func tprint(title string, header []string, merges []int, body ...[]string) { diff --git a/file.go b/file.go index 8425c2d..6f8841d 100644 --- a/file.go +++ b/file.go @@ -410,11 +410,14 @@ func (gf *GGUFFile) layers() GGUFLayerTensorInfos { pm := make(map[string]any) for i := range gf.TensorInfos { ps := strings.Split(gf.TensorInfos[i].Name, ".") + if len(ps) < 2 { + ret = append(ret, gf.TensorInfos[i]) + continue + } switch { default: ret = append(ret, gf.TensorInfos[i]) - continue - case len(ps) >= 2 && ps[0] == "blk": + case ps[0] == "blk" || ps[0] == "mm": p := strings.Join([]string{ps[0], ps[1]}, ".") if _, ok := pm[p]; !ok { l := &GGUFNamedTensorInfos{Name: p} @@ -423,7 +426,27 @@ func (gf *GGUFFile) layers() GGUFLayerTensorInfos { } l := pm[p].(*GGUFNamedTensorInfos) l.GGUFLayerTensorInfos = append(l.GGUFLayerTensorInfos, gf.TensorInfos[i]) - case len(ps) >= 3 && (ps[0] == "decoder" || ps[0] == "encoder"): + case ps[0] == "v" || ps[0] == "t": // Clip. 
+ p := ps[0] + if _, ok := pm[p]; !ok { + xl := &GGUFNamedTensorInfos{Name: p} + pm[p] = xl + ret = append(ret, xl) + } + xl := pm[p].(*GGUFNamedTensorInfos) + if ps[1] != "blk" || len(ps) < 3 { + xl.GGUFLayerTensorInfos = append(xl.GGUFLayerTensorInfos, gf.TensorInfos[i]) + continue + } + p = strings.Join([]string{ps[0], ps[1], ps[2]}, ".") + if _, ok := pm[p]; !ok { + l := &GGUFNamedTensorInfos{Name: p} + pm[p] = l + xl.GGUFLayerTensorInfos = append(xl.GGUFLayerTensorInfos, l) + } + l := pm[p].(*GGUFNamedTensorInfos) + l.GGUFLayerTensorInfos = append(l.GGUFLayerTensorInfos, gf.TensorInfos[i]) + case ps[0] == "decoder" || ps[0] == "encoder": // BERT. p := ps[0] if _, ok := pm[p]; !ok { xl := &GGUFNamedTensorInfos{Name: p} @@ -431,7 +454,7 @@ func (gf *GGUFFile) layers() GGUFLayerTensorInfos { ret = append(ret, xl) } xl := pm[p].(*GGUFNamedTensorInfos) - if ps[1] != "block" { + if ps[1] != "block" || len(ps) < 3 { xl.GGUFLayerTensorInfos = append(xl.GGUFLayerTensorInfos, gf.TensorInfos[i]) continue } diff --git a/file_architecture.go b/file_architecture.go index 513787a..a0f7de3 100644 --- a/file_architecture.go +++ b/file_architecture.go @@ -45,7 +45,7 @@ type GGUFArchitectureMetadata struct { AttentionClampKQV float32 `json:"attentionClampKQV,omitempty"` // AttentionLayerNormEpsilon is the epsilon value used in the LayerNorm(Layer Normalization). AttentionLayerNormEpsilon float32 `json:"attentionLayerNormEpsilon,omitempty"` - // AttentionLayerNormRMSEpsilon is the epsilon value used in the RMSNorm(Root Mean Square Layer Normalization), + // AttentionLayerNormRMSEpsilon is the epsilon value used in the RMSNorm(root Mean Square Layer Normalization), // which is a simplification of the original LayerNorm. AttentionLayerNormRMSEpsilon float32 `json:"attentionLayerNormRMSEpsilon,omitempty"` // AttentionKeyLength(n_embd_head_k) is the size of a key head. @@ -91,6 +91,23 @@ type GGUFArchitectureMetadata struct { EmbeddingValueGQA uint64 `json:"embeddingValueGQA,omitempty"` // EmbeddingGGQA is the GQA of the embedding layer. EmbeddingGQA uint64 `json:"embeddingGQA,omitempty"` + + // ClipHasTextEncoder indicates whether the clip model has text encoder or not. + // + // Only used when Architecture is "clip". + ClipHasTextEncoder bool `json:"clipHasTextEncoder,omitempty"` + // ClipHasVisionEncoder indicates whether the clip model has vision encoder or not. + // + // Only used when Architecture is "clip". + ClipHasVisionEncoder bool `json:"clipHasVisionEncoder,omitempty"` + // ClipHasLLaVaProjector indicates whether the clip model has LLaVa projector or not. + // + // Only used when Architecture is "clip". + ClipHasLLaVaProjector bool `json:"clipHasLLaVaProjector,omitempty"` + // ClipProjectorType is the type of the projector used in the clip model. + // + // Only used when Architecture is "clip". + ClipProjectorType string `json:"clipProjectorType,omitempty"` } // Architecture returns the architecture metadata of the GGUF file. 
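As an aside (not part of the patch): a minimal sketch of how the new Clip* fields added above might be read through the public Architecture() accessor once the hunk below routes "clip" models to clipArchitecture(). The local mmproj file path is a placeholder.

```go
package main

import (
	"fmt"

	parser "github.com/thxcode/gguf-parser-go"
)

func main() {
	// Hypothetical local multimodal projector file; any clip-arch GGUF works here.
	gf, err := parser.ParseGGUFFile("llava-mmproj-f16.gguf")
	if err != nil {
		panic(err)
	}
	ga := gf.Architecture()
	if ga.Architecture == "clip" {
		// ClipProjectorType falls back to "mlp" when clip.projector_type is absent.
		fmt.Println(ga.ClipProjectorType, ga.ClipHasVisionEncoder, ga.ClipHasLLaVaProjector)
	}
}
```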
@@ -99,6 +116,120 @@ func (gf *GGUFFile) Architecture() (ga GGUFArchitectureMetadata) { if v, ok := gf.Header.MetadataKV.Get("general.architecture"); ok { arch = v.ValueString() } + + if arch == "clip" { + return gf.clipArchitecture() + } + return gf.transformArchitecture(arch) +} + +func (gf *GGUFFile) clipArchitecture() (ga GGUFArchitectureMetadata) { + var ( + hasTextEncoderKey = "clip.has_text_encoder" + hasVisionEncoderKey = "clip.has_vision_encoder" + hasLLaVaProjectorKey = "clip.has_llava_projector" + projectorTypeKey = "clip.projector_type" + + textEmbeddingLengthKey = "clip.text.embedding_length" + textBlockCountKey = "clip.text.block_count" + textFeedForwardLengthKey = "clip.text.feed_forward_length" + textAttentionHeadCountKey = "clip.text.attention.head_count" + textAttentionLayerNormRMSEpsilonKey = "clip.text.attention.layer_norm_epsilon" + + visionEmbeddingLengthKey = "clip.vision.embedding_length" + visionBlockCountKey = "clip.vision.block_count" + visionFeedForwardLengthKey = "clip.vision.feed_forward_length" + visionAttentionHeadCountKey = "clip.vision.attention.head_count" + visionAttentionLayerNormRMSEpsilonKey = "clip.vision.attention.layer_norm_epsilon" + ) + + ga.Architecture = "clip" + + m, _ := gf.Header.MetadataKV.Index([]string{ + hasTextEncoderKey, + hasVisionEncoderKey, + hasLLaVaProjectorKey, + projectorTypeKey, + textEmbeddingLengthKey, + textBlockCountKey, + textFeedForwardLengthKey, + textAttentionHeadCountKey, + textAttentionLayerNormRMSEpsilonKey, + visionEmbeddingLengthKey, + visionBlockCountKey, + visionFeedForwardLengthKey, + visionAttentionHeadCountKey, + visionAttentionLayerNormRMSEpsilonKey, + }) + + if v, ok := m[hasTextEncoderKey]; ok { + ga.ClipHasTextEncoder = v.ValueBool() + } + if v, ok := m[hasVisionEncoderKey]; ok { + ga.ClipHasVisionEncoder = v.ValueBool() + } + if v, ok := m[hasLLaVaProjectorKey]; ok { + ga.ClipHasLLaVaProjector = v.ValueBool() + } + if v, ok := m[projectorTypeKey]; ok { + ga.ClipProjectorType = v.ValueString() + } else { + ga.ClipProjectorType = "mlp" + } + + if v, ok := m[textEmbeddingLengthKey]; ok { + ga.EmbeddingLength = ValueNumeric[uint64](v) + } + if v, ok := m[textBlockCountKey]; ok { + ga.BlockCount = ValueNumeric[uint64](v) + } + if v, ok := m[textFeedForwardLengthKey]; ok { + ga.FeedForwardLength = ValueNumeric[uint64](v) + } + if v, ok := m[textAttentionHeadCountKey]; ok { + ga.AttentionHeadCount = ValueNumeric[uint64](v) + } + if v, ok := m[textAttentionLayerNormRMSEpsilonKey]; ok { + ga.AttentionLayerNormRMSEpsilon = ValueNumeric[float32](v) + } + + if v, ok := m[visionEmbeddingLengthKey]; ok { + ga.EmbeddingLength = ValueNumeric[uint64](v) + } + if v, ok := m[visionBlockCountKey]; ok { + ga.BlockCount = ValueNumeric[uint64](v) + } + if v, ok := m[visionFeedForwardLengthKey]; ok { + ga.FeedForwardLength = ValueNumeric[uint64](v) + } + if v, ok := m[visionAttentionHeadCountKey]; ok { + ga.AttentionHeadCount = ValueNumeric[uint64](v) + } + if v, ok := m[visionAttentionLayerNormRMSEpsilonKey]; ok { + ga.AttentionLayerNormRMSEpsilon = ValueNumeric[float32](v) + } + + ga.AttentionHeadCountKV = ga.AttentionHeadCount + + { + if ga.AttentionHeadCountKV > 0 { + ga.EmbeddingGroup = ga.AttentionHeadCount / ga.AttentionHeadCountKV + } + if ga.AttentionHeadCount > 0 { + ga.EmbeddingKeyGQA = uint64(ga.AttentionKeyLength) * ga.AttentionHeadCountKV + ga.EmbeddingValueGQA = uint64(ga.AttentionValueLength) * ga.AttentionHeadCountKV + } + if ga.Architecture == "mamba" { + ga.EmbeddingKeyGQA = uint64((ga.SSMConvolutionKernel - 
1) * ga.SSMInnerSize) + ga.EmbeddingValueGQA = uint64(ga.SSMStateSize * ga.SSMInnerSize) + } + ga.EmbeddingGQA = ga.EmbeddingValueGQA + } + + return ga +} + +func (gf *GGUFFile) transformArchitecture(arch string) (ga GGUFArchitectureMetadata) { var ( contextLengthKey = arch + ".context_length" embeddingLengthKey = arch + ".embedding_length" diff --git a/file_estimate.go b/file_estimate.go index c211986..453afa6 100644 --- a/file_estimate.go +++ b/file_estimate.go @@ -42,6 +42,8 @@ type ( KVCache LLaMACppKVCacheUsage `json:"kvCache"` // Computation is the memory usage of computation. Computation LLaMACppComputationUsage `json:"computation"` + // Clipper is the memory usage of clipper. + Clipper GGUFBytesScalar `json:"clipper"` } // LLaMACppWeightUsage represents the memory usage of loading weights in llama.cpp. @@ -170,6 +172,11 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) ( isOffloadOutputLayer bool ) { + // For clip, + // see https://github.com/ggerganov/llama.cpp/blob/148ec970b62c3c5ae0a8bfdaad2fc237aaae350d/examples/llava/clip.cpp#L994-L1008. + if a.Architecture == "clip" { + o.OffloadLayers = ptr.To(a.BlockCount + 1) // Clip means full offload. + } if v := o.OffloadLayers; v == nil { o.OffloadLayers = ptr.To(a.BlockCount) nOffloadLayers = a.BlockCount @@ -221,12 +228,17 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) ( // Weight. { // Compute. - for i, offloadStart := uint64(0), uint64(len(tfLs))-nOffloadLayers; i < uint64(len(tfLs)); i++ { - switch { - case i < nLoadLayers: - e.Load.Weight.Compute += GGUFBytesScalar(tfLs[i].Bytes()) - case i >= offloadStart: - e.Offload.Weight.Compute += GGUFBytesScalar(tfLs[i].Bytes()) + switch a.Architecture { + case "clip": + e.Offload.Weight.Compute = GGUFBytesScalar(ls.Bytes()) + default: + for i, offloadStart := uint64(0), uint64(len(tfLs))-nOffloadLayers; i < uint64(len(tfLs)); i++ { + switch { + case i < nLoadLayers: + e.Load.Weight.Compute += GGUFBytesScalar(tfLs[i].Bytes()) + case i >= offloadStart: + e.Offload.Weight.Compute += GGUFBytesScalar(tfLs[i].Bytes()) + } } } @@ -290,10 +302,13 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) ( inpSMask = GGMLTypeF32.RowSizeOf([]uint64{1, nKV}) // F32 [1, n_kv] inpSSeq = GGMLTypeI32.RowSizeOf([]uint64{nKV, nBatch}) // I32 [n_kv, n_batch] ) - if a.Architecture == "mamba" { + switch a.Architecture { + case "clip": + // NOP. + case "mamba": e.Load.Computation.Input = GGUFBytesScalar(inpTokens + inpEmbd + inpSMask + inpSSeq + inpOutIds) e.Offload.Computation.Input = GGUFBytesScalar(inpEmbd + inpSMask + inpSSeq + inpOutIds) - } else { + default: e.Load.Computation.Input = GGUFBytesScalar(inpTokens + inpEmbd + inpPos + inpKQMask + inpOutIds) e.Offload.Computation.Input = GGUFBytesScalar(inpEmbd + inpPos + inpKQMask + inpOutIds) } @@ -301,7 +316,10 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) ( // the allocated memory can be reused for the next layer. // So, we only consider the usage of the largest layer, // which is the last layer by default. - if a.Architecture == "mamba" { + switch a.Architecture { + case "clip": + // NOP. 
+ case "mamba": convInc := GGMLTypeF32.RowSizeOf([]uint64{a.EmbeddingKeyGQA, nKV}) // F32 [n_embd_key_gqa, n_kv] reshape for _, l := range tfLs[len(tfLs)-1].Search(regexp.MustCompile(`.*\.\d+\.(attn_norm|ssm_in|ssm_conv1d)\.weight`)) { if !strings.HasSuffix(l.Name, ".ssm_conv1d.weight") { @@ -325,7 +343,7 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) ( ssmInc += rs } e.Offload.Computation.Compute = GGUFBytesScalar(convInc + ssmInc) - } else { + default: loadAttnInc, offloadAttnInc := uint64(0), uint64(0) if o.FlashAttention { // https://github.com/ggerganov/llama.cpp/blob/172c8256840ffd882ab9992ecedbb587d9b21f15/llama.cpp#L7387. @@ -389,7 +407,10 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) ( } } // Finally, get the usage of output layer. - { + switch a.Architecture { + case "clip": + // NOP. + default: outInc := inpEmbd if a.Architecture == "mamba" { outInc += inpSMask + inpSSeq @@ -404,6 +425,11 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) ( outInc += uint64(e.Load.Weight.Output) e.Offload.Computation.Output = GGUFBytesScalar(outInc) } + + // Clipper. + if o.ClipUsage != nil { + e.Offload.Clipper = GGUFBytesScalar(*o.ClipUsage) + } } return e @@ -457,7 +483,7 @@ type ( // SummarizeMemory returns the summary of the estimated memory usage of loading the GGUF file in llama.cpp, // the input options are used to adjust the summary. -func (e LLaMACppUsageEstimate) SummarizeMemory(mmap bool, ramFootprint, vramFootprint uint64) (ems LLaMACppUsageEstimateMemorySummary) { +func (e LLaMACppUsageEstimate) SummarizeMemory(mmap bool, nonUMARamFootprint, nonUMAVramFootprint uint64) (ems LLaMACppUsageEstimateMemorySummary) { ems.OffloadLayers, ems.FullOffloaded = e.OffloadLayers, e.FullOffloaded if ems.FullOffloaded { ems.OffloadLayers++ // The output layer is offloaded. @@ -479,13 +505,14 @@ func (e LLaMACppUsageEstimate) SummarizeMemory(mmap bool, ramFootprint, vramFoot wg = e.Offload.Weight.Sum() kv = e.Offload.KVCache.Sum() cp = 0 - ems.UMA.VRAM = fp + wg + kv + cp + cl := e.Offload.Clipper + ems.UMA.VRAM = fp + wg + kv + cp + cl } // NonUMA. { // RAM. - fp := GGUFBytesScalar(ramFootprint) + e.Load.Footprint + fp := GGUFBytesScalar(nonUMARamFootprint) + e.Load.Footprint wg := e.Load.Weight.Sum() kv := e.Load.KVCache.Sum() cp := e.Load.Computation.Sum() @@ -497,11 +524,12 @@ func (e LLaMACppUsageEstimate) SummarizeMemory(mmap bool, ramFootprint, vramFoot } } // VRAM. - fp = GGUFBytesScalar(vramFootprint) + e.Offload.Footprint + fp = GGUFBytesScalar(nonUMAVramFootprint) + e.Offload.Footprint wg = e.Offload.Weight.Sum() kv = e.Offload.KVCache.Sum() cp = e.Offload.Computation.Sum() - ems.NonUMA.VRAM = fp + wg + kv + cp + cl := e.Offload.Clipper + ems.NonUMA.VRAM = fp + wg + kv + cp + cl } return ems @@ -509,10 +537,10 @@ func (e LLaMACppUsageEstimate) SummarizeMemory(mmap bool, ramFootprint, vramFoot // Summarize returns the summary of the estimated result of loading the GGUF file in llama.cpp, // the input options are used to adjust the summary. -func (e LLaMACppUsageEstimate) Summarize(mmap bool, ramFootprint, vramFootprint uint64) (es LLaMACppUsageEstimateSummary) { +func (e LLaMACppUsageEstimate) Summarize(mmap bool, nonUMARamFootprint, nonUMAVramFootprint uint64) (es LLaMACppUsageEstimateSummary) { // Summarize memory. 
es.Memory = []LLaMACppUsageEstimateMemorySummary{ - e.SummarizeMemory(mmap, ramFootprint, vramFootprint), + e.SummarizeMemory(mmap, nonUMARamFootprint, nonUMAVramFootprint), } // Just copy from the original estimate. diff --git a/file_estimate_option.go b/file_estimate_option.go index f82d09c..a70274e 100644 --- a/file_estimate_option.go +++ b/file_estimate_option.go @@ -18,6 +18,7 @@ type ( OffloadKVCache *bool OffloadLayers *uint64 FlashAttention bool + ClipUsage *uint64 } LLaMACppUsageEstimateOption func(*_LLaMACppUsageEstimateOptions) ) @@ -118,3 +119,11 @@ func WithFlashAttention() LLaMACppUsageEstimateOption { o.FlashAttention = true } } + +// WithClipUsage sets the clip usage for the estimate, +// which affects the usage of VRAM. +func WithClipUsage(clip uint64) LLaMACppUsageEstimateOption { + return func(o *_LLaMACppUsageEstimateOptions) { + o.ClipUsage = &clip + } +} diff --git a/file_from_metadata.go b/file_from_distro.go similarity index 76% rename from file_from_metadata.go rename to file_from_distro.go index edef6e9..c15f907 100644 --- a/file_from_metadata.go +++ b/file_from_distro.go @@ -11,7 +11,6 @@ import ( "time" "golang.org/x/exp/maps" - "golang.org/x/net/html" "github.com/thxcode/gguf-parser-go/util/funcx" "github.com/thxcode/gguf-parser-go/util/httpx" @@ -32,16 +31,26 @@ var ( // which will be more efficient and faster, but lossy. // If the crawling fails, it will fall back to the default behavior. func ParseGGUFFileFromOllama(ctx context.Context, model string, crawl bool, opts ...GGUFReadOption) (*GGUFFile, error) { + return ParseGGUFFileFromOllamaModel(ctx, ParseOllamaModel(model), crawl, opts...) +} + +// ParseGGUFFileFromOllamaModel is similar to ParseGGUFFileFromOllama, +// but inputs an OllamaModel instead of a string. +// +// The given OllamaModel will be completed(fetching MediaType, Config and Layers) after calling this function. +// If the crawl is true, it will try to crawl the metadata from Ollama website instead of blobs fetching, +// which will be more efficient and faster, but lossy. +// If the crawling fails, it will fall back to the default behavior. +func ParseGGUFFileFromOllamaModel(ctx context.Context, model *OllamaModel, crawl bool, opts ...GGUFReadOption) (*GGUFFile, error) { + if model == nil { + return nil, ErrOllamaInvalidModel + } + var o _GGUFReadOptions for _, opt := range opts { opt(&o) } - om := ParseOllamaModel(model) - if om == nil { - return nil, ErrOllamaInvalidModel - } - cli := httpx.Client( httpx.ClientOptions(). WithUserAgent("gguf-parser-go"). 
@@ -70,70 +79,31 @@ func ParseGGUFFileFromOllama(ctx context.Context, model string, crawl bool, opts var ml OllamaModelLayer { - err := om.Complete(ctx, cli) + err := model.Complete(ctx, cli) if err != nil { return nil, fmt.Errorf("complete ollama model: %w", err) } var ok bool - ml, ok = om.GetLayer("application/vnd.ollama.image.model") + ml, ok = model.GetLayer("application/vnd.ollama.image.model") if !ok { return nil, ErrOllamaBaseLayerNotFound } } if crawl { - mwu, lwu := om.WebURL().String(), ml.WebURL().String() - req, err := httpx.NewGetRequestWithContext(ctx, lwu) - if err != nil { - return nil, fmt.Errorf("new request: %w", err) - } - req.Header.Add("Referer", mwu) - req.Header.Add("Hx-Current-Url", mwu) - req.Header.Add("Hx-Request", "true") - req.Header.Add("Hx-Target", "file-explorer") - - var n *html.Node - err = httpx.Do(cli, req, func(resp *http.Response) error { - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("status code %d", resp.StatusCode) - } - n, err = html.Parse(resp.Body) - if err != nil { - return fmt.Errorf("parse html: %w", err) - } - return nil - }) + r, err := ml.FetchWebPage(ctx, cli) if err == nil { - var wk func(*html.Node) string - wk = func(n *html.Node) string { - if n.Type == html.ElementNode && n.Data == "div" { - for i := range n.Attr { - if n.Attr[i].Key == "class" && n.Attr[i].Val == "whitespace-pre-wrap" { - return n.FirstChild.Data - } - } - } - for c := n.FirstChild; c != nil; c = c.NextSibling { - if r := wk(c); r != "" { - return r - } - } - return "" - } - - if r := wk(n); r != "" { - gf, err := parseGGUFFileFromMetadata("ollama", r, ml.Size) - if err == nil { - return gf, nil - } + gf, err := parseGGUFFileFromDistroMetadata("ollama", r, ml.Size) + if err == nil { + return gf, nil } } // Fallback to the default behavior. 
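As a usage-level aside, the new `ParseGGUFFileFromOllamaModel` entry point composes with the `WithClipUsage` option and the renamed NonUMA footprint arguments introduced earlier in this patch roughly as follows. This is a minimal sketch rather than code from the patch: the package alias, the model reference, and all byte counts are assumptions, and error handling is deliberately crude.

```go
package main

import (
	"context"
	"fmt"

	parser "github.com/thxcode/gguf-parser-go" // alias assumed for illustration
)

func main() {
	ctx := context.Background()

	// Parse the model reference once, then hand the OllamaModel to the parser.
	om := parser.ParseOllamaModel("llama3:latest") // model reference is illustrative
	// crawl=true tries the lossy web-page metadata first and falls back to blob fetching.
	gf, err := parser.ParseGGUFFileFromOllamaModel(ctx, om, true)
	if err != nil {
		panic(err)
	}

	// Reserve extra VRAM for a CLIP projector; the byte count here is an assumption.
	e := gf.EstimateLLaMACppUsage(parser.WithClipUsage(512 * 1024 * 1024))

	// The footprint arguments now explicitly mean the NonUMA RAM/VRAM overhead.
	es := e.Summarize(true /* mmap */, 150*1024*1024, 250*1024*1024)
	fmt.Printf("offload layers: %d, NonUMA VRAM: %v\n",
		es.Memory[0].OffloadLayers, es.Memory[0].NonUMA.VRAM)
}
```

When crawling fails, the call transparently falls back to fetching the model blob, as the hunk above shows.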
} - return parseGGUFFileFromRemote(ctx, cli, ml.URL().String(), o) + return parseGGUFFileFromRemote(ctx, cli, ml.BlobURL().String(), o) } type _OllamaMetadata struct { @@ -148,7 +118,7 @@ type _OllamaMetadata struct { Version uint32 `json:"version"` } -func parseGGUFFileFromMetadata(source, data string, size uint64) (*GGUFFile, error) { +func parseGGUFFileFromDistroMetadata(source, data string, size uint64) (*GGUFFile, error) { if source != "ollama" { return nil, fmt.Errorf("invalid source %q", source) } @@ -174,7 +144,7 @@ func parseGGUFFileFromMetadata(source, data string, size uint64) (*GGUFFile, err gf.Header.Magic = GGUFMagicGGUFLe gf.Header.Version = GGUFVersion(m.Version) gf.Header.TensorCount = uint64(len(m.Tensors)) - gf.Header.MetadataKVCount = uint64(len(m.Metadata) + 1 /* tokenizer.chat_template */) + gf.Header.MetadataKVCount = uint64(1 /* tokenizer.chat_template */ + len(m.Metadata)) gf.Size = GGUFBytesScalar(size) gf.ModelParameters = GGUFParametersScalar(m.NumParams) @@ -223,6 +193,24 @@ func parseGGUFFileFromMetadata(source, data string, size uint64) (*GGUFFile, err } v = av } + case []any: + vt = GGUFMetadataValueTypeArray + av := GGUFMetadataKVArrayValue{ + Type: GGUFMetadataValueTypeString, + Len: uint64(len(vv)), + } + if av.Len > 0 { + av.Array = vv + switch vv[0].(type) { + case bool: + av.Type = GGUFMetadataValueTypeBool + case float64: + av.Type = GGUFMetadataValueTypeFloat32 + case int64: + av.Type = GGUFMetadataValueTypeUint32 + } + } + v = av } gf.Header.MetadataKV = append(gf.Header.MetadataKV, GGUFMetadataKV{ Key: k, diff --git a/file_from_remote.go b/file_from_remote.go index 75d4226..fed6c06 100644 --- a/file_from_remote.go +++ b/file_from_remote.go @@ -17,7 +17,7 @@ func ParseGGUFFileFromHuggingFace(ctx context.Context, repo, file string, opts . return ParseGGUFFileRemote(ctx, fmt.Sprintf("https://huggingface.co/%s/resolve/main/%s", repo, file), opts...) } -// ParseGGUFFileRemote parses a GGUF file from a remote URL, +// ParseGGUFFileRemote parses a GGUF file from a remote BlobURL, // and returns a GGUFFile, or an error if any. 
func ParseGGUFFileRemote(ctx context.Context, url string, opts ...GGUFReadOption) (*GGUFFile, error) { var o _GGUFReadOptions diff --git a/file_model.go b/file_model.go index 8d90382..485d4a6 100644 --- a/file_model.go +++ b/file_model.go @@ -132,6 +132,8 @@ func (gf *GGUFFile) Model() (gm GGUFModelMetadata) { if v, ok := m[architectureKey]; ok { gm.Architecture = v.ValueString() + } else { + gm.Architecture = "llama" } if v, ok := m[quantizationKey]; ok { gm.QuantizationVersion = ValueNumeric[uint32](v) diff --git a/go.mod b/go.mod index 78047d8..3a6ade9 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/stretchr/testify v1.9.0 golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 golang.org/x/net v0.25.0 + golang.org/x/sync v0.7.0 golang.org/x/sys v0.20.0 golang.org/x/tools v0.21.0 ) @@ -20,6 +21,5 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/sync v0.7.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/ollama_model.go b/ollama_model.go index 8475b9e..dfd25ae 100644 --- a/ollama_model.go +++ b/ollama_model.go @@ -8,6 +8,9 @@ import ( "regexp" "strings" + "golang.org/x/net/html" + "golang.org/x/sync/errgroup" + "github.com/thxcode/gguf-parser-go/util/httpx" "github.com/thxcode/gguf-parser-go/util/json" "github.com/thxcode/gguf-parser-go/util/stringx" @@ -24,6 +27,8 @@ const ( ) type ( + // OllamaModel represents an Ollama model, + // its manifest(including MediaType, Config and Layers) can be completed further by calling the Complete method. OllamaModel struct { Schema string `json:"schema"` Registry string `json:"registry"` @@ -34,13 +39,32 @@ type ( MediaType string `json:"mediaType"` Config OllamaModelLayer `json:"config"` Layers []OllamaModelLayer `json:"layers"` + + // Client is the http client used to complete the OllamaModel's network operations. + // + // When this field is nil, + // it will be set to the client used by OllamaModel.Complete. + // + // When this field is offered, + // the network operations will be done with this client. + Client *http.Client `json:"-"` } + + // OllamaModelLayer represents an Ollama model layer, + // its digest can be used to download the artifact. OllamaModelLayer struct { MediaType string `json:"mediaType"` Size uint64 `json:"size"` Digest string `json:"digest"` - model *OllamaModel + // Root points to the root OllamaModel, + // which is never serialized or deserialized. + // + // When called OllamaModel.Complete, + // this field will be set to the OllamaModel itself. + // If not, this field will be nil, + // and must be set manually to the root OllamaModel before calling the method of OllamaModelLayer. + Root *OllamaModel `json:"-"` } ) @@ -142,73 +166,324 @@ func (om *OllamaModel) SearchLayers(mediaTypeRegex *regexp.Regexp) []OllamaModel return ls } -// URL returns the URL of the OllamaModel. -func (om *OllamaModel) URL() *url.URL { +// WebPageURL returns the Ollama web page URL of the OllamaModel. +func (om *OllamaModel) WebPageURL() *url.URL { u := &url.URL{ Scheme: om.Schema, Host: om.Registry, } - return u.JoinPath("v2", om.Namespace, om.Repository, "manifests", om.Tag) + return u.JoinPath(om.Namespace, om.Repository+":"+om.Tag) } -// WebURL returns the Ollama web URL of the OllamaModel. -func (om *OllamaModel) WebURL() *url.URL { +// Complete completes the OllamaModel with the given context and http client. 
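For orientation on the renamed URL helpers: `WebPageURL` now builds the human-facing page address from the model coordinates, while the registry `/v2/.../manifests/...` endpoint is constructed inside `Complete` below. A tiny illustrative sketch, using a placeholder host and model coordinates rather than the package's real defaults:

```go
package example

import (
	"fmt"

	parser "github.com/thxcode/gguf-parser-go" // alias assumed for illustration
)

func printWebPage() {
	om := parser.OllamaModel{
		Schema:     "https",                 // placeholder values, not the package defaults
		Registry:   "registry.example.com",
		Namespace:  "library",
		Repository: "llama3",
		Tag:        "latest",
	}
	// WebPageURL joins <namespace>/<repository>:<tag> onto the registry host,
	// i.e. https://registry.example.com/library/llama3:latest here.
	fmt.Println(om.WebPageURL())
}
```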
+func (om *OllamaModel) Complete(ctx context.Context, cli *http.Client) error { + if om.Client == nil { + om.Client = cli + } + u := &url.URL{ Scheme: om.Schema, Host: om.Registry, } - return u.JoinPath(om.Namespace, om.Repository+":"+om.Tag) -} + u = u.JoinPath("v2", om.Namespace, om.Repository, "manifests", om.Tag) -// Complete completes the OllamaModel with the given context and http client. -func (om *OllamaModel) Complete(ctx context.Context, cli *http.Client) error { - req, err := httpx.NewGetRequestWithContext(ctx, om.URL().String()) + req, err := httpx.NewGetRequestWithContext(ctx, u.String()) if err != nil { return fmt.Errorf("new request: %w", err) } - err = httpx.Do(cli, req, func(resp *http.Response) error { + err = httpx.Do(om.Client, req, func(resp *http.Response) error { if resp.StatusCode != http.StatusOK { return fmt.Errorf("status code %d", resp.StatusCode) } return json.NewDecoder(resp.Body).Decode(om) }) if err != nil { - return fmt.Errorf("do request: %w", err) + return fmt.Errorf("do request %s: %w", u, err) } // Connect. - om.Config.model = om + om.Config.Root = om for i := range om.Layers { - om.Layers[i].model = om + om.Layers[i].Root = om } return nil } -// URL returns the URL of the OllamaModelLayer. -func (ol *OllamaModelLayer) URL() *url.URL { - if ol.model == nil { +// Params returns the parameters of the OllamaModel. +func (om *OllamaModel) Params(ctx context.Context, cli *http.Client) (map[string]any, error) { + if cli == nil { + cli = om.Client + } + if cli == nil { + return nil, fmt.Errorf("no client") + } + + mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.params$`)) + if len(mls) == 0 { + return nil, nil + } + + rs := make([]map[string]any, len(mls)) + eg, ctx := errgroup.WithContext(ctx) + for i := range mls { + x := i + eg.Go(func() error { + bs, err := mls[x].FetchBlob(ctx, cli) + if err == nil { + p := make(map[string]any) + if err = json.Unmarshal(bs, &p); err == nil { + rs[x] = p + } + } + return err + }) + } + if err := eg.Wait(); err != nil { + return nil, fmt.Errorf("fetch blob: %w", err) + } + + r := make(map[string]any) + for i := range rs { + for k, v := range rs[i] { + r[k] = v + } + } + return r, nil +} + +// Template returns the template of the OllamaModel. +func (om *OllamaModel) Template(ctx context.Context, cli *http.Client) (string, error) { + if cli == nil { + cli = om.Client + } + if cli == nil { + return "", fmt.Errorf("no client") + } + + mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.(prompt|template)$`)) + if len(mls) == 0 { + return "", nil + } + + ml := mls[len(mls)-1] + bs, err := ml.FetchBlob(ctx, cli) + if err != nil { + return "", fmt.Errorf("fetch blob: %w", err) + } + return stringx.FromBytes(&bs), nil +} + +// System returns the system message of the OllamaModel. +func (om *OllamaModel) System(ctx context.Context, cli *http.Client) (string, error) { + if cli == nil { + cli = om.Client + } + if cli == nil { + return "", fmt.Errorf("no client") + } + + mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.system$`)) + if len(mls) == 0 { + return "", nil + } + + ml := mls[len(mls)-1] + bs, err := ml.FetchBlob(ctx, cli) + if err != nil { + return "", fmt.Errorf("fetch blob: %w", err) + } + return stringx.FromBytes(&bs), nil +} + +// License returns the license of the OllamaModel. 
+func (om *OllamaModel) License(ctx context.Context, cli *http.Client) ([]string, error) { + if cli == nil { + cli = om.Client + } + if cli == nil { + return nil, fmt.Errorf("no client") + } + + mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.license$`)) + if len(mls) == 0 { + return nil, nil + } + + rs := make([]string, len(mls)) + eg, ctx := errgroup.WithContext(ctx) + for i := range mls { + x := i + eg.Go(func() error { + bs, err := mls[x].FetchBlob(ctx, cli) + if err == nil { + rs[x] = stringx.FromBytes(&bs) + } + return err + }) + } + if err := eg.Wait(); err != nil { + return nil, fmt.Errorf("fetch blob: %w", err) + } + return rs, nil +} + +// Messages returns the messages of the OllamaModel. +func (om *OllamaModel) Messages(ctx context.Context, cli *http.Client) ([]json.RawMessage, error) { + if cli == nil { + cli = om.Client + } + if cli == nil { + return nil, fmt.Errorf("no client") + } + + mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.messages$`)) + if len(mls) == 0 { + return nil, nil + } + + rs := make([]json.RawMessage, len(mls)) + eg, ctx := errgroup.WithContext(ctx) + for i := range mls { + x := i + eg.Go(func() error { + bs, err := mls[x].FetchBlob(ctx, cli) + if err == nil { + rs[x] = bs + } + return err + }) + } + if err := eg.Wait(); err != nil { + return nil, fmt.Errorf("fetch blob: %w", err) + } + return rs, nil +} + +// BlobURL returns the blob URL of the OllamaModelLayer. +func (ol *OllamaModelLayer) BlobURL() *url.URL { + if ol.Root == nil { return nil } u := &url.URL{ - Scheme: ol.model.Schema, - Host: ol.model.Registry, + Scheme: ol.Root.Schema, + Host: ol.Root.Registry, + } + return u.JoinPath("v2", ol.Root.Namespace, ol.Root.Repository, "blobs", ol.Digest) +} + +// FetchBlob fetches the blob of the OllamaModelLayer with the given context and http client, +// and returns the response body as bytes. +func (ol *OllamaModelLayer) FetchBlob(ctx context.Context, cli *http.Client) ([]byte, error) { + var b []byte + err := ol.FetchBlobFunc(ctx, cli, func(resp *http.Response) error { + b = httpx.BodyBytes(resp) + return nil + }) + return b, err +} + +// FetchBlobFunc fetches the blob of the OllamaModelLayer with the given context and http client, +// and processes the response with the given function. +func (ol *OllamaModelLayer) FetchBlobFunc(ctx context.Context, cli *http.Client, process func(*http.Response) error) error { + if cli == nil { + cli = ol.Root.Client + } + if cli == nil { + return fmt.Errorf("no client") + } + + u := ol.BlobURL() + if u == nil { + return fmt.Errorf("no blob URL") + } + + req, err := httpx.NewGetRequestWithContext(ctx, u.String()) + if err != nil { + return fmt.Errorf("new request: %w", err) + } + + err = httpx.Do(cli, req, process) + if err != nil { + return fmt.Errorf("do request %s: %w", u, err) } - return u.JoinPath("v2", ol.model.Namespace, ol.model.Repository, "blobs", ol.Digest) + return nil } -// WebURL returns the Ollama web URL of the OllamaModelLayer. -func (ol *OllamaModelLayer) WebURL() *url.URL { - if ol.model == nil || len(ol.MediaType) < 12 { +// WebPageURL returns the Ollama web page URL of the OllamaModelLayer. 
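The accessors added above (`Params`, `Template`, `System`, `License`, `Messages`) all resolve their layers through `FetchBlob`, so the manifest has to be completed first; `Complete` also records the client on `om.Client`, which is what allows `nil` to be passed later. A rough sketch under those assumptions, reusing the hypothetical package alias from the earlier example:

```go
package example

import (
	"context"
	"fmt"
	"net/http"

	parser "github.com/thxcode/gguf-parser-go" // alias assumed for illustration
)

// inspectOllamaModel is a hypothetical helper, not part of this patch.
func inspectOllamaModel(ctx context.Context, cli *http.Client, om *parser.OllamaModel) error {
	// Complete fetches the manifest, wires om into every layer's Root,
	// and stores cli on om.Client for later calls.
	if err := om.Complete(ctx, cli); err != nil {
		return err
	}

	// With om.Client set, nil can be passed as the client to the accessors.
	params, err := om.Params(ctx, nil) // merged application/vnd.ollama.image.params blobs
	if err != nil {
		return err
	}
	tmpl, err := om.Template(ctx, nil) // last template/prompt layer, empty string if absent
	if err != nil {
		return err
	}
	lics, err := om.License(ctx, nil) // one entry per license layer
	if err != nil {
		return err
	}

	fmt.Printf("params: %v\ntemplate: %q\nlicenses: %d\n", params, tmpl, len(lics))
	return nil
}
```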
+func (ol *OllamaModelLayer) WebPageURL() *url.URL { + if ol.Root == nil || len(ol.MediaType) < 12 { return nil } dg := strings.TrimPrefix(ol.Digest, "sha256:")[:12] u := &url.URL{ - Scheme: ol.model.Schema, - Host: ol.model.Registry, + Scheme: ol.Root.Schema, + Host: ol.Root.Registry, + } + return u.JoinPath(ol.Root.Namespace, ol.Root.Repository+":"+ol.Root.Tag, "blobs", dg) +} + +// FetchWebPage fetches the web page of the OllamaModelLayer with the given context and http client, +// and processes the response with the given function. +func (ol *OllamaModelLayer) FetchWebPage(ctx context.Context, cli *http.Client) (string, error) { + if cli == nil { + cli = ol.Root.Client + } + if cli == nil { + return "", fmt.Errorf("no client") + } + + u := ol.WebPageURL() + if u == nil { + return "", fmt.Errorf("no BlobURL") + } + + req, err := httpx.NewGetRequestWithContext(ctx, u.String()) + if err != nil { + return "", fmt.Errorf("new request: %w", err) + } + { + rus := ol.Root.WebPageURL().String() + req.Header.Add("Referer", rus) + req.Header.Add("Hx-Current-Url", rus) + req.Header.Add("Hx-Request", "true") + req.Header.Add("Hx-Target", "file-explorer") + } + + var n *html.Node + err = httpx.Do(cli, req, func(resp *http.Response) error { + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("status code %d", resp.StatusCode) + } + n, err = html.Parse(resp.Body) + if err != nil { + return fmt.Errorf("parse html: %w", err) + } + return nil + }) + if err != nil { + return "", fmt.Errorf("do request %s: %w", u, err) + } + + var wk func(*html.Node) string + wk = func(n *html.Node) string { + if n.Type == html.ElementNode && n.Data == "div" { + for i := range n.Attr { + if n.Attr[i].Key == "class" && n.Attr[i].Val == "whitespace-pre-wrap" { + return n.FirstChild.Data + } + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + if r := wk(c); r != "" { + return r + } + } + return "" } - return u.JoinPath(ol.model.Namespace, ol.model.Repository+":"+ol.model.Tag, "blobs", dg) + + return wk(n), nil } diff --git a/util/anyx/any.go b/util/anyx/any.go new file mode 100644 index 0000000..8c74fa1 --- /dev/null +++ b/util/anyx/any.go @@ -0,0 +1,128 @@ +package anyx + +import ( + "encoding/json" + "fmt" + "strconv" + + "golang.org/x/exp/constraints" +) + +// Number converts any type to the specified number type. +func Number[T constraints.Integer | constraints.Float](v any) T { + switch vv := v.(type) { + case int: + return T(vv) + case int8: + return T(vv) + case int16: + return T(vv) + case int32: + return T(vv) + case int64: + return T(vv) + case uint: + return T(vv) + case uint8: + return T(vv) + case uint16: + return T(vv) + case uint32: + return T(vv) + case uint64: + return T(vv) + case float32: + return T(vv) + case float64: + return T(vv) + case bool: + if vv { + return T(1) + } + return T(0) + case string: + x, err := strconv.ParseInt(vv, 10, 64) + if err != nil { + y, err := strconv.ParseFloat(vv, 64) + if err != nil { + return T(0) + } else { + return T(y) + } + } + return T(x) + case json.Number: + x, err := vv.Int64() + if err != nil { + y, err := vv.Float64() + if err != nil { + return T(0) + } else { + return T(y) + } + } + return T(x) + default: + return T(0) + } +} + +// Bool converts any type to a bool. 
+func Bool(v any) bool { + switch vv := v.(type) { + case bool: + return vv + case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, uintptr: + return vv != 0 + case float32, float64: + return vv != 0 + case string: + return vv != "0" + case fmt.Stringer: + return vv.String() != "0" + default: + return false + } +} + +// String converts any type to a string. +func String(v any) string { + switch vv := v.(type) { + case string: + return vv + case []byte: + return string(vv) + case int: + return strconv.FormatInt(int64(vv), 10) + case int8: + return strconv.FormatInt(int64(vv), 10) + case int16: + return strconv.FormatInt(int64(vv), 10) + case int32: + return strconv.FormatInt(int64(vv), 10) + case int64: + return strconv.FormatInt(vv, 10) + case uint: + return strconv.FormatUint(uint64(vv), 10) + case uint8: + return strconv.FormatUint(uint64(vv), 10) + case uint16: + return strconv.FormatUint(uint64(vv), 10) + case uint32: + return strconv.FormatUint(uint64(vv), 10) + case uint64: + return strconv.FormatUint(vv, 10) + case float32: + return strconv.FormatFloat(float64(vv), 'f', -1, 32) + case float64: + return strconv.FormatFloat(vv, 'f', -1, 64) + case bool: + return strconv.FormatBool(vv) + case fmt.Stringer: + return vv.String() + case json.RawMessage: + return string(vv) + default: + return fmt.Sprintf("%v", v) + } +} diff --git a/util/httpx/client.go b/util/httpx/client.go index 5d0c486..d925e05 100644 --- a/util/httpx/client.go +++ b/util/httpx/client.go @@ -246,5 +246,8 @@ func Do(cli *http.Client, req *http.Request, respFunc func(*http.Response) error return fmt.Errorf("do request: %w", err) } defer Close(resp) + if respFunc == nil { + return nil + } return respFunc(resp) } diff --git a/util/json/common.go b/util/json/common.go index ec77692..57a5406 100644 --- a/util/json/common.go +++ b/util/json/common.go @@ -10,6 +10,8 @@ type RawMessage = stdjson.RawMessage var ( MarshalIndent = stdjson.MarshalIndent Indent = stdjson.Indent + NewEncoder = stdjson.NewEncoder + Valid = stdjson.Valid ) // MustMarshal is similar to Marshal, diff --git a/util/json/jsoniter.go b/util/json/jsoniter.go index 6cd66c1..edb2af6 100644 --- a/util/json/jsoniter.go +++ b/util/json/jsoniter.go @@ -37,12 +37,11 @@ func init() { } } jsoniter.RegisterTypeDecoderFunc("interface {}", decodeNumberAsInt64IfPossible) + jsoniter.RegisterTypeDecoderFunc("any", decodeNumberAsInt64IfPossible) } var ( Marshal = json.Marshal Unmarshal = json.Unmarshal NewDecoder = json.NewDecoder - NewEncoder = json.NewEncoder - Valid = json.Valid ) diff --git a/util/json/stdjson.go b/util/json/stdjson.go index 602394e..d04966e 100644 --- a/util/json/stdjson.go +++ b/util/json/stdjson.go @@ -10,6 +10,4 @@ var ( Marshal = json.Marshal Unmarshal = json.Unmarshal NewDecoder = json.NewDecoder - NewEncoder = json.NewEncoder - Valid = json.Valid ) diff --git a/util/stringx/bytes.go b/util/stringx/bytes.go new file mode 100644 index 0000000..7433a2a --- /dev/null +++ b/util/stringx/bytes.go @@ -0,0 +1,14 @@ +package stringx + +import "unsafe" + +// FromBytes converts a byte slice to a string. +func FromBytes(b *[]byte) string { + return unsafe.String(unsafe.SliceData(*b), len(*b)) +} + +// ToBytes converts a string to a byte slice, +// which is impossible to modify the item of slice. +func ToBytes(s string) (bs []byte) { + return unsafe.Slice(unsafe.StringData(s), len(s)) +}
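The new `anyx` and `stringx` helpers are small enough to exercise end to end. The snippet below is illustrative only; note that `stringx.FromBytes`/`ToBytes` alias the underlying memory via `unsafe`, so the byte slice handed to `FromBytes` must not be modified afterwards and the slice returned by `ToBytes` must be treated as read-only.

```go
package main

import (
	"fmt"

	"github.com/thxcode/gguf-parser-go/util/anyx"
	"github.com/thxcode/gguf-parser-go/util/stringx"
)

func main() {
	// anyx.Number coerces loosely typed values (numbers, bools, numeric strings)
	// into a concrete numeric type.
	var raw any = "4096"
	fmt.Println(anyx.Number[uint64](raw)) // 4096

	fmt.Println(anyx.Bool(1), anyx.Bool("0")) // true false
	fmt.Println(anyx.String(3.14))            // 3.14

	// stringx.FromBytes/ToBytes convert without copying; treat the results as read-only.
	b := []byte("hello")
	s := stringx.FromBytes(&b)
	fmt.Println(s, len(stringx.ToBytes(s))) // hello 5
}
```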