diff --git a/README.md b/README.md
index 67660f4..000c887 100644
--- a/README.md
+++ b/README.md
@@ -112,7 +112,7 @@ $ gguf-parser --path="~/.cache/lm-studio/models/NousResearch/Hermes-2-Pro-Mistra
 |       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+--------+-----------+
 |       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |   NONUMA  |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+-----------+
-| llama |     32768    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   33 (32 + 1)  |       Yes      |    0   | 176.25 MiB | 326.25 MiB | 32 + 1 |  4 GiB | 11.16 GiB |
+| llama |     32768    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   33 (32 + 1)  |       Yes      |    0   | 168.25 MiB | 318.25 MiB | 32 + 1 |  4 GiB | 11.16 GiB |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+-----------+
 
 $ # Retrieve the model's metadata via split file,
@@ -150,7 +150,7 @@ $ gguf-parser --path="~/.cache/lm-studio/models/Qwen/Qwen2-72B-Instruct-GGUF/qwe
 |       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+--------+-----------+
 |       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |   NONUMA  |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+-----------+
-| qwen2 |     32768    |     2048 / 512     |     Disabled    |  Enabled  |       No       | Not Supported |   81 (80 + 1)  |       Yes      |    0   | 307.38 MiB | 457.38 MiB | 80 + 1 | 10 GiB | 73.47 GiB |
+| qwen2 |     32768    |     2048 / 512     |     Disabled    |  Enabled  |       No       | Not Supported |   81 (80 + 1)  |       Yes      |    0   | 291.38 MiB | 441.38 MiB | 80 + 1 | 10 GiB | 73.47 GiB |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+-----------+
 
 ```
@@ -190,7 +190,7 @@ $ gguf-parser --url="https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8
 |       |              |                    |                 |               |                |               |                |                +--------+------------+------------+--------+-----------+-----------+
 |       |              |                    |                 |               |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |    UMA    |   NONUMA  |
 +-------+--------------+--------------------+-----------------+---------------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+-----------+-----------+
-| llama |     32768    |     2048 / 512     |     Disabled    | Not Supported |       No       |   Supported   |   33 (32 + 1)  |       Yes      |    0   | 277.10 MiB | 427.10 MiB | 32 + 1 | 24.94 GiB | 27.41 GiB |
+| llama |     32768    |     2048 / 512     |     Disabled    | Not Supported |       No       |   Supported   |   33 (32 + 1)  |       Yes      |    0   | 269.10 MiB | 419.10 MiB | 32 + 1 | 24.94 GiB | 27.41 GiB |
 +-------+--------------+--------------------+-----------------+---------------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+-----------+-----------+
 
 $ # Retrieve the model's metadata via split file
@@ -227,7 +227,7 @@ $ gguf-parser --url="https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-405B-In
 |       |              |                    |                 |           |                |               |                |                +--------+------------+------------+---------+---------+------------+
 |       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   |  LAYERS |   UMA   |   NONUMA   |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+---------+---------+------------+
-| llama |    131072    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |  127 (126 + 1) |       Yes      |    0   | 684.53 MiB | 834.53 MiB | 126 + 1 | 126 GiB | 299.79 GiB |
+| llama |    131072    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |  127 (126 + 1) |       Yes      |    0   | 652.53 MiB | 802.53 MiB | 126 + 1 | 126 GiB | 299.79 GiB |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+---------+---------+------------+
 
 ```
@@ -267,7 +267,7 @@ $ gguf-parser --hf-repo="openbmb/MiniCPM-Llama3-V-2_5-gguf" --hf-file="ggml-mode
 |       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+--------+----------+
 |       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |  NONUMA  |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+----------+
-| llama |     8192     |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   33 (32 + 1)  |       Yes      |    0   | 184.85 MiB | 334.85 MiB | 32 + 1 |  1 GiB | 7.88 GiB |
+| llama |     8192     |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   33 (32 + 1)  |       Yes      |    0   | 176.85 MiB | 326.85 MiB | 32 + 1 |  1 GiB | 7.78 GiB |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+----------+
 
 $ # Retrieve the model's metadata via split file
@@ -304,7 +304,7 @@ $ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-4
 |       |              |                    |                 |           |                |               |                |                +--------+------------+------------+---------+---------+------------+
 |       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   |  LAYERS |   UMA   |   NONUMA   |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+---------+---------+------------+
-| llama |    131072    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |  127 (126 + 1) |       Yes      |    0   | 684.53 MiB | 834.53 MiB | 126 + 1 | 126 GiB | 247.59 GiB |
+| llama |    131072    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |  127 (126 + 1) |       Yes      |    0   | 652.53 MiB | 802.53 MiB | 126 + 1 | 126 GiB | 247.59 GiB |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+---------+---------+------------+
 
 ```
@@ -344,7 +344,7 @@ $ gguf-parser --ms-repo="shaowenchen/chinese-alpaca-2-13b-16k-gguf" --ms-file="c
 |       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+-----------+-----------+
 |       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |    UMA    |   NONUMA  |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+-----------+-----------+
-| llama |     16384    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   41 (40 + 1)  |       Yes      |    0   | 154.95 MiB | 304.95 MiB | 40 + 1 | 12.50 GiB | 22.96 GiB |
+| llama |     16384    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   41 (40 + 1)  |       Yes      |    0   | 144.95 MiB | 294.95 MiB | 40 + 1 | 12.50 GiB | 22.96 GiB |
 +-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+-----------+-----------+
 
 ```
@@ -377,15 +377,15 @@ $ gguf-parser --ol-model="llama3.1"
 |  gpt2 |    2 MiB    |   128256   |        N/A       |   128000  |   128009  |    N/A    |    N/A    |      N/A      |       N/A       |      N/A      |
 +-------+-------------+------------+------------------+-----------+-----------+-----------+-----------+---------------+-----------------+---------------+
 
-+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+-------------------------+--------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |           RAM           |       VRAM 0       |
-|       |              |                    |                 |           |                |               |                |                +------------+------------+--------+-----------+
-|       |              |                    |                 |           |                |               |                |                |     UMA    |   NONUMA   |   UMA  |   NONUMA  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+------------+------------+--------+-----------+
-| llama |    131072    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   33 (32 + 1)  |       Yes      | 411.62 MiB | 561.62 MiB | 16 GiB | 29.08 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+------------+------------+--------+-----------+
++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                                                                                                                                    |
++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+----------------------------------+-----------------------------+
+|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |            VRAM 0           |
+|       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+--------+-----------+
+|       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |   NONUMA  |
++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+-----------+
+| llama |    131072    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   33 (32 + 1)  |       Yes      |    0   | 403.62 MiB | 553.62 MiB | 32 + 1 | 16 GiB | 29.08 GiB |
++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+-----------+
 
 $ # Ollama Model includes the preset params and other artifacts, like multimodal projectors or LoRA adapters, 
 $ # you can get the usage of Ollama running by using `--ol-usage` option.
@@ -415,15 +415,15 @@ $ gguf-parser --ol-model="llama3.1" --ol-usage
 |  gpt2 |    2 MiB    |   128256   |        N/A       |   128000  |   128009  |    N/A    |    N/A    |      N/A      |       N/A       |      N/A      |
 +-------+-------------+------------+------------------+-----------+-----------+-----------+-----------+---------------+-----------------+---------------+
 
-+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                     |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+-------------------------+-----------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |           RAM           |         VRAM 0        |
-|       |              |                    |                 |           |                |               |                |                +------------+------------+------------+----------+
-|       |              |                    |                 |           |                |               |                |                |     UMA    |   NONUMA   |     UMA    |  NONUMA  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+------------+------------+------------+----------+
-| llama |     2048     |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   33 (32 + 1)  |       Yes      | 159.62 MiB | 309.62 MiB | 256.50 MiB | 4.82 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+------------+------------+------------+----------+
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                                                                                                                                       |
++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+----------------------------------+--------------------------------+
+|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |             VRAM 0             |
+|       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+------------+----------+
+|       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |     UMA    |  NONUMA  |
++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+------------+----------+
+| llama |     2048     |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   33 (32 + 1)  |       Yes      |    0   | 151.62 MiB | 301.62 MiB | 32 + 1 | 256.50 MiB | 4.82 GiB |
++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+------------+----------+
 
 ```
 
@@ -507,16 +507,16 @@ flowchart TD
 ```
 
 ```shell
-$ gguf-parser --hf-repo="hierholzer/Llama-3.1-70B-Instruct-GGUF" --hf-file="Llama-3.1-70B-Instruct-Q4_K_M.gguf" --skip-metadata --skip-architecture --skip-tokenizer --ctx-size=1024 --tensor-split="8,10"
-+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                                                      |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+----------------------------------+------------------------------+--------------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |            VRAM 0            |             VRAM 1             |
-|       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+---------+-----------+--------+-----------+-----------+
-|       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA   |   NONUMA  | LAYERS |    UMA    |   NONUMA  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+---------+-----------+--------+-----------+-----------+
-| llama |     1024     |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   81 (80 + 1)  |       Yes      |    0   | 270.08 MiB | 420.08 MiB |   36   | 144 MiB | 18.67 GiB | 44 + 1 | 21.40 GiB | 22.44 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+---------+-----------+--------+-----------+-----------+
+$ gguf-parser --hf-repo="hierholzer/Llama-3.1-70B-Instruct-GGUF" --hf-file="Llama-3.1-70B-Instruct-Q4_K_M.gguf" --skip-metadata --skip-architecture --skip-tokenizer --ctx-size=1024 --tensor-split="8,10" --in-short
++------------------------------------------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                                                           |
++----------------+----------------+----------------------------------+------------------------------+--------------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |            VRAM 0            |             VRAM 1             |
+|                |                +--------+------------+------------+--------+---------+-----------+--------+-----------+-----------+
+|                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA   |   NONUMA  | LAYERS |    UMA    |   NONUMA  |
++----------------+----------------+--------+------------+------------+--------+---------+-----------+--------+-----------+-----------+
+|   81 (80 + 1)  |       Yes      |    0   | 238.08 MiB | 388.08 MiB |   36   | 144 MiB | 17.87 GiB | 44 + 1 | 22.01 GiB | 22.44 GiB |
++----------------+----------------+--------+------------+------------+--------+---------+-----------+--------+-----------+-----------+
 
 ```
 
@@ -525,8 +525,8 @@ resource consumption:
 
 | Host                  | Available RAM | Request RAM | Available VRAM | Request VRAM | Result     |
 |-----------------------|---------------|-------------|----------------|--------------|------------|
-| host1                 |               | 420.08 MiB  |                |              | :thumbsup: |
-| host1 (NVIDIA 4080 0) |               |             | 8 GiB          | 18.67 GiB    |            |
+| host1                 | ENOUGH        | 388.08 MiB  |                |              | :thumbsup: |
+| host1 (NVIDIA 4080 0) |               |             | 8 GiB          | 17.87 GiB    |            |
 | host1 (NVIDIA 4080 1) |               |             | 10 GiB         | 22.44 GiB    |            |
 
 It appears that running the model on `host1` alone is not feasible.
@@ -560,16 +560,16 @@ flowchart TD
 ```
 
 ```shell
-$ gguf-parser --hf-repo="hierholzer/Llama-3.1-70B-Instruct-GGUF" --hf-file="Llama-3.1-70B-Instruct-Q4_K_M.gguf" --skip-metadata --skip-architecture --skip-tokenizer --ctx-size=1024 --tensor-split="8,10,12,6" --rpc="host1:50052,host1:50053,host2:50052,host3:50052"
-+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                                                                                                                       |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+----------------------------------+-------------------------------+--------------------------------+--------------------------------+------------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |             VRAM 0            |             VRAM 1             |             VRAM 2             |            VRAM 3            |
-|       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+----------+-----------+--------+-----------+-----------+--------+-----------+-----------+--------+----------+----------+
-|       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |    UMA   |   NONUMA  | LAYERS |    UMA    |   NONUMA  | LAYERS |    UMA    |   NONUMA  | LAYERS |    UMA   |  NONUMA  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+----------+-----------+--------+-----------+-----------+--------+-----------+-----------+--------+----------+----------+
-| llama |     1024     |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   81 (80 + 1)  |       Yes      |    0   | 302.08 MiB | 452.08 MiB |   18   | 9.93 GiB | 10.17 GiB |   23   | 11.08 GiB | 11.23 GiB |   27   | 12.95 GiB | 13.08 GiB | 12 + 1 | 6.26 GiB | 7.26 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+----------+-----------+--------+-----------+-----------+--------+-----------+-----------+--------+----------+----------+
+$ gguf-parser --hf-repo="hierholzer/Llama-3.1-70B-Instruct-GGUF" --hf-file="Llama-3.1-70B-Instruct-Q4_K_M.gguf" --skip-metadata --skip-architecture --skip-tokenizer --ctx-size=1024 --tensor-split="8,10,12,6" --rpc="host1:50052,host1:50053,host2:50052,host3:50052" --in-short
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                                                                                                                           |
++----------------+----------------+----------------------------------+------------------------------+--------------------------------+--------------------------------+------------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |            VRAM 0            |             VRAM 1             |             VRAM 2             |            VRAM 3            |
+|                |                +--------+------------+------------+--------+----------+----------+--------+-----------+-----------+--------+-----------+-----------+--------+----------+----------+
+|                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |    UMA   |  NONUMA  | LAYERS |    UMA    |   NONUMA  | LAYERS |    UMA    |   NONUMA  | LAYERS |    UMA   |  NONUMA  |
++----------------+----------------+--------+------------+------------+--------+----------+----------+--------+-----------+-----------+--------+-----------+-----------+--------+----------+----------+
+|   81 (80 + 1)  |       Yes      |    0   | 238.08 MiB | 388.08 MiB |   18   | 8.85 GiB | 9.37 GiB |   23   | 10.88 GiB | 11.32 GiB |   27   | 12.75 GiB | 13.19 GiB | 12 + 1 | 6.87 GiB | 7.31 GiB |
++----------------+----------------+--------+------------+------------+--------+----------+----------+--------+-----------+-----------+--------+-----------+-----------+--------+----------+----------+
 
 ```
 
@@ -578,11 +578,11 @@ following resource consumption:
 
 | Host                  | Available RAM | Request RAM | Available VRAM | Request VRAM | Result     |
 |-----------------------|---------------|-------------|----------------|--------------|------------|
-| host4                 | 11 GiB        | 452.08 MiB  |                |              | :thumbsup: |
-| host1 (NVIDIA 4080 0) |               |             | 8 GiB          | 10.17 GiB    |            |
-| host1 (NVIDIA 4080 1) |               |             | 10 GiB         | 11.23 GiB    |            |
-| host2 (NVIDIA 4090)   |               |             | 12 GiB         | 13.08 GiB    |            |
-| host3 (Apple M1 Max)  | ENOUGH        |             | 6 GiB          | 6.26 GiB     |            |
+| host4                 | 11 GiB        | 388.08 MiB  |                |              | :thumbsup: |
+| host1 (NVIDIA 4080 0) |               |             | 8 GiB          | 9.37 GiB     |            |
+| host1 (NVIDIA 4080 1) |               |             | 10 GiB         | 11.32 GiB    |            |
+| host2 (NVIDIA 4090)   |               |             | 12 GiB         | 13.19 GiB    |            |
+| host3 (Apple M1 Max)  | ENOUGH        |             | 6 GiB          | 6.87 GiB     |            |
 
 It seems that the model cannot be served on `host4`, even with all layers offloaded to `host1`, `host2`, and `host3`.
 
@@ -612,16 +612,16 @@ flowchart TD
 ```
 
 ```shell
-$ gguf-parser --hf-repo="hierholzer/Llama-3.1-70B-Instruct-GGUF" --hf-file="Llama-3.1-70B-Instruct-Q4_K_M.gguf" --skip-metadata --skip-architecture --skip-tokenizer --ctx-size=1024 --tensor-split="6,11,12,8,10" --rpc="host4:50052,host2:50052,host1:50052,host1:50053"
-+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                                                                                                                                               |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+----------------------------------+----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |           VRAM 0           |            VRAM 1            |            VRAM 2            |            VRAM 3            |            VRAM 4            |
-|       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+--------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+
-|       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |  NONUMA  | LAYERS |    UMA   |  NONUMA  | LAYERS |    UMA   |  NONUMA  | LAYERS |    UMA   |  NONUMA  | LAYERS |    UMA   |  NONUMA  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+
-| llama |     1024     |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   81 (80 + 1)  |       Yes      |    0   | 318.08 MiB | 468.08 MiB |   11   | 44 MiB | 6.88 GiB |   19   | 9.15 GiB | 9.32 GiB |   20   | 9.66 GiB | 9.83 GiB |   14   | 6.83 GiB | 7.01 GiB | 16 + 1 | 8.13 GiB | 9.12 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+
+$ gguf-parser --hf-repo="hierholzer/Llama-3.1-70B-Instruct-GGUF" --hf-file="Llama-3.1-70B-Instruct-Q4_K_M.gguf" --skip-metadata --skip-architecture --skip-tokenizer --ctx-size=1024 --tensor-split="6,11,12,8,10" --rpc="host4:50052,host2:50052,host1:50052,host1:50053" --in-short
++-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                                                                                                                                                    |
++----------------+----------------+----------------------------------+----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |           VRAM 0           |            VRAM 1            |            VRAM 2            |            VRAM 3            |            VRAM 4            |
+|                |                +--------+------------+------------+--------+--------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+
+|                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |  NONUMA  | LAYERS |    UMA   |  NONUMA  | LAYERS |    UMA   |  NONUMA  | LAYERS |    UMA   |  NONUMA  | LAYERS |    UMA   |  NONUMA  |
++----------------+----------------+--------+------------+------------+--------+--------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+
+|   81 (80 + 1)  |       Yes      |    0   | 238.08 MiB | 388.08 MiB |   11   | 44 MiB | 6.08 GiB |   19   | 8.96 GiB | 9.39 GiB |   20   | 9.47 GiB | 9.90 GiB |   14   | 6.63 GiB | 7.07 GiB | 16 + 1 | 8.74 GiB | 9.18 GiB |
++----------------+----------------+--------+------------+------------+--------+--------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+--------+----------+----------+
 
 ```
 
@@ -630,28 +630,28 @@ following resource consumption:
 
 | Host                  | Available RAM | Request RAM | Available VRAM | Request VRAM | Result     |
 |-----------------------|---------------|-------------|----------------|--------------|------------|
-| host3 (Apple M1 Max)  | ENOUGH        | 318.08 MiB  |                |              | :thumbsup: |
+| host3 (Apple M1 Max)  | ENOUGH        | 238.08 MiB  |                |              | :thumbsup: |
 | host3 (Apple M1 Max)  |               |             | 6 GiB          | 44 MiB       | :thumbsup: |
-| host4                 | 11 GiB        | 9.15 GiB    |                |              | :thumbsup: |
-| host1 (NVIDIA 4080 1) |               |             | 12 GiB         | 9.83 GiB     | :thumbsup: |
-| host2 (NVIDIA 4080 0) |               |             | 8 GiB          | 7.01 GiB     | :thumbsup: |
-| host3 (NVIDIA 4080 1) |               |             | 10 GiB         | 9.12 GiB     | :thumbsup: |
+| host4                 | 11 GiB        | 9.39 GiB    |                |              | :thumbsup: |
+| host1 (NVIDIA 4080 1) |               |             | 12 GiB         | 9.90 GiB     | :thumbsup: |
+| host2 (NVIDIA 4080 0) |               |             | 8 GiB          | 7.07 GiB     | :thumbsup: |
+| host3 (NVIDIA 4080 1) |               |             | 10 GiB         | 9.18 GiB     | :thumbsup: |
 
 Now, the model can be successfully served on `host3`, with all layers offloaded to `host1`, `host2`, and `host4`.
 
 #### Full Layers Offload (default)
 
 ```shell
-$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer
-+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                       |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+----------------------------------+--------------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |             VRAM 0             |
-|       |              |                    |                 |           |                |               |                |                +--------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   |  LAYERS |   UMA   |   NONUMA   |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+---------+---------+------------+
-| llama |    131072    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |  127 (126 + 1) |       Yes      |    0   | 684.53 MiB | 834.53 MiB | 126 + 1 | 126 GiB | 247.59 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+---------+---------+------------+
+$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --in-short
++-----------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                            |
++----------------+----------------+----------------------------------+--------------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |             VRAM 0             |
+|                |                +--------+------------+------------+---------+---------+------------+
+|                |                | LAYERS |     UMA    |   NONUMA   |  LAYERS |   UMA   |   NONUMA   |
++----------------+----------------+--------+------------+------------+---------+---------+------------+
+|  127 (126 + 1) |       Yes      |    0   | 652.53 MiB | 802.53 MiB | 126 + 1 | 126 GiB | 247.59 GiB |
++----------------+----------------+--------+------------+------------+---------+---------+------------+
 
 ```
 
@@ -659,31 +659,31 @@ $ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-4
 
 ```shell
 $ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --gpu-layers=0
-+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                     |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+-----------------------------------+-----------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM                |            VRAM 0           |
-|       |              |                    |                 |           |                |               |                |                +---------+------------+------------+--------+--------+-----------+
-|       |              |                    |                 |           |                |               |                |                |  LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |   NONUMA  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+---------+------------+------------+--------+--------+-----------+
-| llama |    131072    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |        0       |       No       | 126 + 1 | 126.64 GiB | 126.78 GiB |    0   |   0 B  | 33.34 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+---------+------------+------------+--------+--------+-----------+
++---------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                          |
++----------------+----------------+-----------------------------------+-----------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM                |            VRAM 0           |
+|                |                +---------+------------+------------+--------+--------+-----------+
+|                |                |  LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |   NONUMA  |
++----------------+----------------+---------+------------+------------+--------+--------+-----------+
+|        0       |       No       | 126 + 1 | 126.37 GiB | 126.52 GiB |    0   |   0 B  | 33.34 GiB |
++----------------+----------------+---------+------------+------------+--------+--------+-----------+
 
 ```
 
 #### Specific Layers Offload
 
 ```shell
-$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --gpu-layers=10
-+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                     |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+-----------------------------------+-----------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM                |            VRAM 0           |
-|       |              |                    |                 |           |                |               |                |                +---------+------------+------------+--------+--------+-----------+
-|       |              |                    |                 |           |                |               |                |                |  LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |   NONUMA  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+---------+------------+------------+--------+--------+-----------+
-| llama |    131072    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |       10       |       No       | 116 + 1 | 116.64 GiB | 116.78 GiB |   10   | 10 GiB | 50.39 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+---------+------------+------------+--------+--------+-----------+
+$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --gpu-layers=10 --in-short
++---------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                          |
++----------------+----------------+-----------------------------------+-----------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM                |            VRAM 0           |
+|                |                +---------+------------+------------+--------+--------+-----------+
+|                |                |  LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |   NONUMA  |
++----------------+----------------+---------+------------+------------+--------+--------+-----------+
+|       10       |       No       | 116 + 1 | 116.64 GiB | 116.78 GiB |   10   | 10 GiB | 50.39 GiB |
++----------------+----------------+---------+------------+------------+--------+--------+-----------+
 
 ```
 
@@ -694,16 +694,16 @@ By default, the context size retrieved from the model's metadata.
 Use `--ctx-size` to specify the context size.
 
 ```shell
-$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --ctx-size=4096
-+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                       |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+----------------------------------+--------------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |             VRAM 0             |
-|       |              |                    |                 |           |                |               |                |                +--------+------------+------------+---------+----------+-----------+
-|       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   |  LAYERS |    UMA   |   NONUMA  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+---------+----------+-----------+
-| llama |     4096     |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |  127 (126 + 1) |       Yes      |    0   | 436.53 MiB | 586.53 MiB | 126 + 1 | 3.94 GiB | 93.81 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+---------+----------+-----------+
+$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --ctx-size=4096 --in-short
++-----------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                            |
++----------------+----------------+----------------------------------+--------------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |             VRAM 0             |
+|                |                +--------+------------+------------+---------+----------+-----------+
+|                |                | LAYERS |     UMA    |   NONUMA   |  LAYERS |    UMA   |   NONUMA  |
++----------------+----------------+--------+------------+------------+---------+----------+-----------+
+|  127 (126 + 1) |       Yes      |    0   | 404.53 MiB | 554.53 MiB | 126 + 1 | 3.94 GiB | 93.31 GiB |
++----------------+----------------+--------+------------+------------+---------+----------+-----------+
 
 ```
 
@@ -719,16 +719,16 @@ Please note that not all models support Flash Attention, if the model does not s
 Disabled" even if you enable it.
 
 ```shell
-$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --flash-attention
-+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                       |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+----------------------------------+--------------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |             VRAM 0             |
-|       |              |                    |                 |           |                |               |                |                +--------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   |  LAYERS |   UMA   |   NONUMA   |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+---------+---------+------------+
-| llama |    131072    |     2048 / 512     |     Enabled     |  Enabled  |       No       |   Supported   |  127 (126 + 1) |       Yes      |    0   | 620.53 MiB | 770.53 MiB | 126 + 1 | 126 GiB | 216.11 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+---------+---------+------------+
+$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --flash-attention --in-short
++-----------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                            |
++----------------+----------------+----------------------------------+--------------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |             VRAM 0             |
+|                |                +--------+------------+------------+---------+---------+------------+
+|                |                | LAYERS |     UMA    |   NONUMA   |  LAYERS |   UMA   |   NONUMA   |
++----------------+----------------+--------+------------+------------+---------+---------+------------+
+|  127 (126 + 1) |       Yes      |    0   | 620.53 MiB | 770.53 MiB | 126 + 1 | 126 GiB | 215.70 GiB |
++----------------+----------------+--------+------------+------------+---------+---------+------------+
 
 ```
 
@@ -745,16 +745,16 @@ Please note that some models require loading the whole weight into memory, if th
 LOAD" shows "Not Supported".
 
 ```shell
-$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --no-mmap
-+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                      |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+------------------------------+-----------------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |              RAM             |               VRAM 0              |
-|       |              |                    |                 |           |                |               |                |                +--------+----------+----------+---------+------------+------------+
-|       |              |                    |                 |           |                |               |                |                | LAYERS |    UMA   |  NONUMA  |  LAYERS |     UMA    |   NONUMA   |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+----------+----------+---------+------------+------------+
-| llama |    131072    |     2048 / 512     |     Disabled    |  Disabled |       No       |   Supported   |  127 (126 + 1) |       Yes      |    0   | 2.01 GiB | 2.16 GiB | 126 + 1 | 213.97 GiB | 247.59 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+----------+----------+---------+------------+------------+
+$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --no-mmap --in-short
++----------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                           |
++----------------+----------------+------------------------------+-----------------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |              RAM             |               VRAM 0              |
+|                |                +--------+----------+----------+---------+------------+------------+
+|                |                | LAYERS |    UMA   |  NONUMA  |  LAYERS |     UMA    |   NONUMA   |
++----------------+----------------+--------+----------+----------+---------+------------+------------+
+|  127 (126 + 1) |       Yes      |    0   | 1.98 GiB | 2.13 GiB | 126 + 1 | 213.97 GiB | 247.59 GiB |
++----------------+----------------+--------+----------+----------+---------+------------+------------+
 
 ```
 
@@ -763,28 +763,28 @@ $ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-4
 Use `--lora`/`--control-vector` to estimate the usage when loading a model with adapters.
 
 ```shell
-$ gguf-parser --hf-repo="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF" --hf-file="Meta-Llama-3-8B-Instruct.Q5_K_M.gguf" --skip-metadata --skip-architecture --skip-tokenizer
-+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                   |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+----------------------------------+----------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |           VRAM 0           |
-|       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+--------+----------+
-|       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |  NONUMA  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+----------+
-| llama |     8192     |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   33 (32 + 1)  |       Yes      |    0   | 171.62 MiB | 321.62 MiB | 32 + 1 |  1 GiB | 6.92 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+----------+
+$ gguf-parser --hf-repo="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF" --hf-file="Meta-Llama-3-8B-Instruct.Q5_K_M.gguf" --skip-metadata --skip-architecture --skip-tokenizer --in-short
++-------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                        |
++----------------+----------------+----------------------------------+----------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |           VRAM 0           |
+|                |                +--------+------------+------------+--------+--------+----------+
+|                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |  NONUMA  |
++----------------+----------------+--------+------------+------------+--------+--------+----------+
+|   33 (32 + 1)  |       Yes      |    0   | 163.62 MiB | 313.62 MiB | 32 + 1 |  1 GiB | 6.82 GiB |
++----------------+----------------+--------+------------+------------+--------+--------+----------+
 
 $ # With a LoRA adapter.
-$ gguf-parser --hf-repo="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF" --hf-file="Meta-Llama-3-8B-Instruct.Q5_K_M.gguf" --lora-url="https://huggingface.co/ngxson/test_gguf_lora_adapter/resolve/main/lora-Llama-3-Instruct-abliteration-LoRA-8B-f16.gguf" --skip-metadata --skip-architecture --skip-tokenizer
-+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                   |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+----------------------------------+----------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |           VRAM 0           |
-|       |              |                    |                 |           |                |               |                |                +--------+------------+------------+--------+--------+----------+
-|       |              |                    |                 |           |                |               |                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |  NONUMA  |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+----------+
-| llama |     8192     |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |   33 (32 + 1)  |       Yes      |    0   | 184.30 MiB | 334.30 MiB | 32 + 1 |  1 GiB | 7.08 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+--------+------------+------------+--------+--------+----------+
+$ gguf-parser --hf-repo="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF" --hf-file="Meta-Llama-3-8B-Instruct.Q5_K_M.gguf" --lora-url="https://huggingface.co/ngxson/test_gguf_lora_adapter/resolve/main/lora-Llama-3-Instruct-abliteration-LoRA-8B-f16.gguf" --skip-metadata --skip-architecture --skip-tokenizer --in-short
++-------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                        |
++----------------+----------------+----------------------------------+----------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM               |           VRAM 0           |
+|                |                +--------+------------+------------+--------+--------+----------+
+|                |                | LAYERS |     UMA    |   NONUMA   | LAYERS |   UMA  |  NONUMA  |
++----------------+----------------+--------+------------+------------+--------+--------+----------+
+|   33 (32 + 1)  |       Yes      |    0   | 176.30 MiB | 326.30 MiB | 32 + 1 |  1 GiB | 6.98 GiB |
++----------------+----------------+--------+------------+------------+--------+--------+----------+
 
 ```
 
@@ -793,42 +793,270 @@ $ gguf-parser --hf-repo="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF" --hf-file="
 Use `--gpu-layers-step` to get the proper offload layers number when the model is too large to fit into the GPUs memory.
 
 ```shell
-$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --gpu-layers-step=10
-+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ESTIMATE                                                                                                                                                                                                        |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+-----------------------------------+--------------------------------+
-|  ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED |                RAM                |             VRAM 0             |
-|       |              |                    |                 |           |                |               |                |                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |                |                |  LAYERS |     UMA    |   NONUMA   |  LAYERS |   UMA   |   NONUMA   |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+---------+------------+------------+---------+---------+------------+
-| llama |    131072    |     2048 / 512     |     Disabled    |  Enabled  |       No       |   Supported   |        0       |       No       | 126 + 1 | 126.64 GiB | 126.78 GiB |    0    |   0 B   |  33.34 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       10       |                | 116 + 1 | 116.64 GiB | 116.78 GiB |    10   |  10 GiB |  50.39 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       20       |                | 106 + 1 | 106.64 GiB | 106.78 GiB |    20   |  20 GiB |  67.16 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       30       |                |  96 + 1 |  96.64 GiB |  96.78 GiB |    30   |  30 GiB |  83.93 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       40       |                |  86 + 1 |  86.64 GiB |  86.78 GiB |    40   |  40 GiB | 100.69 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       50       |                |  76 + 1 |  76.64 GiB |  76.78 GiB |    50   |  50 GiB | 117.46 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       60       |                |  66 + 1 |  66.64 GiB |  66.78 GiB |    60   |  60 GiB | 134.23 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       70       |                |  56 + 1 |  56.64 GiB |  56.78 GiB |    70   |  70 GiB |   151 GiB  |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       80       |                |  46 + 1 |  46.64 GiB |  46.78 GiB |    80   |  80 GiB | 167.77 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       90       |                |  36 + 1 |  36.64 GiB |  36.78 GiB |    90   |  90 GiB | 184.54 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       100      |                |  26 + 1 |  26.64 GiB |  26.78 GiB |   100   | 100 GiB | 201.31 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       110      |                |  16 + 1 |  16.64 GiB |  16.78 GiB |   110   | 110 GiB | 218.08 GiB |
-|       |              |                    |                 |           |                |               +----------------+                +---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |       120      |                |  6 + 1  |  6.64 GiB  |  6.78 GiB  |   120   | 120 GiB | 235.64 GiB |
-|       |              |                    |                 |           |                |               +----------------+----------------+---------+------------+------------+---------+---------+------------+
-|       |              |                    |                 |           |                |               |  127 (126 + 1) |       Yes      |    0    | 684.53 MiB | 834.53 MiB | 126 + 1 | 126 GiB | 247.59 GiB |
-+-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+---------+------------+------------+---------+---------+------------+
+$ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-405b-IQ1_M-00019-of-00019.gguf" --skip-metadata --skip-architecture --skip-tokenizer --gpu-layers-step=10 --in-short
++------------------------------------------------------------------------------------------------------+
+| ESTIMATE                                                                                             |
++----------------+----------------+-----------------------------------+--------------------------------+
+| OFFLOAD LAYERS | FULL OFFLOADED |                RAM                |             VRAM 0             |
+|                |                +---------+------------+------------+---------+---------+------------+
+|                |                |  LAYERS |     UMA    |   NONUMA   |  LAYERS |   UMA   |   NONUMA   |
++----------------+----------------+---------+------------+------------+---------+---------+------------+
+|        0       |       No       | 126 + 1 | 126.37 GiB | 126.52 GiB |    0    |   0 B   |  33.34 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|        1       |                | 125 + 1 | 125.64 GiB | 125.78 GiB |    1    |  1 GiB  |  35.30 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|        2       |                | 124 + 1 | 124.64 GiB | 124.78 GiB |    2    |  2 GiB  |  36.97 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|        3       |                | 123 + 1 | 123.64 GiB | 123.78 GiB |    3    |  3 GiB  |  38.65 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|        4       |                | 122 + 1 | 122.64 GiB | 122.78 GiB |    4    |  4 GiB  |  40.33 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|        5       |                | 121 + 1 | 121.64 GiB | 121.78 GiB |    5    |  5 GiB  |   42 GiB   |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|        6       |                | 120 + 1 | 120.64 GiB | 120.78 GiB |    6    |  6 GiB  |  43.68 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|        7       |                | 119 + 1 | 119.64 GiB | 119.78 GiB |    7    |  7 GiB  |  45.36 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|        8       |                | 118 + 1 | 118.64 GiB | 118.78 GiB |    8    |  8 GiB  |  47.03 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|        9       |                | 117 + 1 | 117.64 GiB | 117.78 GiB |    9    |  9 GiB  |  48.71 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       10       |                | 116 + 1 | 116.64 GiB | 116.78 GiB |    10   |  10 GiB |  50.39 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       11       |                | 115 + 1 | 115.64 GiB | 115.78 GiB |    11   |  11 GiB |  52.06 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       12       |                | 114 + 1 | 114.64 GiB | 114.78 GiB |    12   |  12 GiB |  53.74 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       13       |                | 113 + 1 | 113.64 GiB | 113.78 GiB |    13   |  13 GiB |  55.42 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       14       |                | 112 + 1 | 112.64 GiB | 112.78 GiB |    14   |  14 GiB |  57.10 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       15       |                | 111 + 1 | 111.64 GiB | 111.78 GiB |    15   |  15 GiB |  58.77 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       16       |                | 110 + 1 | 110.64 GiB | 110.78 GiB |    16   |  16 GiB |  60.45 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       17       |                | 109 + 1 | 109.64 GiB | 109.78 GiB |    17   |  17 GiB |  62.13 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       18       |                | 108 + 1 | 108.64 GiB | 108.78 GiB |    18   |  18 GiB |  63.80 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       19       |                | 107 + 1 | 107.64 GiB | 107.78 GiB |    19   |  19 GiB |  65.48 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       20       |                | 106 + 1 | 106.64 GiB | 106.78 GiB |    20   |  20 GiB |  67.16 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       21       |                | 105 + 1 | 105.64 GiB | 105.78 GiB |    21   |  21 GiB |  68.83 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       22       |                | 104 + 1 | 104.64 GiB | 104.78 GiB |    22   |  22 GiB |  70.51 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       23       |                | 103 + 1 | 103.64 GiB | 103.78 GiB |    23   |  23 GiB |  72.19 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       24       |                | 102 + 1 | 102.64 GiB | 102.78 GiB |    24   |  24 GiB |  73.86 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       25       |                | 101 + 1 | 101.64 GiB | 101.78 GiB |    25   |  25 GiB |  75.54 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       26       |                | 100 + 1 | 100.64 GiB | 100.78 GiB |    26   |  26 GiB |  77.22 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       27       |                |  99 + 1 |  99.64 GiB |  99.78 GiB |    27   |  27 GiB |  78.89 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       28       |                |  98 + 1 |  98.64 GiB |  98.78 GiB |    28   |  28 GiB |  80.57 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       29       |                |  97 + 1 |  97.64 GiB |  97.78 GiB |    29   |  29 GiB |  82.25 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       30       |                |  96 + 1 |  96.64 GiB |  96.78 GiB |    30   |  30 GiB |  83.93 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       31       |                |  95 + 1 |  95.64 GiB |  95.78 GiB |    31   |  31 GiB |  85.60 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       32       |                |  94 + 1 |  94.64 GiB |  94.78 GiB |    32   |  32 GiB |  87.28 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       33       |                |  93 + 1 |  93.64 GiB |  93.78 GiB |    33   |  33 GiB |  88.96 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       34       |                |  92 + 1 |  92.64 GiB |  92.78 GiB |    34   |  34 GiB |  90.63 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       35       |                |  91 + 1 |  91.64 GiB |  91.78 GiB |    35   |  35 GiB |  92.31 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       36       |                |  90 + 1 |  90.64 GiB |  90.78 GiB |    36   |  36 GiB |  93.99 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       37       |                |  89 + 1 |  89.64 GiB |  89.78 GiB |    37   |  37 GiB |  95.66 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       38       |                |  88 + 1 |  88.64 GiB |  88.78 GiB |    38   |  38 GiB |  97.34 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       39       |                |  87 + 1 |  87.64 GiB |  87.78 GiB |    39   |  39 GiB |  99.02 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       40       |                |  86 + 1 |  86.64 GiB |  86.78 GiB |    40   |  40 GiB | 100.69 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       41       |                |  85 + 1 |  85.64 GiB |  85.78 GiB |    41   |  41 GiB | 102.37 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       42       |                |  84 + 1 |  84.64 GiB |  84.78 GiB |    42   |  42 GiB | 104.05 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       43       |                |  83 + 1 |  83.64 GiB |  83.78 GiB |    43   |  43 GiB | 105.72 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       44       |                |  82 + 1 |  82.64 GiB |  82.78 GiB |    44   |  44 GiB | 107.40 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       45       |                |  81 + 1 |  81.64 GiB |  81.78 GiB |    45   |  45 GiB | 109.08 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       46       |                |  80 + 1 |  80.64 GiB |  80.78 GiB |    46   |  46 GiB | 110.76 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       47       |                |  79 + 1 |  79.64 GiB |  79.78 GiB |    47   |  47 GiB | 112.43 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       48       |                |  78 + 1 |  78.64 GiB |  78.78 GiB |    48   |  48 GiB | 114.11 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       49       |                |  77 + 1 |  77.64 GiB |  77.78 GiB |    49   |  49 GiB | 115.79 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       50       |                |  76 + 1 |  76.64 GiB |  76.78 GiB |    50   |  50 GiB | 117.46 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       51       |                |  75 + 1 |  75.64 GiB |  75.78 GiB |    51   |  51 GiB | 119.14 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       52       |                |  74 + 1 |  74.64 GiB |  74.78 GiB |    52   |  52 GiB | 120.82 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       53       |                |  73 + 1 |  73.64 GiB |  73.78 GiB |    53   |  53 GiB | 122.49 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       54       |                |  72 + 1 |  72.64 GiB |  72.78 GiB |    54   |  54 GiB | 124.17 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       55       |                |  71 + 1 |  71.64 GiB |  71.78 GiB |    55   |  55 GiB | 125.85 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       56       |                |  70 + 1 |  70.64 GiB |  70.78 GiB |    56   |  56 GiB | 127.52 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       57       |                |  69 + 1 |  69.64 GiB |  69.78 GiB |    57   |  57 GiB | 129.20 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       58       |                |  68 + 1 |  68.64 GiB |  68.78 GiB |    58   |  58 GiB | 130.88 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       59       |                |  67 + 1 |  67.64 GiB |  67.78 GiB |    59   |  59 GiB | 132.56 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       60       |                |  66 + 1 |  66.64 GiB |  66.78 GiB |    60   |  60 GiB | 134.23 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       61       |                |  65 + 1 |  65.64 GiB |  65.78 GiB |    61   |  61 GiB | 135.91 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       62       |                |  64 + 1 |  64.64 GiB |  64.78 GiB |    62   |  62 GiB | 137.59 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       63       |                |  63 + 1 |  63.64 GiB |  63.78 GiB |    63   |  63 GiB | 139.26 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       64       |                |  62 + 1 |  62.64 GiB |  62.78 GiB |    64   |  64 GiB | 140.94 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       65       |                |  61 + 1 |  61.64 GiB |  61.78 GiB |    65   |  65 GiB | 142.62 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       66       |                |  60 + 1 |  60.64 GiB |  60.78 GiB |    66   |  66 GiB | 144.29 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       67       |                |  59 + 1 |  59.64 GiB |  59.78 GiB |    67   |  67 GiB | 145.97 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       68       |                |  58 + 1 |  58.64 GiB |  58.78 GiB |    68   |  68 GiB | 147.65 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       69       |                |  57 + 1 |  57.64 GiB |  57.78 GiB |    69   |  69 GiB | 149.32 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       70       |                |  56 + 1 |  56.64 GiB |  56.78 GiB |    70   |  70 GiB |   151 GiB  |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       71       |                |  55 + 1 |  55.64 GiB |  55.78 GiB |    71   |  71 GiB | 152.68 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       72       |                |  54 + 1 |  54.64 GiB |  54.78 GiB |    72   |  72 GiB | 154.35 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       73       |                |  53 + 1 |  53.64 GiB |  53.78 GiB |    73   |  73 GiB | 156.03 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       74       |                |  52 + 1 |  52.64 GiB |  52.78 GiB |    74   |  74 GiB | 157.71 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       75       |                |  51 + 1 |  51.64 GiB |  51.78 GiB |    75   |  75 GiB | 159.39 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       76       |                |  50 + 1 |  50.64 GiB |  50.78 GiB |    76   |  76 GiB | 161.06 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       77       |                |  49 + 1 |  49.64 GiB |  49.78 GiB |    77   |  77 GiB | 162.74 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       78       |                |  48 + 1 |  48.64 GiB |  48.78 GiB |    78   |  78 GiB | 164.42 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       79       |                |  47 + 1 |  47.64 GiB |  47.78 GiB |    79   |  79 GiB | 166.09 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       80       |                |  46 + 1 |  46.64 GiB |  46.78 GiB |    80   |  80 GiB | 167.77 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       81       |                |  45 + 1 |  45.64 GiB |  45.78 GiB |    81   |  81 GiB | 169.45 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       82       |                |  44 + 1 |  44.64 GiB |  44.78 GiB |    82   |  82 GiB | 171.12 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       83       |                |  43 + 1 |  43.64 GiB |  43.78 GiB |    83   |  83 GiB | 172.80 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       84       |                |  42 + 1 |  42.64 GiB |  42.78 GiB |    84   |  84 GiB | 174.48 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       85       |                |  41 + 1 |  41.64 GiB |  41.78 GiB |    85   |  85 GiB | 176.15 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       86       |                |  40 + 1 |  40.64 GiB |  40.78 GiB |    86   |  86 GiB | 177.83 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       87       |                |  39 + 1 |  39.64 GiB |  39.78 GiB |    87   |  87 GiB | 179.51 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       88       |                |  38 + 1 |  38.64 GiB |  38.78 GiB |    88   |  88 GiB | 181.18 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       89       |                |  37 + 1 |  37.64 GiB |  37.78 GiB |    89   |  89 GiB | 182.86 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       90       |                |  36 + 1 |  36.64 GiB |  36.78 GiB |    90   |  90 GiB | 184.54 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       91       |                |  35 + 1 |  35.64 GiB |  35.78 GiB |    91   |  91 GiB | 186.22 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       92       |                |  34 + 1 |  34.64 GiB |  34.78 GiB |    92   |  92 GiB | 187.89 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       93       |                |  33 + 1 |  33.64 GiB |  33.78 GiB |    93   |  93 GiB | 189.57 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       94       |                |  32 + 1 |  32.64 GiB |  32.78 GiB |    94   |  94 GiB | 191.25 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       95       |                |  31 + 1 |  31.64 GiB |  31.78 GiB |    95   |  95 GiB | 192.92 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       96       |                |  30 + 1 |  30.64 GiB |  30.78 GiB |    96   |  96 GiB | 194.60 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       97       |                |  29 + 1 |  29.64 GiB |  29.78 GiB |    97   |  97 GiB | 196.28 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       98       |                |  28 + 1 |  28.64 GiB |  28.78 GiB |    98   |  98 GiB | 197.95 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       99       |                |  27 + 1 |  27.64 GiB |  27.78 GiB |    99   |  99 GiB | 199.63 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       100      |                |  26 + 1 |  26.64 GiB |  26.78 GiB |   100   | 100 GiB | 201.31 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       101      |                |  25 + 1 |  25.64 GiB |  25.78 GiB |   101   | 101 GiB | 202.98 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       102      |                |  24 + 1 |  24.64 GiB |  24.78 GiB |   102   | 102 GiB | 204.66 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       103      |                |  23 + 1 |  23.64 GiB |  23.78 GiB |   103   | 103 GiB | 206.34 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       104      |                |  22 + 1 |  22.64 GiB |  22.78 GiB |   104   | 104 GiB | 208.01 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       105      |                |  21 + 1 |  21.64 GiB |  21.78 GiB |   105   | 105 GiB | 209.69 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       106      |                |  20 + 1 |  20.64 GiB |  20.78 GiB |   106   | 106 GiB | 211.37 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       107      |                |  19 + 1 |  19.64 GiB |  19.78 GiB |   107   | 107 GiB | 213.05 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       108      |                |  18 + 1 |  18.64 GiB |  18.78 GiB |   108   | 108 GiB | 214.72 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       109      |                |  17 + 1 |  17.64 GiB |  17.78 GiB |   109   | 109 GiB | 216.40 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       110      |                |  16 + 1 |  16.64 GiB |  16.78 GiB |   110   | 110 GiB | 218.08 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       111      |                |  15 + 1 |  15.64 GiB |  15.78 GiB |   111   | 111 GiB | 219.75 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       112      |                |  14 + 1 |  14.64 GiB |  14.78 GiB |   112   | 112 GiB | 221.52 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       113      |                |  13 + 1 |  13.64 GiB |  13.78 GiB |   113   | 113 GiB | 223.28 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       114      |                |  12 + 1 |  12.64 GiB |  12.78 GiB |   114   | 114 GiB | 225.05 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       115      |                |  11 + 1 |  11.64 GiB |  11.78 GiB |   115   | 115 GiB | 226.82 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       116      |                |  10 + 1 |  10.64 GiB |  10.78 GiB |   116   | 116 GiB | 228.58 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       117      |                |  9 + 1  |  9.64 GiB  |  9.78 GiB  |   117   | 117 GiB | 230.35 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       118      |                |  8 + 1  |  8.64 GiB  |  8.78 GiB  |   118   | 118 GiB | 232.11 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       119      |                |  7 + 1  |  7.64 GiB  |  7.78 GiB  |   119   | 119 GiB | 233.88 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       120      |                |  6 + 1  |  6.64 GiB  |  6.78 GiB  |   120   | 120 GiB | 235.64 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       121      |                |  5 + 1  |  5.64 GiB  |  5.78 GiB  |   121   | 121 GiB | 237.41 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       122      |                |  4 + 1  |  4.64 GiB  |  4.78 GiB  |   122   | 122 GiB | 239.18 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       123      |                |  3 + 1  |  3.64 GiB  |  3.78 GiB  |   123   | 123 GiB | 240.94 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       124      |                |  2 + 1  |  2.64 GiB  |  2.78 GiB  |   124   | 124 GiB | 242.71 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       125      |                |  1 + 1  |  1.64 GiB  |  1.78 GiB  |   125   | 125 GiB | 244.47 GiB |
++----------------+                +---------+------------+------------+---------+---------+------------+
+|       126      |                |  0 + 1  | 653.08 MiB | 803.08 MiB |   126   | 126 GiB | 246.24 GiB |
++----------------+----------------+---------+------------+------------+---------+---------+------------+
+|  127 (126 + 1) |       Yes      |    0    | 652.53 MiB | 802.53 MiB | 126 + 1 | 126 GiB | 247.59 GiB |
++----------------+----------------+---------+------------+------------+---------+---------+------------+
 
 ```
 
diff --git a/cmd/gguf-parser/README.md b/cmd/gguf-parser/README.md
index 8c16e0c..1bd75f1 100644
--- a/cmd/gguf-parser/README.md
+++ b/cmd/gguf-parser/README.md
@@ -100,6 +100,7 @@ GLOBAL OPTIONS:
    Output
 
    --in-mib             Display the estimated result in table with MiB. (default: false)
+   --in-short           Display the estimated result in table in short form. (default: false)
    --json               Output as JSON. (default: false)
    --json-pretty        Works with --json, to output pretty format JSON. (default: true)
    --raw                Output the GGUF file information as JSON only, skip anything. (default: false)
diff --git a/file_estimate.go b/file_estimate.go
index 92c7020..43f09f1 100644
--- a/file_estimate.go
+++ b/file_estimate.go
@@ -257,15 +257,15 @@ func (gf *GGUFFile) EstimateLLaMACppRun(opts ...LLaMACppRunEstimateOption) (e LL
 		e.ContextSize = nContext
 	}
 
-	// Full offload: isOffloadOutputLayer && nLoadLayers == 0.
-	// Partial offload: !isOffloadOutputLayer.
-	// Zero offload: nOffloadLayers == 0.
+	// Full offload: nLoadLayers == 0 && isOffloadOutputLayer
+	// Zero offload: nOffloadLayers == 0
+	// Partial offload: !Full offload && !Zero offload
 	var (
 		nOffloadLayers       uint64
 		nActualOffloadLayers uint64
 		nLoadLayers          = a.BlockCount
 
-		fullOffload, partialOffload, zeroOffload bool
+		fullOffload, zeroOffload bool
 	)
 	{
 		var isOffloadOutputLayer bool
@@ -293,11 +293,10 @@ func (gf *GGUFFile) EstimateLLaMACppRun(opts ...LLaMACppRunEstimateOption) (e LL
 		}
 		nLoadLayers -= nOffloadLayers
 
-		fullOffload = isOffloadOutputLayer && nLoadLayers == 0
-		partialOffload = !isOffloadOutputLayer
+		fullOffload = nLoadLayers == 0 && isOffloadOutputLayer
 		zeroOffload = nOffloadLayers == 0
 
-		e.FullOffloaded = isOffloadOutputLayer && nLoadLayers == 0
+		e.FullOffloaded = fullOffload
 		e.OffloadLayers = nOffloadLayers
 	}
 
@@ -329,6 +328,7 @@ func (gf *GGUFFile) EstimateLLaMACppRun(opts ...LLaMACppRunEstimateOption) (e LL
 	ioLs, tfLs, _ := ls.Cut([]string{
 		"token_embd.weight",
 		"output.weight",
+		"output.bias",
 		"output_norm.weight",
 		"output_norm.bias",
 	})
@@ -530,12 +530,9 @@ func (gf *GGUFFile) EstimateLLaMACppRun(opts ...LLaMACppRunEstimateOption) (e LL
 				rs := GGMLTypeF32.RowSizeOf([]uint64{l.Dimensions[l.NDimensions-1], nTokens})
 				ffnInc += rs
 			}
-			switch {
-			case fullOffload:
-				e.Devices[0].Computation.Compute = GGUFBytesScalar(loadAttnInc*uint64(len(e.Devices)) + ffnInc)
-			case partialOffload:
+			if !zeroOffload {
 				e.Devices[0].Computation.Compute = GGUFBytesScalar(loadAttnInc + ffnInc)
-			case zeroOffload:
+			} else {
 				e.Devices[0].Computation.Compute = GGUFBytesScalar(loadAttnInc)
 			}
 			cp := GGUFBytesScalar(max(offloadAttnInc, ffnInc))
@@ -560,7 +557,9 @@ func (gf *GGUFFile) EstimateLLaMACppRun(opts ...LLaMACppRunEstimateOption) (e LL
 				rs := GGMLTypeF32.RowSizeOf([]uint64{l.Dimensions[l.NDimensions-1], nTokens})
 				outInc += rs
 			}
-			outInc += uint64(e.Devices[0].Weight.Output)
+			if !fullOffload {
+				outInc += uint64(e.Devices[0].Weight.Output)
+			}
 			e.Devices[o.MainGPUIndex+1].Computation.Output += GGUFBytesScalar(outInc)
 		}
 	}
@@ -696,19 +695,14 @@ func (e LLaMACppRunEstimate) SummarizeMemory(mmap bool, nonUMARamFootprint, nonU
 			if !e.NoMMap && mmap {
 				ems.VRAMs[i].UMA -= wg
 				if i > 0 && v.HandleLastLayer >= 0 || v.Remote {
-					ems.VRAMs[i].UMA += wg + cp - v.Weight.Output
+					ems.VRAMs[i].UMA += wg
 				}
 			}
 
 			// NonUMA.
 			ems.VRAMs[i].NonUMA = GGUFBytesScalar(nonUMAVramFootprint) + fp + wg + kv + cp
-			if i > 0 {
-				switch {
-				case v.HandleLastLayer < 0:
-					ems.VRAMs[i].NonUMA -= wg + cp
-				case v.Remote && wg > kv:
-					ems.VRAMs[i].NonUMA -= kv
-				}
+			if i > 0 && v.HandleLastLayer < 0 {
+				ems.VRAMs[i].NonUMA -= wg + cp
 			}
 		}
 	}