Skip to content

Commit

Permalink
Support phi-2 GGUF creation, add vocab. results.
Browse files Browse the repository at this point in the history
  • Loading branch information
RhinoDevel committed Jul 30, 2024
1 parent 7e72aa7 commit 1e87ac5
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 0 deletions.
3 changes: 3 additions & 0 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
# ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
res = "llama-bpe"
if chkhsh == "fcace8b9cac38ce847670c970cd5892031a753a1ef381abd1d9af00f713da085":
# ref: https://huggingface.co/microsoft/phi-2
res = "phi-2"
if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754":
# ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
res = "deepseek-llm"
Expand Down
1 change: 1 addition & 0 deletions convert_hf_to_gguf_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class TOKENIZER_TYPE(IntEnum):
models = [
{"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
{"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
{"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
{"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
{"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
{"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
Expand Down
Binary file added models/ggml-vocab-phi-2.gguf
Binary file not shown.
112 changes: 112 additions & 0 deletions models/ggml-vocab-phi-2.gguf.inp
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
ied 4 ½ months
__ggml_vocab_test__
Führer
__ggml_vocab_test__

__ggml_vocab_test__

__ggml_vocab_test__

__ggml_vocab_test__

__ggml_vocab_test__

__ggml_vocab_test__


__ggml_vocab_test__



__ggml_vocab_test__




__ggml_vocab_test__


__ggml_vocab_test__
Hello world
__ggml_vocab_test__
Hello world
__ggml_vocab_test__
Hello World
__ggml_vocab_test__
Hello World
__ggml_vocab_test__
Hello World!
__ggml_vocab_test__
Hello, world!
__ggml_vocab_test__
Hello, world!
__ggml_vocab_test__
this is 🦙.cpp
__ggml_vocab_test__
w048 7tuijk dsdfhu
__ggml_vocab_test__
нещо на Български
__ggml_vocab_test__
កាន់តែពិសេសអាចខលចេញ
__ggml_vocab_test__
🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
Hello
__ggml_vocab_test__
(
__ggml_vocab_test__

=
__ggml_vocab_test__
' era
__ggml_vocab_test__
Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
__ggml_vocab_test__
!!!!!!
__ggml_vocab_test__
3
__ggml_vocab_test__
33
__ggml_vocab_test__
333
__ggml_vocab_test__
3333
__ggml_vocab_test__
33333
__ggml_vocab_test__
333333
__ggml_vocab_test__
3333333
__ggml_vocab_test__
33333333
__ggml_vocab_test__
333333333
__ggml_vocab_test__
Cửa Việt
__ggml_vocab_test__
discards
__ggml_vocab_test__











🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
__ggml_vocab_test__
46 changes: 46 additions & 0 deletions models/ggml-vocab-phi-2.gguf.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
798 604 25208 1933
37 9116 71 11751

220
50286
50285
197
198
628
628 198
197 198
15496 995
18435 995
15496 2159
18435 2159
18435 2159 0
15496 11 995 0
18435 11 995 0
428 318 12520 99 247 13 20322
86 47202 767 28047 45961 288 82 7568 13415
22177 16843 141 231 15166 12466 121 16142 12466 239 141 232 30143 140 111 16142 21169 21727 31583 18849
157 252 222 157 252 114 157 252 241 157 253 233 157 252 237 157 253 224 157 252 244 157 252 115 157 252 253 157 253 223 157 252 253 157 252 95 157 252 114 157 252 227 157 252 223 157 252 249 157 252 227 157 253 223 157 252 231
8582 248 222 357 11265 8 30325 114 447 235 8582 234 104 37929 357 48101 795 13210 271 1673 36686 515 8 14519 227 357 8807 44805 326 468 663 898 11241 8
15496
18435
50286 15496
50285 15496
50284 15496
50284 15496 198 50284 15496
357
198 796
6 6980
15496 11 331 6 439 0 1374 389 345 30325 223 5633 22755 239 46349 111 28839 101 18040 32432 98 43291 1485 1415 24309 25465 171 121 252
13896 3228
18
2091
20370
24840
2091 20370
24840 2091
24840 20370
24840 24840
24840 2091 20370
34 157 119 255 64 16049 157 119 229 83
1221 1371
198 220 628 220 628 198 220 197 220 50294 220 197 198 50286 198 50285 198 50284 198 50283 198 8582 248 222 357 11265 8 30325 114 447 235 8582 234 104 37929 357 48101 795 13210 271 1673 36686 515 8 14519 227 12520 99 247 8582 99 247 513 4747 23460 513 20370 23460 2091 23460 20370 23460 24840 23460 2091 20370 513 13 18 513 492 18 513 986 18 28053 252 222 157 252 114 157 252 241 157 253 233 157 252 237 157 253 224 157 252 244 157 252 115 157 252 253 157 253 223 157 252 253 157 252 95 157 252 114 157 252 227 47249 223 5633 22755 239 46349 111 28839 101 18040 32432 98 43291 1485 1415 24309 25465 171 121 252 40103 1421 18604 12466 121 16843 141 231 15166 12466 121 16142 12466 239 141 232 30143 140 111 16142 21169 21727 31583 18849 705 39115 6 33153 15506 63 15931 15931 16317 13896 3228 9805 3548 314 1053 587 705 44040 339 338 612 11 705 2200 345 1654 30 705 44 407 1654 314 1183 787 340 11 705 35 345 588 617 8887 30 775 6 26979 257 6 75 43

0 comments on commit 1e87ac5

Please sign in to comment.