Skip to content

Commit

Permalink
handle regex
Browse files (browse the repository at this point in the history)
  • Loading branch information
Stillerman committed Jul 21, 2024
1 parent 689e38c commit f4600e6
Showing 1 changed file with 1 addition and 0 deletions.
1 change: 1 addition & 0 deletions src/llama.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -15558,6 +15558,7 @@ struct llm_tokenizer_bpe {
case LLAMA_VOCAB_PRE_TYPE_STARCODER:
case LLAMA_VOCAB_PRE_TYPE_REFACT:
case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
case LLAMA_VOCAB_PRE_TYPE_SMOLLM:
regex_exprs = {
"\\p{N}",
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
Expand Down

0 comments on commit f4600e6

Please sign in to comment.