diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 11fffce9386d7..17deefaa86038 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -388,6 +388,7 @@ struct llm_tokenizer_bpe { case LLAMA_VOCAB_PRE_TYPE_COMMAND_R: case LLAMA_VOCAB_PRE_TYPE_SMOLLM: case LLAMA_VOCAB_PRE_TYPE_CODESHELL: + case LLAMA_VOCAB_PRE_TYPE_EXAONE: regex_exprs = { "\\p{N}", "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",