From 9ba8fb6c78546d12b35c9ff57628624059f88684 Mon Sep 17 00:00:00 2001 From: Molly Sophia Date: Mon, 12 Aug 2024 09:08:30 +0800 Subject: [PATCH] convert_hf_to_gguf: rwkv tokenizer: Don't escape sequences manually Signed-off-by: Molly Sophia --- convert_hf_to_gguf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index a52a6077554d78..a1fac81046ca79 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2723,9 +2723,7 @@ def set_vocab(self): token = token.encode("utf-8") if isinstance(token, str) else token assert isinstance(token, bytes) assert len(token) == token_len - token_text: str = "" - for b in token: - token_text += f"\\x{b:02x}" + token_text: str = str(token)[2:-1] tokens.append(token_text.encode("utf-8")) toktypes.append(gguf.TokenType.NORMAL) remainder = vocab_size - len(tokens)