From 04fde420b2e71d13668f5ba42a8dfcfb78819376 Mon Sep 17 00:00:00 2001
From: finetune <82650881+finetunej@users.noreply.github.com>
Date: Mon, 17 Apr 2023 15:15:18 +0200
Subject: [PATCH] Add sample.py

---
Notes (not part of the commit; text in this section is ignored by `git am`):

  * sample.py is a flat demo script. It builds a SentencePieceProcessor from
    the model file 'novelai.model', encodes one fixed sentence, and prints
    the sentence, its token IDs, and the same tokens as string pieces.
  * The two `# ...` comment lines in the script record the output the author
    observed for that sentence; they are documentation, not assertions.
  * 'novelai.model' is given as a relative path, so the script presumably has
    to be run from the directory that contains the model -- confirm against
    the repository layout.

 sample.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 sample.py

diff --git a/sample.py b/sample.py
new file mode 100644
index 0000000..2cd1d02
--- /dev/null
+++ b/sample.py
@@ -0,0 +1,13 @@
+import sentencepiece as spm
+
+s = spm.SentencePieceProcessor(model_file='novelai.model')
+
+text = "The quick brown fox jumps over the goblin."
+
+print("Text:", text)
+
+print("Token IDs:", s.encode(text))
+# Token IDs: [541, 1939, 6573, 22820, 22734, 712, 336, 34477, 49230]
+
+print("Readable tokens:", s.encode(text, out_type=str))
+# Readable tokens: ['The', '▁quick', '▁brown', '▁fox', '▁jumps', '▁over', '▁the', '▁goblin', '.']