diff --git a/examples/count_tokens.py b/examples/count_tokens.py
new file mode 100644
index 000000000..b6ddf79df
--- /dev/null
+++ b/examples/count_tokens.py
@@ -0,0 +1,12 @@
+"""Print the size of the data array of a tokenized ``input_ids`` store."""
+
+from levanter.store import JaggedArrayStore
+
+# Path of the tokenized train-split ``input_ids`` store to inspect.
+INPUT_IDS_PATH = "gs://marin-us-central2/tokenized/dolma/algebraic-stack-cc00cf/train/input_ids"
+
+a = JaggedArrayStore.open(INPUT_IDS_PATH, dtype=int)
+
+# A bare ``a.data_size`` expression is only echoed in a REPL/notebook; print it
+# so running the script reports the count (presumably the total token count).
+print(a.data_size)