From 02fd529725927c5537d1cecdfde8af7a0ca64446 Mon Sep 17 00:00:00 2001 From: Ahmed Ahmed Date: Wed, 20 Nov 2024 14:00:27 -0800 Subject: [PATCH] WIP --- examples/count_tokens.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/count_tokens.py b/examples/count_tokens.py index b6ddf79df..9772ad82f 100644 --- a/examples/count_tokens.py +++ b/examples/count_tokens.py @@ -2,4 +2,6 @@ a = JaggedArrayStore.open("gs://marin-us-central2/tokenized/dolma/algebraic-stack-cc00cf/train/input_ids", dtype=int) -a.data_size \ No newline at end of file +a.data_size + +150,849,275 \ No newline at end of file