diff --git a/src/tools/count.py b/src/tools/count.py
new file mode 100644
index 0000000..3fa6b5a
--- /dev/null
+++ b/src/tools/count.py
@@ -0,0 +1,56 @@
+import argparse
+import json
+
+import tiktoken
+from transformers import AutoTokenizer
+
+
+def count_tokenizer(json_path, model_name):
+    """Count tokens across all conversation values using a tiktoken encoding."""
+    with open(json_path, 'r') as f:
+        data = json.load(f)
+
+    encoding = tiktoken.encoding_for_model(model_name)
+    count = 0
+
+    for chat in data:
+        conversations = chat["conversations"]
+        for conv in conversations:
+            value = conv["value"]
+            num_tokens = len(encoding.encode(value))
+            count += num_tokens
+
+    return count
+
+
+def count_tokenizer_local(json_path, tokenizer_path):
+    """Count tokens across all conversation values using a local Hugging Face tokenizer."""
+    with open(json_path, 'r') as f:
+        data = json.load(f)
+
+    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+
+    count = 0
+
+    for chat in data:
+        conversations = chat["conversations"]
+        for conv in conversations:
+            value = conv["value"]
+            # Count tokens, not characters: len(tokens), not len(value).
+            tokens = tokenizer.tokenize(value)
+            count += len(tokens)
+
+    return count
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Count the number of tokens in a JSON file using a tokenizer.')
+    parser.add_argument('json_path', type=str, help='Path to the JSON file.')
+    parser.add_argument('tokenizer_path', type=str, help='Path to the tokenizer directory, or a model name when using tiktoken.')
+    parser.add_argument('--use-tiktoken', action='store_true', help='Use tiktoken to count tokens.')
+    args = parser.parse_args()
+
+    if args.use_tiktoken:
+        print(count_tokenizer(args.json_path, args.tokenizer_path))
+    else:
+        print(count_tokenizer_local(args.json_path, args.tokenizer_path))
diff --git a/tools/count.py b/tools/count.py
deleted file mode 100644
index 1d5538c..0000000
--- a/tools/count.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import json
-from transformers import AutoTokenizer
-
-with open("/path/to/your/json", 'r') as f:
-    data = json.load(f)
-
-tokenizer = AutoTokenizer.from_pretrained("/path/to/your/tokenizer/")
-
-count = 0
-
-for chat in data:
-    conversations = chat["conversations"]
-    for conv in conversations:
-        value = conv["value"]
-        tokenize = tokenizer.tokenize(value)
-        count += len(value)
-
-print(count)
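
A possible invocation of the new script, assuming a ShareGPT-style JSON file (a list of objects with a "conversations" list of {"value": ...} entries); the file paths and model name below are hypothetical:

    # Count with a local Hugging Face tokenizer
    python src/tools/count.py data.json /path/to/tokenizer

    # Count with a tiktoken encoding for an OpenAI model name
    python src/tools/count.py data.json gpt-4 --use-tiktoken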