Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
UltralyticsAssistant committed Sep 5, 2024
1 parent c290fbc commit 5754dea
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions clip/simple_tokenizer.py
Original file line number Diff line number Diff line change
@@ -7,13 +7,13 @@
import regex as re


-@lru_cache()
+@lru_cache
def default_bpe():
"""Returns the file path to the default BPE vocabulary file 'bpe_simple_vocab_16e6.txt.gz'."""
return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz")


-@lru_cache()
+@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a corresponding list of unicode strings.
@@ -65,7 +65,7 @@ def whitespace_clean(text):
return text


-class SimpleTokenizer(object):
+class SimpleTokenizer:
"""Tokenizes text using byte pair encoding (BPE) and predefined tokenization rules for efficient text processing."""

def __init__(self, bpe_path: str = default_bpe()):
Expand Down

0 comments on commit 5754dea

Please sign in to comment.