Skip to content
This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Commit

Permalink
Fix cohere tokenizer (#307)
Browse files Browse the repository at this point in the history
  • Loading branch information
izellevy authored Feb 26, 2024
1 parent 50414f5 commit 580dc2c
Showing 1 changed file with 23 additions and 31 deletions.
54 changes: 23 additions & 31 deletions tests/unit/tokenizer/test_cohere_hf_tokenizer.py
Original file line number | Diff line number | Diff line change
@@ -20,37 +20,29 @@ def tokenizer():
 @staticmethod
 @pytest.fixture
 def expected_tokens(text):
-return [
-'string',
-'Ġwith',
-'Ġspecial',
-'Ġcharacters',
-'Ġlike',
-'Ġ!',
-'@',
-'#',
-'$',
-'%',
-'^',
-'&',
-'*',
-'()',
-'_',
-'+',
-'Ġæ',
-'Ĺ',
-'¥',
-'æľ¬',
-'Ġspaces',
-'ĠĠĠ',
-'ĊĠ',
-'ĊĊ',
-'ĠCASE',
-'Ġc',
-'A',
-'se',
-'Ġ',
-]
+return ['string',
+'Ġwith',
+'Ġspecial',
+'Ġcharacters',
+'Ġlike',
+'Ġ!',
+'@',
+'#$',
+'%^',
+'&',
+'*',
+'()',
+'_',
+'+',
+'ĠæĹ¥æľ¬',
+'Ġspaces',
+'ĠĠĠ',
+'ĊĠĊĊ',
+'ĠCASE',
+'Ġc',
+'A',
+'se',
+'Ġ']
 
 @staticmethod
 def test_messages_token_count(tokenizer):
Expand Down

0 comments on commit 580dc2c

Please sign in to comment.