Skip to content

Commit

Permalink
Add tests for romaji tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
polm committed Oct 8, 2023
1 parent 3b71096 commit 3e3c996
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion cutlet/test/test_basic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from cutlet import Cutlet
from cutlet import Cutlet, normalize_text


# Note that if there are multiple words, only the first is used
Expand Down Expand Up @@ -201,3 +201,19 @@ def test_update_mapping():
cut.update_mapping("づ", "du")
assert cut.romaji("お茶漬け") == "Ochaduke"

@pytest.mark.parametrize('text, roma', SENTENCES)
def test_romaji_tokens(text, roma):
    """Check that token-level romaji output agrees with ``Cutlet.romaji``.

    The normalized text is run through the Cutlet instance's tagger and the
    result is handed to ``romaji_tokens``. Two properties are asserted:

    * one output token is produced per input token, and
    * joining each output token's ``surface`` (followed by a single space
      whenever its ``space`` flag is truthy) and stripping the result gives
      the same string as ``romaji(text)``.

    ``roma`` comes from the parametrization but is not consulted here; the
    reference value is recomputed from ``text`` instead.
    """
    katsu = Cutlet()
    words = katsu.tagger(normalize_text(text))
    romaji_toks = katsu.romaji_tokens(words)

    assert len(words) == len(romaji_toks), "Output length doesn't match input length"

    # Rebuild the sentence from the tokens, then drop the leading/trailing
    # whitespace so a space flag on the final token does not matter.
    rendered = ''.join(
        tok.surface + (' ' if tok.space else '')
        for tok in romaji_toks
    ).strip()

    assert rendered == katsu.romaji(text), "Token input diverged"

0 comments on commit 3e3c996

Please sign in to comment.