From 3e3c9969ef8118373c523fb7bb5fba2ceeb3e438 Mon Sep 17 00:00:00 2001
From: Paul O'Leary McCann
Date: Sun, 8 Oct 2023 16:29:50 +0900
Subject: [PATCH] Add tests for romaji tokens

---
 cutlet/test/test_basic.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/cutlet/test/test_basic.py b/cutlet/test/test_basic.py
index 622bcc4..2727929 100644
--- a/cutlet/test/test_basic.py
+++ b/cutlet/test/test_basic.py
@@ -1,5 +1,5 @@
 import pytest
-from cutlet import Cutlet
+from cutlet import Cutlet, normalize_text
 
 
 # Note that if there are multiple words, only the first is used
@@ -201,3 +201,25 @@ def test_update_mapping():
     cut.update_mapping("づ", "du")
     assert cut.romaji("お茶漬け") == "Ochaduke"
 
+@pytest.mark.parametrize('text, roma', SENTENCES)
+def test_romaji_tokens(text, roma):
+    """Check that romaji_tokens() output is consistent with romaji().
+
+    Runs `cut.tagger` on the normalized text, passes the result to
+    `cut.romaji_tokens`, and verifies that there is one output item per
+    input token and that joining the output surfaces (with a space after
+    items whose `space` flag is set) equals `cut.romaji(text)`.
+    `roma` is not used by this test.
+    """
+    cut = Cutlet()
+    toks = cut.tagger(normalize_text(text))
+    res = cut.romaji_tokens(toks)
+
+    assert len(toks) == len(res), "Output length doesn't match input length"
+
+    # Build the string in a single join; strip() removes leading/trailing
+    # whitespace (e.g. a space emitted after the last token) before comparing.
+    rendered = ''.join(
+        tt.surface + (' ' if tt.space else '') for tt in res
+    ).strip()
+
+    assert rendered == cut.romaji(text), "Token input diverged"