From 61fdccca6d6777a447c9cec807a46ba26af0355b Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Fri, 20 Dec 2024 21:05:31 +0900 Subject: [PATCH] Clean up test contents --- cutlet/test/test_basic.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cutlet/test/test_basic.py b/cutlet/test/test_basic.py index 24eba99..4fa8287 100644 --- a/cutlet/test/test_basic.py +++ b/cutlet/test/test_basic.py @@ -56,7 +56,6 @@ ("私はテストです", "Watakushi wa test desu"), # issue #4, 私 -> 代名詞 ("《月》", "(gatsu)"), # issue #7, unfamiliar punctuation ("2 【電子版特典付】", "2 [denshi ban tokutentsuke]"), # issue #7 - # This looks weird but MeCab tokenizes at alpha-num barriers ("cutlet23", "Cutlet23"), # Test some kana unks - issue #8 ("アマガミ Sincerely Your S シンシアリーユアーズ", "Amagami Sincerely Your S shinshiariiyuaazu"), @@ -89,14 +88,12 @@ # don't add spaces around apostrophe if it wasn't there ("McDonald's", "McDonald's"), ("Text McDonald's text", "Text McDonald's text"), - # Following are quote weirdness. Not good but hard to fix. - # An issue is that ," or .' is a single token. ("It's 'delicious.'", "It's 'delicious.'"), ('"Hello," he said.', '"Hello," he said.'), # this is a very strange typo ("アトランテッィク", "Atoranteku"), - # odoriji. Note at this point these rarely work properly, they mainly - # don't blow up. + # odoriji. Note at this point these rarely work properly, these mainly test + # that they don't blow up. ("くゞる", "Kuguru"), # note this is actually in unidic-lite ("くヽる", "Ku ru"), ("今度クヾペへ行こう", "Kondo kugupe e ikou"), # made up word