Skip to content
This repository has been archived by the owner on Jul 22, 2022. It is now read-only.

Commit

Permalink
Merge pull request #100 from bebax/master
Browse files (browse the repository at this point in the history)
Fix convert function omitting the last part of a text
  • Loading branch information
miurahr authored Jul 23, 2020
2 parents 9fb3053 + 211049c commit 551b505
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 14 deletions.
27 changes: 13 additions & 14 deletions src/pykakasi/kakasi.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,34 +64,23 @@ def convert(self, text: str) -> List[Dict[str, str]]:
otext = ''
_result = []
i = 0
while True:
if i >= len(text):
break

while i < len(text):
if self._jconv.isRegion(text[i]):
t, ln = self._jconv.convert(text[i:])
if ln <= 0: # pragma: no cover
otext = otext + text[i]
i += 1
_state = False
elif (i + ln) < len(text):
else:
if _state:
_result.append(self._iconv(otext + text[i:i + ln], t))
otext = ''
else:
_result.append(self._iconv(otext, otext))
_result.append(self._iconv(text[i:i + ln], t))
otext = ''
_state = True
otext = ''
i = i + ln
else:
if _state:
_result.append(self._iconv(otext + text[i:i + ln], t))
else: # pragma: no cover
_result.append(self._iconv(otext, otext))
_result.append(self._iconv(text[i:i + ln], t))
break

else:
_state = False
otext = otext + text[i]
Expand All @@ -102,6 +91,9 @@ def convert(self, text: str) -> List[Dict[str, str]]:
otext = ''
_state = True

if otext:
_result.append(self._iconv(otext, otext))

return _result

def _iconv(self, otext: str, hira: str) -> Dict[str, str]:
Expand All @@ -121,6 +113,13 @@ def _s2a(self, text: str) -> str:
if l1 > 0:
result += t
i += l1
elif ord(text[i]) in self._LONG_SYMBOL: # handle chōonpu sound marks
# use previous char as a transliteration for kana-dash
if len(result) > 0:
result += result[-1]
else:
result += '-'
i += 1
else:
result += text[i:i + 1]
i += 1
Expand Down
39 changes: 39 additions & 0 deletions tests/test_pykakasi_structured.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,44 @@
{'orig': "構成", 'kana': "コウセイ", 'hira': "こうせい",
'hepburn': 'kousei', 'kunrei': "kousei", 'passport': "kosei"}
]),
('好き',
[{'orig': '好き', 'hira': 'すき', 'kana': 'スキ', 'hepburn': 'suki', 'kunrei': 'suki', 'passport': 'suki'}]),
('大きい',
[{'orig': '大きい', 'hira': 'おおきい', 'kana': 'オオキイ', 'hepburn': 'ookii', 'kunrei': 'ookii', 'passport': 'okii'}]),
('かんたん',
[{'orig': 'かんたん', 'hira': 'かんたん', 'kana': 'カンタン', 'hepburn': 'kantan', 'kunrei': 'kantan', 'passport': 'kantan'}]),
('にゃ',
[{'orig': 'にゃ', 'hira': 'にゃ', 'kana': 'ニャ', 'hepburn': 'nya', 'kunrei': 'nya', 'passport': 'nya'}]),
('っき',
[{'orig': 'っき', 'hira': 'っき', 'kana': 'ッキ', 'hepburn': 'kki', 'kunrei': 'kki', 'passport': 'kki'}]),
('っふぁ',
[{'orig': 'っふぁ', 'hira': 'っふぁ', 'kana': 'ッファ', 'hepburn': 'ffa', 'kunrei': 'ffa', 'passport': 'ffa'}]),
('キャ',
[{'orig': 'キャ', 'hira': 'キャ', 'kana': 'キャ', 'hepburn': 'キャ', 'kunrei': 'キャ', 'passport': 'キャ'}]),
('キュ',
[{'orig': 'キュ', 'hira': 'キュ', 'kana': 'キュ', 'hepburn': 'キュ', 'kunrei': 'キュ', 'passport': 'キュ'}]),
('キョ',
[{'orig': 'キョ', 'hira': 'キョ', 'kana': 'キョ', 'hepburn': 'キョ', 'kunrei': 'キョ', 'passport': 'キョ'}]),
('漢字とひらがな交じり文',
[
{'orig': '漢字', 'hira': 'かんじ', 'kana': 'カンジ', 'hepburn': 'kanji', 'kunrei': 'kanzi', 'passport': 'kanji'},
{'orig': 'とひらがな', 'hira': 'とひらがな', 'kana': 'トヒラガナ', 'hepburn': 'tohiragana', 'kunrei': 'tohiragana', 'passport': 'tohiragana'},
{'orig': '交じり', 'hira': 'まじり', 'kana': 'マジリ', 'hepburn': 'majiri', 'kunrei': 'maziri', 'passport': 'majiri'},
{'orig': '文', 'hira': 'ぶん', 'kana': 'ブン', 'hepburn': 'bun', 'kunrei': 'bun', 'passport': 'bun'}
]),
('Alphabet 123 and 漢字',
[
{'orig': 'Alphabet 123 and ', 'hira': 'Alphabet 123 and ', 'kana': 'Alphabet 123 and ',
'hepburn': 'Alphabet 123 and ', 'kunrei': 'Alphabet 123 and ', 'passport': 'Alphabet 123 and '},
{'orig': '漢字', 'hira': 'かんじ', 'kana': 'カンジ', 'hepburn': 'kanji', 'kunrei': 'kanzi', 'passport': 'kanji'}
]),
('日経新聞',
[{'orig': '日経新聞', 'hira': 'にっけいしんぶん', 'kana': 'ニッケイシンブン', 'hepburn': 'nikkeishinbun', 'kunrei': 'nikkeisinbun', 'passport': 'nikkeishimbun'}]),
('日本国民は、',
[
{'orig': '日本国民', 'hira': 'にほんこくみん', 'kana': 'ニホンコクミン', 'hepburn': 'nihonkokumin', 'kunrei': 'nihonkokumin', 'passport': 'nihonkokumin'},
{'orig': 'は、', 'hira': 'は、', 'kana': 'ハ、', 'hepburn': 'ha,', 'kunrei': 'ha,', 'passport': 'ha,'}
]),
("私がこの子を助けなきゃいけないってことだよね",
[
{'orig': "私", 'kana': "ワタシ", 'hira': "わたし", 'hepburn': "watashi", 'kunrei': "watasi", 'passport': "watashi"},
Expand All @@ -33,6 +71,7 @@
def test_kakasi_structured(case, expected):
kakasi = pykakasi.kakasi()
result = kakasi.convert(case)
assert len(result) == len(expected)
for i, r in enumerate(result):
assert r['orig'] == expected[i]['orig']
assert r['hira'] == expected[i]['hira']
Expand Down

0 comments on commit 551b505

Please sign in to comment.