Skip to content
This repository has been archived by the owner on Jul 22, 2022. It is now read-only.

Commit

Permalink
Merge pull request #152 from miurahr/topic/miurahr/issue150
Browse files Browse the repository at this point in the history
Support latin-1 characters conversion
  • Loading branch information
miurahr authored Apr 14, 2022
2 parents e28af7c + 1d9a4ca commit 8ee431a
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 2 deletions.
103 changes: 101 additions & 2 deletions src/pykakasi/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class Ch:
bracket_bra = 0x7B
tilda = 0x7E
delete = 0x7F
latin1_inverted_exclam = 0x00A1
latin1_y_diaeresis = 0x00FF
ideographic_space = 0x3000
postal_mark_face = 0x3020
wavy_dash = 0x3030
Expand Down Expand Up @@ -101,7 +103,7 @@ class Convert_Tables:
a2 f0 | Å ‰ ♯ ♭ ♪ † ‡ ¶ ◯
----------------------------------------------------------
Greek convertion table
Greek conversion table
----------------------------------------------------------
"Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Zeta", "Eta", "Theta",
"Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi", "Omicron", "Pi", "Rho",
Expand Down Expand Up @@ -245,7 +247,7 @@ class Convert_Tables:
".",
"/",
]
# cyriilic
# cyrillic
cyrillic_table = { # basic cyrillic characters
"\u0410": "A",
"\u0411": "B",
Expand Down Expand Up @@ -359,6 +361,103 @@ class Convert_Tables:
"\uff40",
] # [\]^_`
alpha_table_3 = ["\uff5b", "\uff5c", "\uff5d", "\uff5e"] # {|}~
# ASCII transliterations for the Latin-1 Supplement range U+00A1..U+00FF.
# The list is positional: entry i is the replacement for chr(0x00A1 + i),
# so it must contain exactly 0xFF - 0xA1 + 1 == 95 entries in code point
# order.  Every entry is non-empty so that no input character is silently
# dropped from the converted text.
latin1_table = [
    "!",  # U+00A1 inverted exclamation mark
    "cent",  # U+00A2 cent sign
    "GBP",  # U+00A3 pound sign
    "currency",  # U+00A4 currency sign
    "yen",  # U+00A5 yen sign
    "|",  # U+00A6 broken bar
    "ss",  # U+00A7 section sign
    "..",  # U+00A8 diaeresis
    "(c)",  # U+00A9 copyright sign
    "a",  # U+00AA feminine ordinal indicator
    "<<",  # U+00AB left-pointing double angle quotation mark
    "not",  # U+00AC not sign
    "-",  # U+00AD soft hyphen
    "(R)",  # U+00AE registered sign
    "~",  # U+00AF macron
    ".",  # U+00B0 degree sign
    "+-",  # U+00B1 plus-minus sign
    "^2",  # U+00B2 superscript two
    "^3",  # U+00B3 superscript three
    "`",  # U+00B4 acute accent
    "u",  # U+00B5 micro sign
    "D",  # U+00B6 pilcrow sign
    ".",  # U+00B7 middle dot
    ",",  # U+00B8 cedilla
    "^1",  # U+00B9 superscript one
    "o",  # U+00BA masculine ordinal indicator (was "", which dropped it)
    ">>",  # U+00BB right-pointing double angle quotation mark
    "1/4",  # U+00BC vulgar fraction one quarter
    "1/2",  # U+00BD vulgar fraction one half
    "3/4",  # U+00BE vulgar fraction three quarters
    "?",  # U+00BF inverted question mark
    "A",  # U+00C0 A with grave
    "A",  # U+00C1 A with acute
    "A",  # U+00C2 A with circumflex
    "A",  # U+00C3 A with tilde
    "A",  # U+00C4 A with diaeresis
    "A",  # U+00C5 A with ring above
    "AE",  # U+00C6 ligature AE
    "C",  # U+00C7 C with cedilla
    "E",  # U+00C8 E with grave
    "E",  # U+00C9 E with acute
    "E",  # U+00CA E with circumflex
    "E",  # U+00CB E with diaeresis
    "I",  # U+00CC I with grave
    "I",  # U+00CD I with acute
    "I",  # U+00CE I with circumflex
    "I",  # U+00CF I with diaeresis
    "Eth",  # U+00D0 capital eth
    "N",  # U+00D1 N with tilde
    "O",  # U+00D2 O with grave
    "O",  # U+00D3 O with acute
    "O",  # U+00D4 O with circumflex
    "O",  # U+00D5 O with tilde
    "O",  # U+00D6 O with diaeresis
    "x",  # U+00D7 multiplication sign
    "O",  # U+00D8 O with stroke
    "U",  # U+00D9 U with grave
    "U",  # U+00DA U with acute
    "U",  # U+00DB U with circumflex
    "U",  # U+00DC U with diaeresis
    "Y",  # U+00DD Y with acute
    "Th",  # U+00DE capital thorn (was "", which dropped it)
    "ss",  # U+00DF sharp s (was "", which dropped it)
    "a",  # U+00E0 a with grave
    "a",  # U+00E1 a with acute
    "a",  # U+00E2 a with circumflex
    "a",  # U+00E3 a with tilde
    "a",  # U+00E4 a with diaeresis
    "a",  # U+00E5 a with ring above
    "ae",  # U+00E6 ligature ae
    "c",  # U+00E7 c with cedilla
    "e",  # U+00E8 e with grave
    "e",  # U+00E9 e with acute
    "e",  # U+00EA e with circumflex
    "e",  # U+00EB e with diaeresis
    "i",  # U+00EC i with grave
    "i",  # U+00ED i with acute
    "i",  # U+00EE i with circumflex
    "i",  # U+00EF i with diaeresis
    "eth",  # U+00F0 small eth
    "n",  # U+00F1 n with tilde
    "o",  # U+00F2 o with grave
    "o",  # U+00F3 o with acute
    "o",  # U+00F4 o with circumflex
    "o",  # U+00F5 o with tilde
    "o",  # U+00F6 o with diaeresis
    "/",  # U+00F7 division sign
    "o",  # U+00F8 o with stroke
    "u",  # U+00F9 u with grave
    "u",  # U+00FA u with acute
    "u",  # U+00FB u with circumflex
    "u",  # U+00FC u with diaeresis
    "y",  # U+00FD y with acute
    "th",  # U+00FE small thorn (was "", which dropped it)
    "y",  # U+00FF y with diaeresis
]


Convert_Tables = Convert_Tables()
3 changes: 3 additions & 0 deletions src/pykakasi/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ def isRegion(cls, char: str):
or (Ch.greece_alpha <= c <= Ch.greece_omega)
or (Ch.cyrillic_A <= c <= Ch.cyrillic_ya)
or (Ch.zenkaku_exc_mark <= c <= Ch.zenkaku_number_nine)
or (Ch.latin1_inverted_exclam <= c <= Ch.latin1_y_diaeresis)
or (0xFF20 <= c <= 0xFF5E)
or c == 0x0451
or c == 0x0401
Expand Down Expand Up @@ -351,6 +352,8 @@ def _convert(self, text):
return chr(0x0041 + c - 0xFF21) # u\ff21A => u\0041:@A..Z[\]^_`
elif 0xFF41 <= c < 0xFF5F:
return chr(0x0061 + c - 0xFF41) # u\ff41a => u\0061:a..z{|}
elif Ch.latin1_inverted_exclam <= c <= Ch.latin1_y_diaeresis:
return Convert_Tables.latin1_table[c - Ch.latin1_inverted_exclam]
else:
return "" # pragma: no cover

Expand Down
10 changes: 10 additions & 0 deletions tests/test_pykakasi_structured.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,3 +623,13 @@ def test_kakasi_unihandecode(case, expected):
assert result[i]["hepburn"] == e["hepburn"]
assert result[i]["kunrei"] == e["kunrei"]
assert result[i]["passport"] == e["passport"]


def test_issue_150():
    """Regression test for issue #150: a Latin-1 sign between two kanji.

    Converts "三×五" (with U+00D7 MULTIPLICATION SIGN) and checks that the
    sign comes back as its own middle segment, flanked by the readings of
    the two kanji.
    """
    converter = pykakasi.kakasi()
    segments = converter.convert("三\u00D7五")

    # Leading kanji segment.
    assert segments[0]["hira"] == "さん"

    # The multiplication sign is kept verbatim in both fields.
    assert segments[1]["orig"] == "\u00D7"
    assert segments[1]["hira"] == "×"

    # Trailing kanji segment, and nothing else after it.
    assert segments[2]["hira"] == "ご"
    assert len(segments) == 3

0 comments on commit 8ee431a

Please sign in to comment.