From 4d1dabe0674b70748af01d602e85cc6cfbf29e7a Mon Sep 17 00:00:00 2001
From: Yemchenko Danylo
Date: Mon, 1 May 2023 14:50:44 +0300
Subject: [PATCH] #7 Remove DAWG dependency to support python3.10

---
Reviewer notes (free text here is ignored by `git am` / `git apply`):
 * Line breaks were restored from a whitespace-collapsed copy of this patch.
   Blank context lines were recovered from the hunk line counts; the
   indentation of context lines is inferred, so use
   `git apply --ignore-whitespace` if a hunk is rejected.
 * numberize/dicts/ru.py: the added key for 80 is spelled 'восемьдесят'
   (the collapsed copy had 'восемдесят', which presumably never matches the
   normal form looked up by RuLinguist.get_number - confirm against
   pymorphy2). The removed line keeps the pre-image spelling so the hunk
   still matches; the post-image blob id on that index line predates this
   correction.

 .gitignore             |  4 +---
 numberize.toml         |  3 +--
 numberize/dawgs.py     |  7 -------
 numberize/dicts/en.py  | 21 ++++++++++-----------
 numberize/dicts/ru.py  | 28 ++++++++++++++--------------
 numberize/dicts/uk.py  | 28 ++++++++++++++--------------
 numberize/linguists.py | 12 ++++++------
 setup.py               |  2 +-
 8 files changed, 47 insertions(+), 58 deletions(-)
 delete mode 100644 numberize/dawgs.py

diff --git a/.gitignore b/.gitignore
index 36ad2d4..7be7e4e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,4 @@
 venv/*
 .idea/*
 .eggs/*
-numberize/__pycache__/*
-numberize/dicts/__pycache__/*
-
+**/__pycache__
diff --git a/numberize.toml b/numberize.toml
index d5c156b..192113c 100644
--- a/numberize.toml
+++ b/numberize.toml
@@ -4,7 +4,6 @@ requires = [
     "wheel",
     "pymorhy2[fast]",
     "pymorphy2-dicts-uk",
-    "nltk",
-    "DAWG"
+    "nltk"
 ]
 build-backend = "setuptools.build_meta"
\ No newline at end of file
diff --git a/numberize/dawgs.py b/numberize/dawgs.py
deleted file mode 100644
index 8f72095..0000000
--- a/numberize/dawgs.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import dawg
-
-import numberize.dicts as dicts
-
-ru_nums = dawg.IntDAWG(dicts.ru.ru_nums)
-uk_nums = dawg.IntDAWG(dicts.uk.uk_nums)
-en_nums = dawg.IntDAWG(dicts.en.en_nums)
diff --git a/numberize/dicts/en.py b/numberize/dicts/en.py
index da32276..ef849d5 100644
--- a/numberize/dicts/en.py
+++ b/numberize/dicts/en.py
@@ -1,11 +1,10 @@
-en_nums = [
-    (u"one", 1), (u"two", 2), (u"three", 3), (u"four", 4), (u"five", 5),
-    (u"six", 6), (u"seven", 7), (u"eight", 8), (u"nine", 9),
-    (u"ten", 10), (u"eleven", 11), (u"twelve", 12),
-    (u"thirteen", 13), (u"fourteen", 14), (u"fifteen", 15),
-    (u"sixteen", 16), (u"seventeen", 17), (u"eighteen", 18),
-    (u"nineteen", 19), (u"twenty", 20), (u"thirty", 30),
-    (u"forty", 40), (u"fifty", 50), (u"sixty", 60),
-    (u"seventy", 70), (u"eighty", 80), (u"ninety", 90), (u"hundred", 100),
-    (u"thousand", 1000), (u"million", 1000000), (u"billion", 1000000000),
-]
+nums = {
+    'one': 1, 'two': 2, 'three': 3, 'four': 4,
+    'five': 5, 'six': 6, 'seven': 7, 'eight': 8,
+    'nine': 9, 'ten': 10, 'eleven': 11, 'twelve': 12,
+    'thirteen': 13, 'fourteen': 14, 'fifteen': 15, 'sixteen': 16,
+    'seventeen': 17, 'eighteen': 18, 'nineteen': 19, 'twenty': 20,
+    'thirty': 30, 'forty': 40, 'fifty': 50, 'sixty': 60,
+    'seventy': 70, 'eighty': 80, 'ninety': 90, 'hundred': 100,
+    'thousand': 1000, 'million': 1000000, 'billion': 1000000000,
+}
diff --git a/numberize/dicts/ru.py b/numberize/dicts/ru.py
index 5114dfd..a05853d 100644
--- a/numberize/dicts/ru.py
+++ b/numberize/dicts/ru.py
@@ -1,14 +1,14 @@
-ru_nums = [
-    (u"один", 1), (u"два", 2), (u"три", 3), (u"четыре", 4), (u"пять", 5),
-    (u"шесть", 6), (u"семь", 7), (u"восемь", 8), (u"девять", 9),
-    (u"десять", 10), (u"одиннадцать", 11), (u"двенадцать", 12),
-    (u"тринадцать", 13), (u"четырнадцать", 14), (u"пятнадцать", 15),
-    (u"шестнадцать", 16), (u"семнадцать", 17), (u"восемнадцать", 18),
-    (u"девятнадцать", 19), (u"двадцать", 20), (u"тридцать", 30),
-    (u"сорок", 40), (u"пятьдесят", 50), (u"шестьдесят", 60),
-    (u"семьдесят", 70), (u"восемдесят", 80), (u"девяносто", 90), (u"сто", 100),
-    (u"двести", 200), (u"триста", 300), (u"четыреста", 400), (u"пятьсот", 500),
-    (u"шестьсот", 600), (u"семьсот", 700), (u"восемьсот", 800),
-    (u"девятьсот", 900), (u"тысяча", 1000), (u"миллион", 1000000),
-    (u"миллиард", 1000000000)
-]
+nums = {
+    'один': 1, 'два': 2, 'три': 3, 'четыре': 4, 'пять': 5,
+    'шесть': 6, 'семь': 7, 'восемь': 8, 'девять': 9,
+    'десять': 10, 'одиннадцать': 11, 'двенадцать': 12,
+    'тринадцать': 13, 'четырнадцать': 14, 'пятнадцать': 15,
+    'шестнадцать': 16, 'семнадцать': 17, 'восемнадцать': 18,
+    'девятнадцать': 19, 'двадцать': 20, 'тридцать': 30,
+    'сорок': 40, 'пятьдесят': 50, 'шестьдесят': 60,
+    'семьдесят': 70, 'восемьдесят': 80, 'девяносто': 90, 'сто': 100,
+    'двести': 200, 'триста': 300, 'четыреста': 400, 'пятьсот': 500,
+    'шестьсот': 600, 'семьсот': 700, 'восемьсот': 800,
+    'девятьсот': 900, 'тысяча': 1000, 'миллион': 1000000,
+    'миллиард': 1000000000
+}
diff --git a/numberize/dicts/uk.py b/numberize/dicts/uk.py
index d856ece..e77bf38 100644
--- a/numberize/dicts/uk.py
+++ b/numberize/dicts/uk.py
@@ -1,14 +1,14 @@
-uk_nums = [
-    (u"один", 1), (u"два", 2), (u"три", 3), (u"чотири", 4), (u"п'ять", 5),
-    (u"шість", 6), (u"сім", 7), (u"вісім", 8), (u"дев'ять", 9),
-    (u"десять", 10), (u"одинадцять", 11), (u"дванадцять", 12),
-    (u"тринадцять", 13), (u"чотирнадцять", 14), (u"п'ятнадцять", 15),
-    (u"шістнадцять", 16), (u"сімнадцять", 17), (u"вісімнадцять", 18),
-    (u"дев'ятнадцять", 19), (u"двадцять", 20), (u"тридцять", 30),
-    (u"сорок", 40), (u"п'ятдесят", 50), (u"шістдесят", 60),
-    (u"сімдесят", 70), (u"вісімдесят", 80), (u"дев'яносто", 90), (u"сто", 100),
-    (u"двісті", 200), (u"триста", 300), (u"чотириста", 400), (u"п'ятсот", 500),
-    (u"шістсот", 600), (u"сімсот", 700), (u"вісімсот", 800),
-    (u"дев'ятсот", 900), (u"тисяча", 1000), (u"мільйон", 1000000),
-    (u"мільярд", 1000000000)
-]
+nums = {
+    "один": 1, "два": 2, "три": 3, "чотири": 4, "п'ять": 5,
+    "шість": 6, "сім": 7, "вісім": 8, "дев'ять": 9,
+    "десять": 10, "одинадцять": 11, "дванадцять": 12,
+    "тринадцять": 13, "чотирнадцять": 14, "п'ятнадцять": 15,
+    "шістнадцять": 16, "сімнадцять": 17, "вісімнадцять": 18,
+    "дев'ятнадцять": 19, "двадцять": 20, "тридцять": 30,
+    "сорок": 40, "п'ятдесят": 50, "шістдесят": 60,
+    "сімдесят": 70, "вісімдесят": 80, "дев'яносто": 90, "сто": 100,
+    "двісті": 200, "триста": 300, "чотириста": 400, "п'ятсот": 500,
+    "шістсот": 600, "сімсот": 700, "вісімсот": 800,
+    "дев'ятсот": 900, "тисяча": 1000, "мільйон": 1000000,
+    "мільярд": 1000000000
+}
diff --git a/numberize/linguists.py b/numberize/linguists.py
index c307e99..901ef7e 100644
--- a/numberize/linguists.py
+++ b/numberize/linguists.py
@@ -3,7 +3,7 @@
 
 import pymorphy2
 
-import numberize.dawgs as dawgs
+import numberize.dicts as dicts
 
 
 class Linguist(ABC):
@@ -26,14 +26,14 @@ def get_number(token: str) -> Optional[int]:
             parts = token.split('-')
             if len(parts) != 2:
                 return
-            left = dawgs.en_nums.get(parts[0])
+            left = dicts.en.nums.get(parts[0])
             if not left or left < 20 or left > 90:
                 return
-            right = dawgs.en_nums.get(parts[1])
+            right = dicts.en.nums.get(parts[1])
             if not right or right > 9 or right < 1:
                 return
             return left + right
-        return dawgs.en_nums.get(token)
+        return dicts.en.nums.get(token)
 
 
 class RuLinguist(Linguist):
@@ -48,7 +48,7 @@ def get_number(self, token: str) -> Optional[int]:
         if token[-1] == '.' and len(token) > 3:
             token = token[:-1]
         for form in self.analyzer.normal_forms(token):
-            number = dawgs.ru_nums.get(form)
+            number = dicts.ru.nums.get(form)
             if number:
                 return number
 
@@ -65,6 +65,6 @@ def get_number(self, token: str) -> Optional[int]:
         if token[-1] == '.' and len(token) > 3:  # TokTokTokenizer sometimes
             token = token[:-1]  # doesn't tokenize points "тисяча."
         for form in self.analyzer.normal_forms(token):
-            number = dawgs.uk_nums.get(form)
+            number = dicts.uk.nums.get(form)
             if number:
                 return number
diff --git a/setup.py b/setup.py
index 9342a1f..221e221 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
         'numberize.dicts'
     ],
     python_requires=">=3.6",
-    install_requires=['pymorphy2[fast]', 'pymorphy2-dicts-uk', 'nltk', 'DAWG'],
+    install_requires=['pymorphy2[fast]', 'pymorphy2-dicts-uk', 'nltk'],
     setup_requires=['pytest-runner'],
     tests_require=['pytest==6.2.4'],
     test_suite='tests'