Skip to content
This repository has been archived by the owner on Oct 5, 2023. It is now read-only.

Commit

Permalink
#7 Remove DAWG dependency to support python3.10
Browse files Browse the repository at this point in the history
  • Loading branch information
yemchenko-dan committed May 1, 2023
1 parent 895095d commit 4d1dabe
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 58 deletions.
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
venv/*
.idea/*
.eggs/*
numberize/__pycache__/*
numberize/dicts/__pycache__/*

**/__pycache__
3 changes: 1 addition & 2 deletions numberize.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ requires = [
"wheel",
"pymorphy2[fast]",
"pymorphy2-dicts-uk",
"nltk",
"DAWG"
"nltk"
]
build-backend = "setuptools.build_meta"
7 changes: 0 additions & 7 deletions numberize/dawgs.py

This file was deleted.

21 changes: 10 additions & 11 deletions numberize/dicts/en.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# English cardinal numerals as (word, value) pairs, listed in ascending
# order of value.
en_nums = [
    # units
    ("one", 1),
    ("two", 2),
    ("three", 3),
    ("four", 4),
    ("five", 5),
    ("six", 6),
    ("seven", 7),
    ("eight", 8),
    ("nine", 9),
    # ten and the teens
    ("ten", 10),
    ("eleven", 11),
    ("twelve", 12),
    ("thirteen", 13),
    ("fourteen", 14),
    ("fifteen", 15),
    ("sixteen", 16),
    ("seventeen", 17),
    ("eighteen", 18),
    ("nineteen", 19),
    # tens
    ("twenty", 20),
    ("thirty", 30),
    ("forty", 40),
    ("fifty", 50),
    ("sixty", 60),
    ("seventy", 70),
    ("eighty", 80),
    ("ninety", 90),
    # multipliers
    ("hundred", 100),
    ("thousand", 1000),
    ("million", 1000000),
    ("billion", 1000000000),
]
# Lookup table from English cardinal numeral words to their integer values.
nums = {
    # units
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "seven": 7,
    "eight": 8,
    "nine": 9,
    # ten and the teens
    "ten": 10,
    "eleven": 11,
    "twelve": 12,
    "thirteen": 13,
    "fourteen": 14,
    "fifteen": 15,
    "sixteen": 16,
    "seventeen": 17,
    "eighteen": 18,
    "nineteen": 19,
    # tens
    "twenty": 20,
    "thirty": 30,
    "forty": 40,
    "fifty": 50,
    "sixty": 60,
    "seventy": 70,
    "eighty": 80,
    "ninety": 90,
    # multipliers
    "hundred": 100,
    "thousand": 1_000,
    "million": 1_000_000,
    "billion": 1_000_000_000,
}
28 changes: 14 additions & 14 deletions numberize/dicts/ru.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Russian cardinal numerals as (word, value) pairs, in ascending order of
# value.
ru_nums = [
    (u"один", 1), (u"два", 2), (u"три", 3), (u"четыре", 4), (u"пять", 5),
    (u"шесть", 6), (u"семь", 7), (u"восемь", 8), (u"девять", 9),
    (u"десять", 10), (u"одиннадцать", 11), (u"двенадцать", 12),
    (u"тринадцать", 13), (u"четырнадцать", 14), (u"пятнадцать", 15),
    (u"шестнадцать", 16), (u"семнадцать", 17), (u"восемнадцать", 18),
    (u"девятнадцать", 19), (u"двадцать", 20), (u"тридцать", 30),
    (u"сорок", 40), (u"пятьдесят", 50), (u"шестьдесят", 60),
    (u"семьдесят", 70),
    # "восемьдесят" (with the soft sign) is the correct spelling of 80.
    (u"восемьдесят", 80),
    # The misspelled variant is kept so that input which matched it before
    # still resolves to 80.
    (u"восемдесят", 80),
    (u"девяносто", 90), (u"сто", 100),
    (u"двести", 200), (u"триста", 300), (u"четыреста", 400), (u"пятьсот", 500),
    (u"шестьсот", 600), (u"семьсот", 700), (u"восемьсот", 800),
    (u"девятьсот", 900), (u"тысяча", 1000), (u"миллион", 1000000),
    (u"миллиард", 1000000000),
]
# Lookup table from Russian cardinal numeral words to their integer values.
nums = {
    'один': 1, 'два': 2, 'три': 3, 'четыре': 4, 'пять': 5,
    'шесть': 6, 'семь': 7, 'восемь': 8, 'девять': 9,
    'десять': 10, 'одиннадцать': 11, 'двенадцать': 12,
    'тринадцать': 13, 'четырнадцать': 14, 'пятнадцать': 15,
    'шестнадцать': 16, 'семнадцать': 17, 'восемнадцать': 18,
    'девятнадцать': 19, 'двадцать': 20, 'тридцать': 30,
    'сорок': 40, 'пятьдесят': 50, 'шестьдесят': 60,
    'семьдесят': 70,
    # 'восемьдесят' (with the soft sign) is the correct spelling of 80;
    # without this key the correctly written word is not found.
    'восемьдесят': 80,
    # The misspelled key is kept as an alias so that input which matched
    # it before still resolves to 80.
    'восемдесят': 80,
    'девяносто': 90, 'сто': 100,
    'двести': 200, 'триста': 300, 'четыреста': 400, 'пятьсот': 500,
    'шестьсот': 600, 'семьсот': 700, 'восемьсот': 800,
    'девятьсот': 900, 'тысяча': 1000, 'миллион': 1000000,
    'миллиард': 1000000000,
}
28 changes: 14 additions & 14 deletions numberize/dicts/uk.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Ukrainian cardinal numerals as (word, value) pairs, listed in ascending
# order of value.
uk_nums = [
    # units
    ("один", 1),
    ("два", 2),
    ("три", 3),
    ("чотири", 4),
    ("п'ять", 5),
    ("шість", 6),
    ("сім", 7),
    ("вісім", 8),
    ("дев'ять", 9),
    # ten and the teens
    ("десять", 10),
    ("одинадцять", 11),
    ("дванадцять", 12),
    ("тринадцять", 13),
    ("чотирнадцять", 14),
    ("п'ятнадцять", 15),
    ("шістнадцять", 16),
    ("сімнадцять", 17),
    ("вісімнадцять", 18),
    ("дев'ятнадцять", 19),
    # tens
    ("двадцять", 20),
    ("тридцять", 30),
    ("сорок", 40),
    ("п'ятдесят", 50),
    ("шістдесят", 60),
    ("сімдесят", 70),
    ("вісімдесят", 80),
    ("дев'яносто", 90),
    # hundreds
    ("сто", 100),
    ("двісті", 200),
    ("триста", 300),
    ("чотириста", 400),
    ("п'ятсот", 500),
    ("шістсот", 600),
    ("сімсот", 700),
    ("вісімсот", 800),
    ("дев'ятсот", 900),
    # multipliers
    ("тисяча", 1000),
    ("мільйон", 1000000),
    ("мільярд", 1000000000),
]
# Lookup table from Ukrainian cardinal numeral words to their integer values.
nums = {
    # units
    "один": 1,
    "два": 2,
    "три": 3,
    "чотири": 4,
    "п'ять": 5,
    "шість": 6,
    "сім": 7,
    "вісім": 8,
    "дев'ять": 9,
    # ten and the teens
    "десять": 10,
    "одинадцять": 11,
    "дванадцять": 12,
    "тринадцять": 13,
    "чотирнадцять": 14,
    "п'ятнадцять": 15,
    "шістнадцять": 16,
    "сімнадцять": 17,
    "вісімнадцять": 18,
    "дев'ятнадцять": 19,
    # tens
    "двадцять": 20,
    "тридцять": 30,
    "сорок": 40,
    "п'ятдесят": 50,
    "шістдесят": 60,
    "сімдесят": 70,
    "вісімдесят": 80,
    "дев'яносто": 90,
    # hundreds
    "сто": 100,
    "двісті": 200,
    "триста": 300,
    "чотириста": 400,
    "п'ятсот": 500,
    "шістсот": 600,
    "сімсот": 700,
    "вісімсот": 800,
    "дев'ятсот": 900,
    # multipliers
    "тисяча": 1_000,
    "мільйон": 1_000_000,
    "мільярд": 1_000_000_000,
}
12 changes: 6 additions & 6 deletions numberize/linguists.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pymorphy2

import numberize.dawgs as dawgs
import numberize.dicts as dicts


class Linguist(ABC):
Expand All @@ -26,14 +26,14 @@ def get_number(token: str) -> Optional[int]:
parts = token.split('-')
if len(parts) != 2:
return
left = dawgs.en_nums.get(parts[0])
left = dicts.en.nums.get(parts[0])
if not left or left < 20 or left > 90:
return
right = dawgs.en_nums.get(parts[1])
right = dicts.en.nums.get(parts[1])
if not right or right > 9 or right < 1:
return
return left + right
return dawgs.en_nums.get(token)
return dicts.en.nums.get(token)


class RuLinguist(Linguist):
Expand All @@ -48,7 +48,7 @@ def get_number(self, token: str) -> Optional[int]:
if token[-1] == '.' and len(token) > 3:
token = token[:-1]
for form in self.analyzer.normal_forms(token):
number = dawgs.ru_nums.get(form)
number = dicts.ru.nums.get(form)
if number:
return number

Expand All @@ -65,6 +65,6 @@ def get_number(self, token: str) -> Optional[int]:
if token[-1] == '.' and len(token) > 3: # TokTokTokenizer sometimes
token = token[:-1] # doesn't tokenize points "тисяча."
for form in self.analyzer.normal_forms(token):
number = dawgs.uk_nums.get(form)
number = dicts.uk.nums.get(form)
if number:
return number
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
'numberize.dicts'
],
python_requires=">=3.6",
install_requires=['pymorphy2[fast]', 'pymorphy2-dicts-uk', 'nltk', 'DAWG'],
install_requires=['pymorphy2[fast]', 'pymorphy2-dicts-uk', 'nltk'],
setup_requires=['pytest-runner'],
tests_require=['pytest==6.2.4'],
test_suite='tests'
Expand Down

0 comments on commit 4d1dabe

Please sign in to comment.