This repository has been archived by the owner on Oct 5, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9b00189
commit 895095d
Showing
4 changed files
with
151 additions
and
151 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,36 @@ | ||
import pytest | ||
|
||
from numberize.calculators import AmericanEnCalculator, CyrillicCalculator | ||
|
||
# Cases for AmericanEnCalculator: (tuple of integers passed to `calculate`,
# expected digit string).
# NOTE(review): the tuples look like per-word numeral values in reading order
# (e.g. 1, 100, 25 -> '125') -- confirm against the calculator implementation.
EN_DATA = [
    ((100,), '100'),
    ((25,), '25'),
    ((1,), '1'),
    ((1000,), '1000'),
    ((99,), '99'),
    ((1000000,), '1000000'),
    ((1, 100, 25), '125'),
    ((1, 100), '100'),
    ((2, 1000000, 6, 100, 25, 1000, 3, 100, 10), '2625310'),
]
|
||
# Cases for CyrillicCalculator: (tuple of integers passed to `calculate`,
# expected digit string).
CYRILLIC_DATA = [
    ((1,), '1'),
    ((10,), '10'),
    ((100,), '100'),
    ((1000,), '1000'),
    ((20, 5), '25'),
    ((100, 20, 5), '125'),
    ((6, 1000000, 600, 5, 1000, 20), '6605020'),
    ((1000000,), '1000000'),
]
|
||
|
||
@pytest.mark.parametrize("numeral,expected_output", EN_DATA)
def test_american_en_calculator(numeral, expected_output):
    """Check AmericanEnCalculator.calculate against every EN_DATA case.

    Each case supplies a tuple of integers (`numeral`) and the digit string
    that `calculate` is expected to return for it.
    """
    calculator = AmericanEnCalculator()
    assert calculator.calculate(numeral) == expected_output
|
||
|
||
@pytest.mark.parametrize("numeral,expected_output", CYRILLIC_DATA)
def test_cyrillic_calculator(numeral, expected_output):
    """Check CyrillicCalculator.calculate against every CYRILLIC_DATA case.

    Each case supplies a tuple of integers (`numeral`) and the digit string
    that `calculate` is expected to return for it.
    """
    calculator = CyrillicCalculator()
    assert calculator.calculate(numeral) == expected_output
import pytest | ||
|
||
from numberize.calculators import AmericanEnCalculator, CyrillicCalculator | ||
|
||
# Cases for AmericanEnCalculator: (tuple of integers passed to `calculate`,
# expected digit string).
# NOTE(review): the tuples look like per-word numeral values in reading order
# (e.g. 1, 100, 25 -> '125') -- confirm against the calculator implementation.
EN_DATA = [
    ((100,), '100'),
    ((25,), '25'),
    ((1,), '1'),
    ((1000,), '1000'),
    ((99,), '99'),
    ((1000000,), '1000000'),
    ((1, 100, 25), '125'),
    ((1, 100), '100'),
    ((2, 1000000, 6, 100, 25, 1000, 3, 100, 10), '2625310'),
]
|
||
# Cases for CyrillicCalculator: (tuple of integers passed to `calculate`,
# expected digit string).
CYRILLIC_DATA = [
    ((1,), '1'),
    ((10,), '10'),
    ((100,), '100'),
    ((1000,), '1000'),
    ((20, 5), '25'),
    ((100, 20, 5), '125'),
    ((6, 1000000, 600, 5, 1000, 20), '6605020'),
    ((1000000,), '1000000'),
]
|
||
|
||
@pytest.mark.parametrize("numeral,expected_output", EN_DATA)
def test_american_en_calculator(numeral, expected_output):
    """Check AmericanEnCalculator.calculate against every EN_DATA case.

    Each case supplies a tuple of integers (`numeral`) and the digit string
    that `calculate` is expected to return for it.
    """
    calculator = AmericanEnCalculator()
    assert calculator.calculate(numeral) == expected_output
|
||
|
||
@pytest.mark.parametrize("numeral,expected_output", CYRILLIC_DATA)
def test_cyrillic_calculator(numeral, expected_output):
    """Check CyrillicCalculator.calculate against every CYRILLIC_DATA case.

    Each case supplies a tuple of integers (`numeral`) and the digit string
    that `calculate` is expected to return for it.
    """
    calculator = CyrillicCalculator()
    assert calculator.calculate(numeral) == expected_output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,44 +1,44 @@ | ||
import pytest | ||
|
||
import pymorphy2 | ||
|
||
import numberize.linguists as linguists | ||
|
||
# Cases for EnLinguist.get_number: (token, expected number).
# An expected value of None marks tokens for which no number is expected:
# a non-numeral word, hyphenated combinations such as "twenty-ten" or
# "one-two", and a Cyrillic token.
EN_DATA = [
    ("twenty-five", 25),
    ("one", 1),
    ("hundred", 100),
    ("million", 1000000),
    ("billion", 1000000000),
    ("ruler", None),
    ("twenty-ten", None),
    ("one-two", None),
    ("two-three-four", None),
    ("yeay-sheeh", None),
    ("пять", None),
]
|
||
# Cases for RuLinguist.get_number: (token, expected number).
# None marks tokens for which no number is expected (an English word and an
# apostrophe spelling). The last case expects mixed case and a trailing
# period to be tolerated.
RU_DATA = [
    ("пять", 5),
    ("five", None),
    ("ста", 100),
    ("одного", 1),
    ("тысячи", 1000),
    ("сто", 100),
    ("п'яти", None),
    ("дваДцати.", 20),
]
|
||
# Cases for UkLinguist.get_number: (token, expected number).
# None marks tokens for which no number is expected (an English word, a
# spelling without the apostrophe, and a token split by a space). Mixed case
# and a trailing period are expected to be tolerated.
UK_DATA = [
    ("п'яти.", 5),
    ("ста", 100),
    ("five", None),
    ("пяти", None),
    ("сімох", 7),
    ("ТисЯчі", 1000),
    ("Ти сячі", None),
    ("мільйона", 1000000),
]
|
||
# Morphological analyzers are built once at import time and shared by every
# parametrized case below, instead of being rebuilt per test.
# NOTE(review): result_type=None presumably makes the analyzer return plain
# tuples rather than Parse objects -- confirm against the pymorphy2 docs.
ru_morph = pymorphy2.MorphAnalyzer(result_type=None)
uk_morph = pymorphy2.MorphAnalyzer(lang="uk", result_type=None)
|
||
|
||
@pytest.mark.parametrize("token,expected_output", EN_DATA)
def test_en_linguist(token, expected_output):
    """Check EnLinguist.get_number against every EN_DATA case.

    `expected_output` is the number expected for `token`, or None when no
    number is expected.
    """
    linguist = linguists.EnLinguist()
    assert linguist.get_number(token) == expected_output
|
||
|
||
@pytest.mark.parametrize("token,expected_output", RU_DATA)
def test_ru_linguist(token, expected_output):
    """Check RuLinguist.get_number against every RU_DATA case.

    The linguist is built around the shared module-level `ru_morph` analyzer.
    `expected_output` is the number expected for `token`, or None when no
    number is expected.
    """
    linguist = linguists.RuLinguist(ru_morph)
    assert linguist.get_number(token) == expected_output
|
||
|
||
@pytest.mark.parametrize("token,expected_output", UK_DATA)
def test_uk_linguist(token, expected_output):
    """Check UkLinguist.get_number against every UK_DATA case.

    The linguist is built around the shared module-level `uk_morph` analyzer.
    `expected_output` is the number expected for `token`, or None when no
    number is expected.
    """
    linguist = linguists.UkLinguist(uk_morph)
    assert linguist.get_number(token) == expected_output
import pytest | ||
|
||
import pymorphy2 | ||
|
||
import numberize.linguists as linguists | ||
|
||
# Cases for EnLinguist.get_number: (token, expected number).
# An expected value of None marks tokens for which no number is expected:
# a non-numeral word, hyphenated combinations such as "twenty-ten" or
# "one-two", and a Cyrillic token.
EN_DATA = [
    ("twenty-five", 25),
    ("one", 1),
    ("hundred", 100),
    ("million", 1000000),
    ("billion", 1000000000),
    ("ruler", None),
    ("twenty-ten", None),
    ("one-two", None),
    ("two-three-four", None),
    ("yeay-sheeh", None),
    ("пять", None),
]
|
||
# Cases for RuLinguist.get_number: (token, expected number).
# None marks tokens for which no number is expected (an English word and an
# apostrophe spelling). The last case expects mixed case and a trailing
# period to be tolerated.
RU_DATA = [
    ("пять", 5),
    ("five", None),
    ("ста", 100),
    ("одного", 1),
    ("тысячи", 1000),
    ("сто", 100),
    ("п'яти", None),
    ("дваДцати.", 20),
]
|
||
# Cases for UkLinguist.get_number: (token, expected number).
# None marks tokens for which no number is expected (an English word, a
# spelling without the apostrophe, and a token split by a space). Mixed case
# and a trailing period are expected to be tolerated.
UK_DATA = [
    ("п'яти.", 5),
    ("ста", 100),
    ("five", None),
    ("пяти", None),
    ("сімох", 7),
    ("ТисЯчі", 1000),
    ("Ти сячі", None),
    ("мільйона", 1000000),
]
|
||
# Morphological analyzers are built once at import time and shared by every
# parametrized case below, instead of being rebuilt per test.
# NOTE(review): result_type=None presumably makes the analyzer return plain
# tuples rather than Parse objects -- confirm against the pymorphy2 docs.
ru_morph = pymorphy2.MorphAnalyzer(result_type=None)
uk_morph = pymorphy2.MorphAnalyzer(lang="uk", result_type=None)
|
||
|
||
@pytest.mark.parametrize("token,expected_output", EN_DATA)
def test_en_linguist(token, expected_output):
    """Check EnLinguist.get_number against every EN_DATA case.

    `expected_output` is the number expected for `token`, or None when no
    number is expected.
    """
    linguist = linguists.EnLinguist()
    assert linguist.get_number(token) == expected_output
|
||
|
||
@pytest.mark.parametrize("token,expected_output", RU_DATA)
def test_ru_linguist(token, expected_output):
    """Check RuLinguist.get_number against every RU_DATA case.

    The linguist is built around the shared module-level `ru_morph` analyzer.
    `expected_output` is the number expected for `token`, or None when no
    number is expected.
    """
    linguist = linguists.RuLinguist(ru_morph)
    assert linguist.get_number(token) == expected_output
|
||
|
||
@pytest.mark.parametrize("token,expected_output", UK_DATA)
def test_uk_linguist(token, expected_output):
    """Check UkLinguist.get_number against every UK_DATA case.

    The linguist is built around the shared module-level `uk_morph` analyzer.
    `expected_output` is the number expected for `token`, or None when no
    number is expected.
    """
    linguist = linguists.UkLinguist(uk_morph)
    assert linguist.get_number(token) == expected_output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,71 +1,71 @@ | ||
import pytest | ||
|
||
import numberize.numberizer as numberizer | ||
|
||
# Cases for Numberizer('en').replace_numerals: (input text, expected text).
# The test in this file strips all whitespace from both sides before
# comparing, so line breaks and indentation inside these literals do not
# affect the outcome.
EN_DATA = [
    ("twenty-five", "25"),
    ("one hundred and nine", "100 and 9"),
    ("million dollars", "1000000 dollars"),
    ("There's a dog over the one's yard", "There's a dog over the 1's yard"),
    ("hundred", '100'),
    # Plural forms and unsupported hyphenated pairs are expected unchanged.
    ("hundreds of thousands", "hundreds of thousands"),
    ("five-nine", "five-nine"),
    ("'two miLlion' - that's a number.", "'2000000' - that's a number."),
    # Long passage: the expected text keeps "and" and ordinal words as-is.
    (
        """
Note the use of more than one conjunction "and" in large numbers in British
English: two million six hundred and twenty-five thousand three hundred and
ten (2,625,310).
In American English, the conjunction "and" is generally not used before tens
or ones: one hundred twenty-three (123);
four hundred seven (407); three thousand five hundred thirty-eight (3,538);
seventy-three thousand five (73,005);
two million six hundred twenty-five thousand three hundred ten (2,625,310);
five million three hundred thousand fifty (5,300,050).
In British English, the conjunction "and" is also used before tens or ones
in ordinal numerals above one hundred:
one hundred and tenth (110th); three thousand and fifth (3005th).
But "and" is not used in American ordinals:
one hundred tenth (110th); three thousand fifth (3005th).
""",
        """
Note the use of more than 1 conjunction "and" in large numbers in British
English: 2000600 and 25300 and
10 (2,625,310).
In American English, the conjunction "and" is generally not used before tens
or ones: 123 (123);
407 (407); 3538 (3,538);
73005 (73,005);
2625310 (2,625,310);
5300050 (5,300,050).
In British English, the conjunction "and" is also used before tens or ones
in ordinal numerals above 100:
100 and tenth (110th); 3000 and fifth (3005th).
But "and" is not used in American ordinals:
100 tenth (110th); 3000 fifth (3005th).
""",
    ),
]
|
||
# Numberizer instances are built once at import time, one per language code.
# Only en_numberizer is used by an active test in this file; the Russian and
# Ukrainian tests are still commented-out stubs.
en_numberizer = numberizer.Numberizer('en')
ru_numberizer = numberizer.Numberizer('ru')
uk_numberizer = numberizer.Numberizer('uk')
|
||
|
||
@pytest.mark.parametrize("text,expected_output", EN_DATA)
def test_replace_numerals_en(text, expected_output):
    """Check en_numberizer.replace_numerals against every EN_DATA case.

    Both the actual and the expected text have all whitespace removed before
    the comparison, so the check ignores spacing and line breaks entirely.
    """
    actual = en_numberizer.replace_numerals(text)
    # str.split() with no arguments drops every whitespace run.
    actual_compact = ''.join(actual.split())
    expected_compact = ''.join(expected_output.split())
    assert actual_compact == expected_compact
|
||
|
||
# TODO: implement the Russian and Ukrainian replace_numerals tests.
# def test_replace_numerals_ru():
#     pass


# def test_replace_numerals_uk():
#     pass
import pytest | ||
|
||
import numberize.numberizer as numberizer | ||
|
||
# Cases for Numberizer('en').replace_numerals: (input text, expected text).
# The test in this file strips all whitespace from both sides before
# comparing, so line breaks and indentation inside these literals do not
# affect the outcome.
EN_DATA = [
    ("twenty-five", "25"),
    ("one hundred and nine", "100 and 9"),
    ("million dollars", "1000000 dollars"),
    ("There's a dog over the one's yard", "There's a dog over the 1's yard"),
    ("hundred", '100'),
    # Plural forms and unsupported hyphenated pairs are expected unchanged.
    ("hundreds of thousands", "hundreds of thousands"),
    ("five-nine", "five-nine"),
    ("'two miLlion' - that's a number.", "'2000000' - that's a number."),
    # Long passage: the expected text keeps "and" and ordinal words as-is.
    (
        """
Note the use of more than one conjunction "and" in large numbers in British
English: two million six hundred and twenty-five thousand three hundred and
ten (2,625,310).
In American English, the conjunction "and" is generally not used before tens
or ones: one hundred twenty-three (123);
four hundred seven (407); three thousand five hundred thirty-eight (3,538);
seventy-three thousand five (73,005);
two million six hundred twenty-five thousand three hundred ten (2,625,310);
five million three hundred thousand fifty (5,300,050).
In British English, the conjunction "and" is also used before tens or ones
in ordinal numerals above one hundred:
one hundred and tenth (110th); three thousand and fifth (3005th).
But "and" is not used in American ordinals:
one hundred tenth (110th); three thousand fifth (3005th).
""",
        """
Note the use of more than 1 conjunction "and" in large numbers in British
English: 2000600 and 25300 and
10 (2,625,310).
In American English, the conjunction "and" is generally not used before tens
or ones: 123 (123);
407 (407); 3538 (3,538);
73005 (73,005);
2625310 (2,625,310);
5300050 (5,300,050).
In British English, the conjunction "and" is also used before tens or ones
in ordinal numerals above 100:
100 and tenth (110th); 3000 and fifth (3005th).
But "and" is not used in American ordinals:
100 tenth (110th); 3000 fifth (3005th).
""",
    ),
]
|
||
# Numberizer instances are built once at import time, one per language code.
# Only en_numberizer is used by an active test in this file; the Russian and
# Ukrainian tests are still commented-out stubs.
en_numberizer = numberizer.Numberizer('en')
ru_numberizer = numberizer.Numberizer('ru')
uk_numberizer = numberizer.Numberizer('uk')
|
||
|
||
@pytest.mark.parametrize("text,expected_output", EN_DATA)
def test_replace_numerals_en(text, expected_output):
    """Check en_numberizer.replace_numerals against every EN_DATA case.

    Both the actual and the expected text have all whitespace removed before
    the comparison, so the check ignores spacing and line breaks entirely.
    """
    actual = en_numberizer.replace_numerals(text)
    # str.split() with no arguments drops every whitespace run.
    actual_compact = ''.join(actual.split())
    expected_compact = ''.join(expected_output.split())
    assert actual_compact == expected_compact
|
||
|
||
# TODO: implement the Russian and Ukrainian replace_numerals tests.
# def test_replace_numerals_ru():
#     pass


# def test_replace_numerals_uk():
#     pass