From 895095df30f17619c05f74bdc37f807d983411a1 Mon Sep 17 00:00:00 2001 From: DanATW Date: Mon, 8 Nov 2021 15:02:19 +0200 Subject: [PATCH] more tests --- tests/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 204 bytes tests/test_calculators.py | 72 +++++------ tests/test_linguists.py | 88 +++++++------- tests/test_numberizer.py | 142 +++++++++++----------- 4 files changed, 151 insertions(+), 151 deletions(-) create mode 100644 tests/__pycache__/__init__.cpython-38.pyc diff --git a/tests/__pycache__/__init__.cpython-38.pyc b/tests/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87750efd220340bf8216803b52562a60af652a63 GIT binary patch literal 204 zcmYj}Jqp4=6ohw^Mgl=^A!*zTh*}5=f{oauc_i5a580o&dRa4?~DE~^RtjK;sA(`YY)4b%I$>hL_^hGNxct5UcZ`&9UlruFa$0Rc21 literal 0 HcmV?d00001 diff --git a/tests/test_calculators.py b/tests/test_calculators.py index edbfe0c..f33dd41 100644 --- a/tests/test_calculators.py +++ b/tests/test_calculators.py @@ -1,36 +1,36 @@ -import pytest - -from numberize.calculators import AmericanEnCalculator, CyrillicCalculator - -EN_DATA = [ - ((100, ), '100'), - ((25, ), '25'), - ((1, ), '1'), - ((1000, ), '1000'), - ((99, ), '99'), - ((1000000, ), '1000000'), - ((1, 100, 25), '125'), - ((1, 100), '100'), - ((2, 1000000, 6, 100, 25, 1000, 3, 100, 10), '2625310') -] - -CYRILLIC_DATA = [ - ((1, ), '1'), - ((10, ), '10'), - ((100, ), '100'), - ((1000, ), '1000'), - ((20, 5), '25'), - ((100, 20, 5), '125'), - ((6, 1000000, 600, 5, 1000, 20), '6605020'), - ((1000000, ), '1000000') -] - - -@pytest.mark.parametrize("numeral,expected_output", EN_DATA) -def test_american_en_calculator(numeral, expected_output): - assert AmericanEnCalculator().calculate(numeral) == expected_output - - -@pytest.mark.parametrize("numeral,expected_output", CYRILLIC_DATA) -def test_cyrillic_calculator(numeral, expected_output): - assert CyrillicCalculator().calculate(numeral) == expected_output +import pytest + +from numberize.calculators import AmericanEnCalculator, CyrillicCalculator + +EN_DATA = [ + ((100, ), '100'), + ((25, ), '25'), + ((1, ), '1'), + ((1000, ), '1000'), + ((99, ), '99'), + ((1000000, ), '1000000'), + ((1, 100, 25), '125'), + ((1, 100), '100'), + ((2, 1000000, 6, 100, 25, 1000, 3, 100, 10), '2625310') +] + +CYRILLIC_DATA = [ + ((1, ), '1'), + ((10, ), '10'), + ((100, ), '100'), + ((1000, ), '1000'), + ((20, 5), '25'), + ((100, 20, 5), '125'), + ((6, 1000000, 600, 5, 1000, 20), '6605020'), + ((1000000, ), '1000000') +] + + +@pytest.mark.parametrize("numeral,expected_output", EN_DATA) +def test_american_en_calculator(numeral, expected_output): + assert AmericanEnCalculator().calculate(numeral) == expected_output + + +@pytest.mark.parametrize("numeral,expected_output", CYRILLIC_DATA) +def test_cyrillic_calculator(numeral, expected_output): + assert CyrillicCalculator().calculate(numeral) == expected_output diff --git a/tests/test_linguists.py b/tests/test_linguists.py index c87705f..4a50b31 100644 --- a/tests/test_linguists.py +++ b/tests/test_linguists.py @@ -1,44 +1,44 @@ -import pytest - -import pymorphy2 - -import numberize.linguists as linguists - -EN_DATA = [ - ("twenty-five", 25), ("one", 1), ("hundred", 100), - ("million", 1000000), ("billion", 1000000000), ("ruler", None), - ("twenty-ten", None), ("one-two", None), ("two-three-four", None), - ("yeay-sheeh", None), ("пять", None) -] - -RU_DATA = [ - ("пять", 5), ("five", None), ("ста", 100), ("одного", 1), - ("тысячи", 1000), ("сто", 100), ("п'яти", None), ("дваДцати.", 20) -] - -UK_DATA = [ - ("п'яти.", 5), ("ста", 100), ("five", None), ("пяти", None), - ("сімох", 7), ("ТисЯчі", 1000), ("Ти сячі", None), - ("мільйона", 1000000) -] - -ru_morph = pymorphy2.MorphAnalyzer(result_type=None) -uk_morph = pymorphy2.MorphAnalyzer(lang="uk", result_type=None) - - -@pytest.mark.parametrize("token,expected_output", EN_DATA) -def test_en_linguist(token, expected_output): - ling = linguists.EnLinguist() - assert ling.get_number(token) == expected_output - - -@pytest.mark.parametrize("token,expected_output", RU_DATA) -def test_ru_linguist(token, expected_output): - ling = linguists.RuLinguist(ru_morph) - assert ling.get_number(token) == expected_output - - -@pytest.mark.parametrize("token,expected_output", UK_DATA) -def test_uk_linguist(token, expected_output): - ling = linguists.UkLinguist(uk_morph) - assert ling.get_number(token) == expected_output +import pytest + +import pymorphy2 + +import numberize.linguists as linguists + +EN_DATA = [ + ("twenty-five", 25), ("one", 1), ("hundred", 100), + ("million", 1000000), ("billion", 1000000000), ("ruler", None), + ("twenty-ten", None), ("one-two", None), ("two-three-four", None), + ("yeay-sheeh", None), ("пять", None) +] + +RU_DATA = [ + ("пять", 5), ("five", None), ("ста", 100), ("одного", 1), + ("тысячи", 1000), ("сто", 100), ("п'яти", None), ("дваДцати.", 20) +] + +UK_DATA = [ + ("п'яти.", 5), ("ста", 100), ("five", None), ("пяти", None), + ("сімох", 7), ("ТисЯчі", 1000), ("Ти сячі", None), + ("мільйона", 1000000) +] + +ru_morph = pymorphy2.MorphAnalyzer(result_type=None) +uk_morph = pymorphy2.MorphAnalyzer(lang="uk", result_type=None) + + +@pytest.mark.parametrize("token,expected_output", EN_DATA) +def test_en_linguist(token, expected_output): + ling = linguists.EnLinguist() + assert ling.get_number(token) == expected_output + + +@pytest.mark.parametrize("token,expected_output", RU_DATA) +def test_ru_linguist(token, expected_output): + ling = linguists.RuLinguist(ru_morph) + assert ling.get_number(token) == expected_output + + +@pytest.mark.parametrize("token,expected_output", UK_DATA) +def test_uk_linguist(token, expected_output): + ling = linguists.UkLinguist(uk_morph) + assert ling.get_number(token) == expected_output diff --git a/tests/test_numberizer.py b/tests/test_numberizer.py index ed845bd..17c01bd 100644 --- a/tests/test_numberizer.py +++ b/tests/test_numberizer.py @@ -1,71 +1,71 @@ -import pytest - -import numberize.numberizer as numberizer - -EN_DATA = [ - ("twenty-five", "25"), - ("one hundred and nine", "100 and 9"), - ("million dollars", "1000000 dollars"), - ("There's a dog over the one's yard", "There's a dog over the 1's yard"), - ("hundred", '100'), - ("hundreds of thousands", "hundreds of thousands"), - ("five-nine", "five-nine"), - ("'two miLlion' - that's a number.", "'2000000' - that's a number."), - ( - """ - Note the use of more than one conjunction "and" in large numbers in British - English: two million six hundred and twenty-five thousand three hundred and - ten (2,625,310). - In American English, the conjunction "and" is generally not used before tens - or ones: one hundred twenty-three (123); - four hundred seven (407); three thousand five hundred thirty-eight (3,538); - seventy-three thousand five (73,005); - two million six hundred twenty-five thousand three hundred ten (2,625,310); - five million three hundred thousand fifty (5,300,050). - - - In British English, the conjunction "and" is also used before tens or ones - in ordinal numerals above one hundred: - one hundred and tenth (110th); three thousand and fifth (3005th). - But "and" is not used in American ordinals: - one hundred tenth (110th); three thousand fifth (3005th). - """, - """ - Note the use of more than 1 conjunction "and" in large numbers in British - English: 2000600 and 25300 and - 10 (2,625,310). - In American English, the conjunction "and" is generally not used before tens - or ones: 123 (123); - 407 (407); 3538 (3,538); - 73005 (73,005); - 2625310 (2,625,310); - 5300050 (5,300,050). - - - In British English, the conjunction "and" is also used before tens or ones - in ordinal numerals above 100: - 100 and tenth (110th); 3000 and fifth (3005th). - But "and" is not used in American ordinals: - 100 tenth (110th); 3000 fifth (3005th). - """ - ) -] - -en_numberizer = numberizer.Numberizer('en') -ru_numberizer = numberizer.Numberizer('ru') -uk_numberizer = numberizer.Numberizer('uk') - - -@pytest.mark.parametrize("text,expected_output", EN_DATA) -def test_replace_numerals_en(text, expected_output): - ans = ''.join(en_numberizer.replace_numerals(text).split()) - exp = ''.join(expected_output.split()) - assert ans == exp - - -# def test_replace_numerals_ru(): -# pass - - -# def test_replace_numerals_uk(): -# pass +import pytest + +import numberize.numberizer as numberizer + +EN_DATA = [ + ("twenty-five", "25"), + ("one hundred and nine", "100 and 9"), + ("million dollars", "1000000 dollars"), + ("There's a dog over the one's yard", "There's a dog over the 1's yard"), + ("hundred", '100'), + ("hundreds of thousands", "hundreds of thousands"), + ("five-nine", "five-nine"), + ("'two miLlion' - that's a number.", "'2000000' - that's a number."), + ( + """ + Note the use of more than one conjunction "and" in large numbers in British + English: two million six hundred and twenty-five thousand three hundred and + ten (2,625,310). + In American English, the conjunction "and" is generally not used before tens + or ones: one hundred twenty-three (123); + four hundred seven (407); three thousand five hundred thirty-eight (3,538); + seventy-three thousand five (73,005); + two million six hundred twenty-five thousand three hundred ten (2,625,310); + five million three hundred thousand fifty (5,300,050). + + + In British English, the conjunction "and" is also used before tens or ones + in ordinal numerals above one hundred: + one hundred and tenth (110th); three thousand and fifth (3005th). + But "and" is not used in American ordinals: + one hundred tenth (110th); three thousand fifth (3005th). + """, + """ + Note the use of more than 1 conjunction "and" in large numbers in British + English: 2000600 and 25300 and + 10 (2,625,310). + In American English, the conjunction "and" is generally not used before tens + or ones: 123 (123); + 407 (407); 3538 (3,538); + 73005 (73,005); + 2625310 (2,625,310); + 5300050 (5,300,050). + + + In British English, the conjunction "and" is also used before tens or ones + in ordinal numerals above 100: + 100 and tenth (110th); 3000 and fifth (3005th). + But "and" is not used in American ordinals: + 100 tenth (110th); 3000 fifth (3005th). + """ + ) +] + +en_numberizer = numberizer.Numberizer('en') +ru_numberizer = numberizer.Numberizer('ru') +uk_numberizer = numberizer.Numberizer('uk') + + +@pytest.mark.parametrize("text,expected_output", EN_DATA) +def test_replace_numerals_en(text, expected_output): + ans = ''.join(en_numberizer.replace_numerals(text).split()) + exp = ''.join(expected_output.split()) + assert ans == exp + + +# def test_replace_numerals_ru(): +# pass + + +# def test_replace_numerals_uk(): +# pass