-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5212b17
commit d8e27fa
Showing
2 changed files
with
59 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,51 @@ | ||
""" | ||
Test whatlang-pyo3 with unittest | ||
""" | ||
|
||
from whatlang import detect, detect_script, detect_lang, batch_detect | ||
import time | ||
import unittest | ||
|
||
class TestWhatlang(unittest.TestCase): | ||
|
||
def test_detect(self):
    """detect() on a French sentence reports "fra" with confidence above 0.1."""
    french_sentence = "Ceci est écrit en français"
    detection = detect(french_sentence)

    self.assertEqual(detection.lang, "fra")
    self.assertGreater(detection.confidence, 0.1)
|
||
def test_detect_script(self):
    """detect_script() on an Esperanto sentence reports the "Latin" script."""
    esperanto_sentence = (
        "Ĉu vi ne volas eklerni Esperanton? Bonvolu! "
        "Estas unu de la plej bonaj aferoj!"
    )
    detected_script = detect_script(esperanto_sentence)

    self.assertEqual(detected_script.name, "Latin")
|
||
# NOTE(review): `def main():` has no body directly beneath it; the statements
# that look like its body appear after test_detect_lang below. Presumably the
# two definitions are interleaved by the diff rendering -- confirm against the
# actual file before relying on this layout.
def main(): | ||
# Checks that detect_lang() on a French sentence yields lang == "fra".
def test_detect_lang(self): | ||
result = detect_lang("Ceci est écrit en français") | ||
self.assertEqual(result.lang, "fra") | ||
|
||
# Calls detect, detect_script, detect_lang and batch_detect on sample
# sentences, then prints the first three results.
result = detect("Ceci est écrit en français") | ||
script = detect_script("Ĉu vi ne volas eklerni Esperanton? Bonvolu! Estas unu de la plej bonaj aferoj!") | ||
lang = detect_lang("Ceci est écrit en français") | ||
# NOTE(review): `batch` is assigned here but never printed or otherwise used
# in the visible lines, unlike result/script/lang.
batch = batch_detect(["Ceci est écrit en français", "Ĉu vi ne volas eklerni Esperanton? Bonvolu! Estas unu de la plej bonaj aferoj!"]) | ||
print(result) | ||
print(script) | ||
print(lang) | ||
def test_batch_detect(self):
    """batch_detect() returns one detection per input, in input order.

    The French sentence must come back as "fra" with confidence above 0.1
    and the Esperanto sentence as "epo" with confidence above 0.5.
    """
    sentences = [
        "Ceci est écrit en français",
        "Ĉu vi ne volas eklerni Esperanton? Bonvolu! Estas unu de la plej bonaj aferoj!",
    ]
    detections = batch_detect(sentences)

    # (expected language code, confidence floor) for each input position.
    expectations = (("fra", 0.1), ("epo", 0.5))
    for position, (expected_lang, confidence_floor) in enumerate(expectations):
        self.assertEqual(detections[position].lang, expected_lang)
        self.assertGreater(detections[position].confidence, confidence_floor)
|
||
def compare_batch_with_single_performance():
    """Print wall-clock timings for batch_detect() versus a detect() loop.

    Both approaches process the same list of identical French sentences;
    nothing is returned or asserted, the timings are only printed.
    """
    import time
    from whatlang import batch_detect, detect

    # 50,000 copies of a single French sentence.
    n = 50_000
    sentence = "Ceci est écrit en français"
    texts = [sentence] * n

    # One batch call over the whole list.
    print("--------------------------Batch detect--------------------------")
    start = time.perf_counter()
    batch_detect(texts, n_jobs=-1)
    end = time.perf_counter()
    print(f"Batch detect for {n} texts took {end - start} seconds")

    # One detect() call per text.
    print("--------------------------Single detect--------------------------")
    start = time.perf_counter()
    for item in texts:
        detect(item)
    end = time.perf_counter()
    print(f"Single detect for {n} texts took {end - start} seconds")
def test_performance(self):
    """Time batch_detect() against a detect() loop on the same texts.

    Prints both timings and asserts that the per-text loop took at least
    as long as the single batch call.
    """
    # 10,000 copies of a single French sentence.
    n = 10_000
    sentence = "Ceci est écrit en français"
    texts = [sentence] * n

    print("\n--------------------------Batch detect--------------------------")
    batch_began = time.perf_counter()
    batch_detect(texts, n_jobs=-1)
    batch = time.perf_counter() - batch_began
    print(f"Batch detect for {n} texts took {batch} seconds")

    print("--------------------------Single detect--------------------------\n")
    single_began = time.perf_counter()
    for item in texts:
        detect(item)
    single = time.perf_counter() - single_began
    print(f"Single detect for {n} texts took {single} seconds")

    # Wall-clock comparison: the outcome depends on the machine running it.
    self.assertGreaterEqual(single, batch)
|
||
# Script entry point: when run directly, calls the timing comparison and then
# the unittest runner.
# NOTE(review): both calls appear here, presumably because removed and added
# diff lines are shown together -- confirm which one the actual file keeps.
if __name__ == "__main__": | ||
compare_batch_with_single_performance() | ||
unittest.main() |