Skip to content

Commit

Permalink
update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kudanai committed Dec 23, 2023
1 parent d828d45 commit 4ed706e
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 15 deletions.
9 changes: 9 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Tests

This package contains tests for the translation file (`master_dv.divehi.txt`).
Run them with pytest from the repository root:

```shell
pip install pytest
pytest -v
```
25 changes: 23 additions & 2 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,25 @@
from pathlib import Path

BASE_PATH = Path(__file__).resolve().parent.parent
TRANSLATION_FILE = BASE_PATH/"master_dv.divehi.txt"

class QuranCorpus:
    """
    Simple wrapper class to read
    content from master_dv.divehi.txt

    Both readers open the file explicitly as UTF-8 so the result does not
    depend on the platform's default locale encoding. Plain ``utf-8`` (not
    ``utf-8-sig``) is used on purpose: a byte-order mark, if present, is
    kept in the decoded text as U+FEFF so callers can detect it.
    """

    # Repository root: two levels above this file.
    BASE_PATH = Path(__file__).resolve().parent.parent
    # Location of the translation file that the readers below open.
    TRANSLATION_FILE = BASE_PATH / "master_dv.divehi.txt"

    @staticmethod
    def read_all():
        """Return the entire translation file as a single string.

        Raises:
            OSError: if the translation file cannot be opened.
            UnicodeDecodeError: if the file is not valid UTF-8.
        """
        with open(QuranCorpus.TRANSLATION_FILE, "r", encoding="utf-8") as f:
            return f.read()

    @staticmethod
    def read_lines():
        """Yield the content lines of the translation file one at a time.

        Lines that are empty (start with a newline) or that start with
        ``#`` are skipped. Yielded lines keep their trailing newline.

        Raises:
            OSError: if the translation file cannot be opened.
            UnicodeDecodeError: if the file is not valid UTF-8.
        """
        with open(QuranCorpus.TRANSLATION_FILE, "r", encoding="utf-8") as f:
            for line in f:
                # One startswith call with a tuple covers both skip cases.
                if line.startswith(("\n", "#")):
                    continue

                yield line
7 changes: 3 additions & 4 deletions tests/test_content_structure.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from . import TRANSLATION_FILE
from . import QuranCorpus


def test_line_count():
"""
Number of lines in translation files must == 6236
"""
with open(TRANSLATION_FILE, "r") as f:
lines = [line for line in f.readlines() if not line.startswith("#")]
assert len(lines) == 6236, f"Expected 6236 lines, got {len(lines)}"
lines = list(QuranCorpus.read_lines())
assert len(lines) == 6236, f"Expected 6236 lines, got {len(lines)}"
16 changes: 7 additions & 9 deletions tests/test_content_validity.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import re
from . import TRANSLATION_FILE
from . import QuranCorpus


def test_no_bom():
"""
Translation file should not contain BOM's
as per Tanzil directive
"""
with open(TRANSLATION_FILE, "r") as f:
trans = f.read()
assert "\uFEFF" not in trans
trans = QuranCorpus.read_all()
assert "\uFEFF" not in trans


def test_no_repeated_fili():
Expand All @@ -20,10 +19,9 @@ def test_no_repeated_fili():
error_list = []
pattern = re.compile("[\u07A6-\u07AB]{2}")

with open(TRANSLATION_FILE, "r") as f:
for line in f:
if re.findall(pattern, line):
sura, ayat, trans = line.split("|")
error_list.append(f"{sura}|{ayat}")
for line in QuranCorpus.read_lines():
if re.findall(pattern, line):
sura, ayat, trans = line.split("|")
error_list.append(f"{sura}|{ayat}")

assert len(error_list) == 0, f"Repeated fili found on {error_list}"

0 comments on commit 4ed706e

Please sign in to comment.