Skip to content

Commit

Permalink
Merge pull request #48 from kudanai/kudanai/cleanup
Browse files Browse the repository at this point in the history
Move transliterator to its own package
  • Loading branch information
kudanai authored Dec 24, 2023
2 parents ebdf671 + 6b87164 commit e6d3bb8
Show file tree
Hide file tree
Showing 10 changed files with 20 additions and 155 deletions.
Empty file added utils/__init__.py
Empty file.
149 changes: 0 additions & 149 deletions utils/output/repeated_lines.txt

This file was deleted.

10 changes: 10 additions & 0 deletions utils/transliterator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Transliterator

This utility helps generate the alternate (transliterated) translation file
by substituting words from a custom dictionary and then applying a few rules.

## Usage

```shell
python ./utils/transliterator/generate_alt_text_with_thikithaana.py
```
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import csv
import transliterate_arabic_to_thaana as trans

INPUT_FILE = "master_dv.divehi.txt"
OUTPUT_FILE = "alt_dv.divehi.txt"

INPUT_FILE = trans.BASE_DIR/"master_dv.divehi.txt"
OUTPUT_FILE = trans.BASE_DIR/"alt_dv.divehi.txt"


def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@

import re
import csv
from pathlib import Path

ARAB_SET = u"\u0621-\u06FF"
THAA_SET = u"\u0780-\u07B1"
LONG_FILI = u"\u07A7\u07A9\u07AB\u07AD\u07AF"

INPUT_FILE = "utils/transliteration_test/arabic_words_with_next.txt"
OUTPUT_FILE = "utils/transliteration_test/arabic_words_with_next_out.csv"
BASE_DIR = Path(__file__).resolve().parent.parent.parent

TRANSLIT_FILE = "utils/transliteration_data/_quran_freq_dv.csv"
NEXT_WORDS_FILE = "utils/transliteration_data/_dhivehi_next_words.csv"
INPUT_FILE = BASE_DIR/"utils/transliterator/transliteration_test/arabic_words_with_next.txt"
OUTPUT_FILE = BASE_DIR/"utils/transliterator/transliteration_test/arabic_words_with_next_out.csv"

TRANSLIT_FILE = BASE_DIR/"utils/transliterator/transliteration_data/_quran_freq_dv.csv"
NEXT_WORDS_FILE = BASE_DIR/"utils/transliterator/transliteration_data/_dhivehi_next_words.csv"

tranliteration_dicts = []
with open(TRANSLIT_FILE, mode='r', encoding='utf-8-sig', newline='') as translit_file:
Expand Down

0 comments on commit e6d3bb8

Please sign in to comment.