Skip to content

Commit

Permalink
Merge pull request #13 from BLKSerene/flake_8
Browse files Browse the repository at this point in the history
Fix most flake8 warnings
  • Loading branch information
wannaphong authored Sep 23, 2023
2 parents f23f363 + 0c851bd commit ab4dc1e
Show file tree
Hide file tree
Showing 15 changed files with 69 additions and 66 deletions.
20 changes: 10 additions & 10 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@

# -- Project information -----------------------------------------------------

project = u'LaoNLP'
copyright = u'2020 - 2021, Wannaphong Phatthiyaphaibun'
author = u'Wannaphong Phatthiyaphaibun'
project = 'LaoNLP'
copyright = '2020 - 2021, Wannaphong Phatthiyaphaibun'
author = 'Wannaphong Phatthiyaphaibun'

# The short X.Y version
version = u''
version = ''
# The full version, including alpha/beta/rc tags
release = u''
release = ''


# -- General configuration ---------------------------------------------------
Expand Down Expand Up @@ -133,8 +133,8 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'SphinxGitHubActionTest.tex', u'Sphinx GitHub Action Test Documentation',
u'Sean Zheng', 'manual'),
(master_doc, 'SphinxGitHubActionTest.tex', 'Sphinx GitHub Action Test Documentation',
'Sean Zheng', 'manual'),
]


Expand All @@ -143,7 +143,7 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'sphinxgithubactiontest', u'Sphinx GitHub Action Test Documentation',
(master_doc, 'sphinxgithubactiontest', 'Sphinx GitHub Action Test Documentation',
[author], 1)
]

Expand All @@ -154,7 +154,7 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'SphinxGitHubActionTest', u'Sphinx GitHub Action Test Documentation',
(master_doc, 'SphinxGitHubActionTest', 'Sphinx GitHub Action Test Documentation',
author, 'SphinxGitHubActionTest', 'One line description of project.',
'Miscellaneous'),
]
Expand Down Expand Up @@ -185,4 +185,4 @@
'special-members': '__init__',
'undoc-members': True,
'exclude-members': '__weakref__'
}
}
36 changes: 18 additions & 18 deletions laonlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,33 @@
See the License for the specific language governing permissions and
limitations under the License.
"""
from laonlp.tokenize import *
from laonlp.corpus import *
from laonlp.transliterate import *
from laonlp.tag import pos_tag
from laonlp.tokenize import *
from laonlp.transliterate import *
from laonlp.util import *

TONE_MARKS = "່້"+"໊໋"
CONSONANTS = "ກຂຄງຈສຊຍດຕຖທນບປຜຝພຟມຢຣລວຫອຮ"
VOWELS_COMBINING = "ັ"+"ິີ"+"ຶືຸ"+"ູົໍ"
VOWELS = "ະັາ"+"ຳິີ"+"ຶືຸ"+"ູົຼ"+"ຽເແ"+"ໂໃໄ"+"ໍ"
NUMBERS = "໑໒໓໔໕໖໗໘໙໐" # 1234567890
NUMBERS = "໑໒໓໔໕໖໗໘໙໐" # 1234567890
CANCELLATION_MARK = "\u0ECC"
# These are obsolete consonants.
# For details, see https://en.wikipedia.org/wiki/Lao_script
lao_obsolete_consonants_mapping_thai = {
"ຆ":"ฆ", # PALI GHA
"ຉ":"ฉ", # PALI CHA
"ຌ":"ฌ", # PALI JHA
"ຎ":"ญ", # PALI NYA
"ຏ":"ฏ", # PALI TTA
"ຐ":"ฐ", # PALI TTHA
"ຑ":"ฑ", # PALI DDA
"ຒ":"ฒ", # PALI DDHA
"ຓ":"ณ", # PALI NNA
"ຘ":"ธ", # PALI DHA
"ຠ":"ภ", # PALI BHA
"ຨ":"ศ", # SANSKRIT SHA
"ຩ":"ษ", # SANSKRIT SSA
"ຬ":"ฬ", # PALI LLA
}
"ຆ": "ฆ", # PALI GHA
"ຉ": "ฉ", # PALI CHA
"ຌ": "ฌ", # PALI JHA
"ຎ": "ญ", # PALI NYA
"ຏ": "ฏ", # PALI TTA
"ຐ": "ฐ", # PALI TTHA
"ຑ": "ฑ", # PALI DDA
"ຒ": "ฒ", # PALI DDHA
"ຓ": "ณ", # PALI NNA
"ຘ": "ธ", # PALI DHA
"ຠ": "ภ", # PALI BHA
"ຨ": "ศ", # SANSKRIT SHA
"ຩ": "ษ", # SANSKRIT SSA
"ຬ": "ฬ", # PALI LLA
}
2 changes: 1 addition & 1 deletion laonlp/corpus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@
"lao_wiktionarydict",
"get_path_corpus",
"lao_stopwords"
]
]
2 changes: 1 addition & 1 deletion laonlp/corpus/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@


def get_path_corpus(file):
return os.path.join(laonlp_path, "corpus", file)
return os.path.join(laonlp_path, "corpus", file)
10 changes: 5 additions & 5 deletions laonlp/corpus/lao_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def lao_dictionary() -> List[str]:
"""
path = get_path_corpus("Lao-Dictionary.txt")
with open(path, "r", encoding="utf-8-sig") as f:
return [i.strip() for i in f.readlines() if i[0]!="#"]
return [i.strip() for i in f.readlines() if i[0] != "#"]


def lao_spellcheckdict() -> List[str]:
Expand All @@ -34,13 +34,13 @@ def lao_spellcheckdict() -> List[str]:
"""
path = get_path_corpus("lo_spellcheck_dict.txt")
with open(path, "r", encoding="utf-8-sig") as f:
return [i.strip() for i in f.readlines() if i[0]!="#"]
return [i.strip() for i in f.readlines() if i[0] != "#"]


def lao_wannaphongdict() -> List[str]:
path = get_path_corpus("lao-wannaphong.txt")
with open(path, "r", encoding="utf-8-sig") as f:
return [i.strip() for i in f.readlines() if i[0]!="#"]
return [i.strip() for i in f.readlines() if i[0] != "#"]


def lao_wiktionarydict() -> List[str]:
Expand All @@ -49,7 +49,7 @@ def lao_wiktionarydict() -> List[str]:
"""
path = get_path_corpus("wiktionary-20210720.txt")
with open(path, "r", encoding="utf-8-sig") as f:
return [i.strip() for i in f.readlines() if i[0]!="#"]
return [i.strip() for i in f.readlines() if i[0] != "#"]


def lao_words() -> List[str]:
Expand All @@ -68,5 +68,5 @@ def lao_stopwords() -> FrozenSet[str]:
path = get_path_corpus("stopwords_lao.txt")
with open(path, "r", encoding="utf-8-sig") as fh:
lines = fh.read().splitlines()
lines = [line.strip() for line in lines if line.startswith("#") == False]
lines = [line.strip() for line in lines if line.startswith("#") is False]
return frozenset(filter(None, lines))
13 changes: 6 additions & 7 deletions laonlp/corpus/mopt_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,37 @@
import csv
from collections import defaultdict

from laonlp.corpus import laonlp_path
from laonlp.corpus.core import get_path_corpus
corpus_path = get_path_corpus("lao-eng-dictionary.csv")
list_data=[]
with open(corpus_path,encoding="utf-8-sig") as csvfile:
list_data = []
with open(corpus_path, encoding="utf-8-sig") as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
list_data.append(row)


def get_lao_eng()->dict:
def get_lao_eng() -> dict:
_w = defaultdict(list)
for i in list_data:
_w[i['LaoWord']].append(i['English'])
return _w


def get_eng_lao()->dict:
def get_eng_lao() -> dict:
_w = defaultdict(list)
for i in list_data:
_w[i['English']].append(i['LaoWord'])
return _w


def get_pronunciation()->dict:
def get_pronunciation() -> dict:
_w = defaultdict(list)
for i in list_data:
_w[i['LaoWord']].append(i['Pronunciation'])
return _w


def get_type()->dict:
def get_type() -> dict:
_w = defaultdict(list)
for i in list_data:
_w[i['LaoWord']].append(i['Type'])
Expand Down
5 changes: 3 additions & 2 deletions laonlp/translate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
]
from laonlp.translate.mopt_dict import dictionary

def word_dictionary(word: str, src: str, target: str, name: str = "mopt_laos")->list:

def word_dictionary(word: str, src: str, target: str, name: str = "mopt_laos") -> list:
"""
Word dictionary
Expand All @@ -29,4 +30,4 @@ def word_dictionary(word: str, src: str, target: str, name: str = "mopt_laos")->
:return: return word
:rtype: str
"""
return dictionary(word, src, target)
return dictionary(word, src, target)
6 changes: 2 additions & 4 deletions laonlp/translate/mopt_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@
See the License for the specific language governing permissions and
limitations under the License.
"""
from typing import List

from laonlp.corpus import mopt_dict


def dictionary(word: str, src: str, target: str)->list:
def dictionary(word: str, src: str, target: str) -> list:
if src == "lao" and target == "eng":
_temp = mopt_dict.get_lao_eng()
if word not in list(_temp.keys()):
Expand All @@ -31,4 +29,4 @@ def dictionary(word: str, src: str, target: str)->list:
return None
return _temp[word]
else:
return word
return word
2 changes: 1 addition & 1 deletion laonlp/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@
)
from laonlp.util.lao import (
remove_tone_mark
)
)
6 changes: 4 additions & 2 deletions laonlp/util/digitconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@
"ສູນ"
]
_dict_lao_arabic = {
i:j for i,j in zip(list(NUMBERS), list(_arabic_numerals))
i: j for i, j in zip(list(NUMBERS), list(_arabic_numerals))
}
_dict_arabic_lao = {
i:j for i,j in zip(list(_arabic_numerals), list(NUMBERS))
i: j for i, j in zip(list(_arabic_numerals), list(NUMBERS))
}
_lao_arabic_table = str.maketrans(_dict_lao_arabic)
_arabic_lao_table = str.maketrans(_dict_arabic_lao)
Expand All @@ -48,6 +48,7 @@ def lao_digit_to_arabic_digit(text: str) -> str:
"""
return text.translate(_lao_arabic_table)


def arabic_digit_to_lao_digit(text: str) -> str:
"""
Arabic digit to Lao digit
Expand All @@ -58,6 +59,7 @@ def arabic_digit_to_lao_digit(text: str) -> str:
"""
return text.translate(_arabic_lao_table)


def number2lao(numbers: int):
"""
Numbers to Lao pronunciation
Expand Down
4 changes: 2 additions & 2 deletions laonlp/util/lao.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
limitations under the License.
"""
TONE_MARKS = "່້"+"໊໋"
_tone_mark = str.maketrans({i:None for i in TONE_MARKS})
_tone_mark = str.maketrans({i: None for i in TONE_MARKS})


def remove_tone_mark(text: str) -> str:
Expand All @@ -26,4 +26,4 @@ def remove_tone_mark(text: str) -> str:
:return: returns a lao text without tone mark.
:rtype: str
"""
return text.translate(_tone_mark)
return text.translate(_tone_mark)
19 changes: 10 additions & 9 deletions laonlp/word_vector/word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@
except ModuleNotFoundError:
raise ModuleNotFoundError('Word vector functionalities require huggingface_hub which is not currently installed. Please try installing the package via "pip install huggingface_hub".')


class Word2Vec:
"""
Word2Vec
"""
def __init__(self, model: str, corpus: str="oscar"):
def __init__(self, model: str, corpus: str = "oscar"):
"""
:param str model: model name (cbow or skip-gram)
:param str corpus: corpus name (oscar)
Expand All @@ -39,27 +40,27 @@ def __init__(self, model: str, corpus: str="oscar"):
if self.corpus not in ["oscar"]:
raise NotImplementedError("LaoNLP doesn't support %s corpus." % self.corpus)
self.load_model(self.model)

def load_model(self, model: str):
"""
Load Word2Vec model
:param str model: model name (cbow or skip-gram)
"""
if model=="cbow":
if model == "cbow":
self.model_path = hf_hub_download(repo_id="wannaphong/Lao-Word-Embedding", filename="lao_oscar_cbow_model.bin")
elif model=="skip-gram":
elif model == "skip-gram":
self.model_path = hf_hub_download(repo_id="wannaphong/Lao-Word-Embedding", filename="lao_oscar_skipgram_model.bin")
else:
raise NotImplementedError("LaoNLP doesn't support %s model." % model)
self.model_wav2vec = gensim.models.keyedvectors.KeyedVectors.load_word2vec_format(self.model_path, binary=True, encoding='utf-8-sig', unicode_errors='ignore')

def get_model(self):
"""
Get gensim.models.keyedvectors.KeyedVectors class
"""
return self.model_wav2vec

def doesnt_match(self, words: List[str]) -> str:
"""
Get the word that doesn't match
Expand All @@ -70,12 +71,12 @@ def doesnt_match(self, words: List[str]) -> str:
:rtype: str
"""
return self.model_wav2vec.doesnt_match(words)

def most_similar_cosmul(self, positive: List[str], negative: List[str]):
return self.model_wav2vec.most_similar_cosmul(
positive=positive, negative=negative
)

def similarity(self, word1: str, word2: str) -> float:
"""
Find similarity between word pairs.
Expand All @@ -86,4 +87,4 @@ def similarity(self, word1: str, word2: str) -> float:
:return: return similarity
:rtype: float
"""
return self.model_wav2vec.similarity(word1, word2)
return self.model_wav2vec.similarity(word1, word2)
2 changes: 1 addition & 1 deletion tests/test_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@

class TestTagPackage(unittest.TestCase):
def test_word_dictionary(self):
self.assertIsNotNone(word_dictionary("cat","en","lao"))
self.assertIsNotNone(word_dictionary("cat", "en", "lao"))
4 changes: 3 additions & 1 deletion tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ def test_lao_digit_to_arabic_digit(self):
lao_digit_to_arabic_digit("໑໒໓໔໕໖໗໘໙໐"),
'1234567890'
)

def test_arabic_digit_to_lao_digit(self):
self.assertEqual(
arabic_digit_to_lao_digit('1234567890'),
"໑໒໓໔໕໖໗໘໙໐"
)

def test_remove_tone_mark(self):
self.assertEqual(
remove_tone_mark("ຜູ້"),
'ຜູ'
)
)
Loading

0 comments on commit ab4dc1e

Please sign in to comment.