From 04f8b56531af4ac50cb9148aafc8a4e9bdc6ff48 Mon Sep 17 00:00:00 2001 From: LyubomirT Date: Wed, 29 Nov 2023 13:38:54 +0200 Subject: [PATCH] Refactor wordlist loading and remove duplicates. Delete `config`. --- config | 1 - lesp/autocorrect.py | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) delete mode 100644 config diff --git a/config b/config deleted file mode 100644 index 41bcefe..0000000 --- a/config +++ /dev/null @@ -1 +0,0 @@ -wordlist="small_wordlist.txt" \ No newline at end of file diff --git a/lesp/autocorrect.py b/lesp/autocorrect.py index 289e31f..9721909 100644 --- a/lesp/autocorrect.py +++ b/lesp/autocorrect.py @@ -9,7 +9,11 @@ def __init__(self, wordlist_path="lesp-wordlist.txt"): def load_wordlist(self): try: with open(self.wordlist_path, "r") as f: - self.wordlist = f.read().split("\n") + self.wordlist = f.read().strip().split("\n") + # Remove duplicate words in the wordlist + self.wordlist = list(set(self.wordlist)) + # Remove leading and trailing whitespaces from each word + self.wordlist = [word.strip() for word in self.wordlist] if not all(word.isalpha() for word in self.wordlist): raise ValueError("Invalid wordlist format. Words must contain only alphabetic characters.") except FileNotFoundError: