From 0114a57cc347fb81a9b1b9e9fc0a3e5ce6eafeda Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <Nikita.KOZLOVTSEV@raiffeisen.ru>
Date: Mon, 8 Jul 2024 19:12:20 +0300
Subject: [PATCH 1/4] [NV-8287] added excluded words from envs

---
 README.md                 |  1 +
 tests/test_dict_views.py  | 12 ++++++------
 tests/test_spell_views.py | 29 +++++++++++++++++++++++++----
 whole_app/settings.py     |  1 +
 whole_app/views.py        |  3 ++-
 5 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 10794a6..6c73e0d 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,7 @@ You can change config of the service by changing the environment variables. Here
 * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`.
 * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`.
 * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`.
+* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignore by default `'["Foo", "bar"]'`
 
 ### Deployment
 Note: all docker & docker-compose variants use named volumes to store user dictionaries.
diff --git a/tests/test_dict_views.py b/tests/test_dict_views.py
index 8aeb01f..9e3b0c5 100644
--- a/tests/test_dict_views.py
+++ b/tests/test_dict_views.py
@@ -42,7 +42,7 @@ def test_add_to_dict(
             json=models.UserDictionaryRequestWithWord(
                 user_name=fake_user_name,
                 exception_word=fake_exc_word,
-            ).dict(),
+            ).model_dump(),
         )
         assert server_response.status_code == 201
         if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
@@ -70,7 +70,7 @@ def test_remove_from_user_dict(
             json=models.UserDictionaryRequestWithWord(
                 user_name=fake_user_name,
                 exception_word=fake_exc_word,
-            ).dict(),
+            ).model_dump(),
         )
         assert server_response.status_code == 200
         if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
@@ -92,7 +92,7 @@ def test_dummy_provider_init(
             json=models.UserDictionaryRequestWithWord(
                 user_name=faker_obj.user_name(),
                 exception_word=faker_obj.word(),
-            ).dict(),
+            ).model_dump(),
         )
         assert server_response.status_code == 201
 
@@ -111,7 +111,7 @@ def test_disabled_dictionary_views(
                 json=models.UserDictionaryRequestWithWord(
                     user_name="test",
                     exception_word="test",
-                ).dict(),
+                ).model_dump(),
             )
             assert server_response.status_code == 404
         # restore back api state to ensure other tests wont break
@@ -124,7 +124,7 @@ def test_empty_auth_key(self: "TestVarious", api_key: str) -> None:
             json=models.UserDictionaryRequestWithWord(
                 user_name="test",
                 exception_word="test",
-            ).dict(),
+            ).model_dump(),
             headers={} if api_key is None else {SETTINGS.api_key_header_name: ""},
         )
         assert server_response.status_code == 403
@@ -135,7 +135,7 @@ def test_wrong_api_key(self: "TestVarious") -> None:
             json=models.UserDictionaryRequestWithWord(
                 user_name="test",
                 exception_word="test",
-            ).dict(),
+            ).model_dump(),
             headers={
                 SETTINGS.api_key_header_name: SETTINGS.api_key + "wrongTrashKekJunk --- 5000",
             },
diff --git a/tests/test_spell_views.py b/tests/test_spell_views.py
index 93f7eb2..e3269d3 100644
--- a/tests/test_spell_views.py
+++ b/tests/test_spell_views.py
@@ -27,7 +27,7 @@ def test_no_corrections(app_client: "TestClient", wannabe_user_input: str) -> No
     """Dead simple test."""
     server_response: typing.Final = app_client.post(
         f"{SETTINGS.api_prefix}/check/",
-        json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).dict(),
+        json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(),
     )
     assert server_response.status_code == 200
 
@@ -53,7 +53,7 @@ def test_with_corrections_simple(
             text=wannabe_user_input,
             language=RU_LANG,
             user_name=faker_obj.user_name(),
-        ).dict(),
+        ).model_dump(),
     )
     assert server_response.status_code == 200
 
@@ -87,7 +87,7 @@ def run_request() -> typing.Any:
                 text=wannabe_user_input,
                 language=RU_LANG,
                 user_name=user_name,
-            ).dict(),
+            ).model_dump(),
         )
 
     def parse_words(server_response: RequestsResponse) -> typing.Any:
@@ -103,8 +103,29 @@ def parse_words(server_response: RequestsResponse) -> typing.Any:
         json=models.UserDictionaryRequestWithWord(
             user_name=user_name,
             exception_word=tested_word,
-        ).dict(),
+        ).model_dump(),
     )
     # and than check that excepted word not in the check output
     server_response = run_request()
     assert tested_word not in parse_words(server_response)
+
+
+@pytest.mark.parametrize(
+    ("wannabe_user_input", "excluded_words"),
+    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", ["шячло", "попячтса"])],
+)
+def test_default_excluded_words(
+    app_client: "TestClient",
+    wannabe_user_input: str,
+    excluded_words: list[str],
+    monkeypatch: typing.Any,
+) -> None:
+    """Dead simple test."""
+    with monkeypatch.context() as patcher:
+        patcher.setattr(SETTINGS, "exclusion_words", excluded_words)
+        server_response: typing.Final = app_client.post(
+            f"{SETTINGS.api_prefix}/check/",
+            json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(),
+        )
+        assert server_response.status_code == 200
+        assert server_response.json()["corrections"] == [], f"{server_response.json()=} --- {excluded_words=}"
diff --git a/whole_app/settings.py b/whole_app/settings.py
index eceb423..83da6ca 100644
--- a/whole_app/settings.py
+++ b/whole_app/settings.py
@@ -195,6 +195,7 @@ class SettingsOfMicroservice(BaseSettings):
         ),
     ] = 60
     username_regex: str = r"^[a-zA-Z0-9-_]*$"
+    exclusion_words: list[str] = []
 
     class Config:
         env_prefix: str = "spellcheck_"
diff --git a/whole_app/views.py b/whole_app/views.py
index 0668bfd..f4496b8 100644
--- a/whole_app/views.py
+++ b/whole_app/views.py
@@ -63,8 +63,9 @@ async def spell_check_main_endpoint(
         exclusion_words = await storage_engine.prepare(
             request_payload.user_name,
         ).fetch_records()
+    exclusion_words.extend(SETTINGS.exclusion_words)
     return models.SpellCheckResponse(
-        **request_payload.dict(),
+        **request_payload.model_dump(),
         corrections=await to_thread.run_sync(
             spell_service.prepare(request_payload, exclusion_words).run_check,
         ),

From 7f0495459c979b006c2daeedf221048f7f21c35f Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <Nikita.KOZLOVTSEV@raiffeisen.ru>
Date: Mon, 8 Jul 2024 19:59:26 +0300
Subject: [PATCH 2/4] [NV-8287] CR fixes, switch list[str] to str

---
 README.md                 | 2 +-
 tests/test_spell_views.py | 4 ++--
 whole_app/settings.py     | 2 +-
 whole_app/views.py        | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 6c73e0d..30ed1e2 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ You can change config of the service by changing the environment variables. Here
 * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`.
 * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`.
 * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`.
-* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignore by default `'["Foo", "bar"]'`
+* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignore by default(string separated by comma) `"Foo, bar"`
 
 ### Deployment
 Note: all docker & docker-compose variants use named volumes to store user dictionaries.
diff --git a/tests/test_spell_views.py b/tests/test_spell_views.py
index e3269d3..5a7e78d 100644
--- a/tests/test_spell_views.py
+++ b/tests/test_spell_views.py
@@ -112,12 +112,12 @@ def parse_words(server_response: RequestsResponse) -> typing.Any:
 
 @pytest.mark.parametrize(
     ("wannabe_user_input", "excluded_words"),
-    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", ["шячло", "попячтса"])],
+    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", "шЯчЛо, ПоПяЧтСа")],
 )
 def test_default_excluded_words(
     app_client: "TestClient",
     wannabe_user_input: str,
-    excluded_words: list[str],
+    excluded_words: str,
     monkeypatch: typing.Any,
 ) -> None:
     """Dead simple test."""
diff --git a/whole_app/settings.py b/whole_app/settings.py
index 83da6ca..adfa265 100644
--- a/whole_app/settings.py
+++ b/whole_app/settings.py
@@ -195,7 +195,7 @@ class SettingsOfMicroservice(BaseSettings):
         ),
     ] = 60
     username_regex: str = r"^[a-zA-Z0-9-_]*$"
-    exclusion_words: list[str] = []
+    exclusion_words: str = ""
 
     class Config:
         env_prefix: str = "spellcheck_"
diff --git a/whole_app/views.py b/whole_app/views.py
index f4496b8..53b2a8c 100644
--- a/whole_app/views.py
+++ b/whole_app/views.py
@@ -63,7 +63,7 @@ async def spell_check_main_endpoint(
         exclusion_words = await storage_engine.prepare(
             request_payload.user_name,
         ).fetch_records()
-    exclusion_words.extend(SETTINGS.exclusion_words)
+    exclusion_words.extend([one_word.strip().lower() for one_word in SETTINGS.exclusion_words.split(",")])
     return models.SpellCheckResponse(
         **request_payload.model_dump(),
         corrections=await to_thread.run_sync(

From cc9f5bb59c4fab39d3b40bb71621a01b23a68704 Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <Nikita.KOZLOVTSEV@raiffeisen.ru>
Date: Wed, 10 Jul 2024 14:03:41 +0300
Subject: [PATCH 3/4] [NV-8287] CR fixes

---
 README.md             | 8 ++++----
 whole_app/settings.py | 7 ++++++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 30ed1e2..042f8a8 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ It runs blazingly fast due to the use of pychant in its kernel, LRU cache usage
 Also it supports feature called «user dictionaries» — user can add his own word-exceptions to personal dictionary.
 
 ## Quickstart
-* `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.0.0`
+* `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.1.0`
 * check http://localhost:10113/docs/ for full REST documentation
 * main REST endpoint you will be needed is http://localhost:10113/api/check/ (this will be available without authorization)
 
@@ -35,19 +35,19 @@ You can change config of the service by changing the environment variables. Here
 * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`.
 * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`.
 * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`.
-* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignore by default(string separated by comma) `"Foo, bar"`
+* `SPELLCHECK_EXCLUSION_WORDS` comma-separated list of words that will be ignored by the spell checker by default (for example `Foo, bar`). Default value is an empty string.
 
 ### Deployment
 Note: all docker & docker-compose variants use named volumes to store user dictionaries.
 #### Plain docker
-`docker run  -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.0.0`
+`docker run  -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.1.0`
 #### Docker-compose
 * Save this example configuration as `docker-compose.yml`:
 ```yml
 version: "3.9"
 services:
     spellcheck:
-        image: xfenix/spellcheck-microservice:4.0.0
+        image: xfenix/spellcheck-microservice:4.1.0
         ports:
         - "10113:10113"
         volumes:
diff --git a/whole_app/settings.py b/whole_app/settings.py
index adfa265..e1e3033 100644
--- a/whole_app/settings.py
+++ b/whole_app/settings.py
@@ -195,7 +195,12 @@ class SettingsOfMicroservice(BaseSettings):
         ),
     ] = 60
     username_regex: str = r"^[a-zA-Z0-9-_]*$"
-    exclusion_words: str = ""
+    exclusion_words: typing.Annotated[
+        str,
+        pydantic.Field(
+            description="list of words which will ignored by default(string separated by comma)",
+        ),
+    ] = ""
 
     class Config:
         env_prefix: str = "spellcheck_"

From 7a05f23f41f9102319ab04ebc7a3b86056b6890e Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <nikita.kozlovtcev@gmail.com>
Date: Fri, 12 Jul 2024 16:06:52 +0300
Subject: [PATCH 4/4] fix github

---
 whole_app/spell.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/whole_app/spell.py b/whole_app/spell.py
index a11d534..d4b11bd 100644
--- a/whole_app/spell.py
+++ b/whole_app/spell.py
@@ -35,7 +35,7 @@ def prepare(
         if request_payload.exclude_urls:
             for one_url in self._url_extractor.find_urls(self._input_text):
                 self._exclusion_words.extend(
-                    [word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)]
+                    {word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)}
                 )
 
         self._spellcheck_engine = SpellChecker(request_payload.language)