Merge pull request #30 from NikitaKozlovtcev/feature/NV-8287-added-ex…

…cluded-words-from-envs [NV-8287] added excluded words from envs
xfenix · Jul 15, 2024 · 1f5f151 · 1f5f151
2 parents aaf0cc5 + 7a05f23
commit 1f5f151
Show file tree

Hide file tree

Showing 6 changed files with 44 additions and 15 deletions.
diff --git a/README.md b/README.md
@@ -13,7 +13,7 @@ It runs blazingly fast due to the use of pychant in its kernel, LRU cache usage
 Also it supports feature called «user dictionaries» — user can add his own word-exceptions to personal dictionary.
 
 ## Quickstart
-* `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.0.0`
+* `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.1.0`
 * check http://localhost:10113/docs/ for full REST documentation
 * main REST endpoint you will be needed is http://localhost:10113/api/check/ (this will be available without authorization)
 
@@ -35,18 +35,19 @@ You can change config of the service by changing the environment variables. Here
 * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`.
 * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`.
 * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`.
+* `SPELLCHECK_EXCLUSION_WORDS` list of words which will be ignored by default (comma-separated string). Default value is an empty string.
 
 ### Deployment
 Note: all docker & docker-compose variants use named volumes to store user dictionaries.
 #### Plain docker
-`docker run  -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.0.0`
+`docker run  -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.1.0`
 #### Docker-compose
 * Save this example configuration as `docker-compose.yml`:
 ```yml
 version: "3.9"
 services:
     spellcheck:
-        image: xfenix/spellcheck-microservice:4.0.0
+        image: xfenix/spellcheck-microservice:4.1.0
         ports:
         - "10113:10113"
         volumes:

diff --git a/tests/test_dict_views.py b/tests/test_dict_views.py
@@ -42,7 +42,7 @@ def test_add_to_dict(
             json=models.UserDictionaryRequestWithWord(
                 user_name=fake_user_name,
                 exception_word=fake_exc_word,
-            ).dict(),
+            ).model_dump(),
         )
         assert server_response.status_code == 201
         if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
@@ -70,7 +70,7 @@ def test_remove_from_user_dict(
             json=models.UserDictionaryRequestWithWord(
                 user_name=fake_user_name,
                 exception_word=fake_exc_word,
-            ).dict(),
+            ).model_dump(),
         )
         assert server_response.status_code == 200
         if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
@@ -92,7 +92,7 @@ def test_dummy_provider_init(
             json=models.UserDictionaryRequestWithWord(
                 user_name=faker_obj.user_name(),
                 exception_word=faker_obj.word(),
-            ).dict(),
+            ).model_dump(),
         )
         assert server_response.status_code == 201
 
@@ -111,7 +111,7 @@ def test_disabled_dictionary_views(
                 json=models.UserDictionaryRequestWithWord(
                     user_name="test",
                     exception_word="test",
-                ).dict(),
+                ).model_dump(),
             )
             assert server_response.status_code == 404
         # restore back api state to ensure other tests wont break
@@ -124,7 +124,7 @@ def test_empty_auth_key(self: "TestVarious", api_key: str) -> None:
             json=models.UserDictionaryRequestWithWord(
                 user_name="test",
                 exception_word="test",
-            ).dict(),
+            ).model_dump(),
             headers={} if api_key is None else {SETTINGS.api_key_header_name: ""},
         )
         assert server_response.status_code == 403
@@ -135,7 +135,7 @@ def test_wrong_api_key(self: "TestVarious") -> None:
             json=models.UserDictionaryRequestWithWord(
                 user_name="test",
                 exception_word="test",
-            ).dict(),
+            ).model_dump(),
             headers={
                 SETTINGS.api_key_header_name: SETTINGS.api_key + "wrongTrashKekJunk --- 5000",
             },

diff --git a/tests/test_spell_views.py b/tests/test_spell_views.py
@@ -27,7 +27,7 @@ def test_no_corrections(app_client: "TestClient", wannabe_user_input: str) -> No
     """Dead simple test."""
     server_response: typing.Final = app_client.post(
         f"{SETTINGS.api_prefix}/check/",
-        json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).dict(),
+        json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(),
     )
     assert server_response.status_code == 200
 
@@ -53,7 +53,7 @@ def test_with_corrections_simple(
             text=wannabe_user_input,
             language=RU_LANG,
             user_name=faker_obj.user_name(),
-        ).dict(),
+        ).model_dump(),
     )
     assert server_response.status_code == 200
 
@@ -87,7 +87,7 @@ def run_request() -> typing.Any:
                 text=wannabe_user_input,
                 language=RU_LANG,
                 user_name=user_name,
-            ).dict(),
+            ).model_dump(),
         )
 
     def parse_words(server_response: RequestsResponse) -> typing.Any:
@@ -103,8 +103,29 @@ def parse_words(server_response: RequestsResponse) -> typing.Any:
         json=models.UserDictionaryRequestWithWord(
             user_name=user_name,
             exception_word=tested_word,
-        ).dict(),
+        ).model_dump(),
     )
     # and than check that excepted word not in the check output
     server_response = run_request()
     assert tested_word not in parse_words(server_response)
+
+
+@pytest.mark.parametrize(
+    ("wannabe_user_input", "excluded_words"),
+    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", "шЯчЛо, ПоПяЧтСа")],
+)
+def test_default_excluded_words(
+    app_client: "TestClient",
+    wannabe_user_input: str,
+    excluded_words: str,
+    monkeypatch: typing.Any,
+) -> None:
+    """Dead simple test."""
+    with monkeypatch.context() as patcher:
+        patcher.setattr(SETTINGS, "exclusion_words", excluded_words)
+        server_response: typing.Final = app_client.post(
+            f"{SETTINGS.api_prefix}/check/",
+            json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(),
+        )
+        assert server_response.status_code == 200
+        assert server_response.json()["corrections"] == [], f"{server_response.json()=} --- {excluded_words=}"
diff --git a/whole_app/settings.py b/whole_app/settings.py
@@ -195,6 +195,12 @@ class SettingsOfMicroservice(BaseSettings):
         ),
     ] = 60
     username_regex: str = r"^[a-zA-Z0-9-_]*$"
+    exclusion_words: typing.Annotated[
+        str,
+        pydantic.Field(
+            description="list of words which will ignored by default(string separated by comma)",
+        ),
+    ] = ""
 
     class Config:
         env_prefix: str = "spellcheck_"

diff --git a/whole_app/spell.py b/whole_app/spell.py
@@ -35,7 +35,7 @@ def prepare(
         if request_payload.exclude_urls:
             for one_url in self._url_extractor.find_urls(self._input_text):
                 self._exclusion_words.extend(
-                    [word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)]
+                    {word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)}
                 )
 
         self._spellcheck_engine = SpellChecker(request_payload.language)

diff --git a/whole_app/views.py b/whole_app/views.py
@@ -63,8 +63,9 @@ async def spell_check_main_endpoint(
         exclusion_words = await storage_engine.prepare(
             request_payload.user_name,
         ).fetch_records()
+    exclusion_words.extend([one_word.strip().lower() for one_word in SETTINGS.exclusion_words.split(",")])
     return models.SpellCheckResponse(
-        **request_payload.dict(),
+        **request_payload.model_dump(),
         corrections=await to_thread.run_sync(
             spell_service.prepare(request_payload, exclusion_words).run_check,
         ),