Merge pull request #31 from NikitaKozlovtcev/feature/NV-8287-global-ignore

Feature/nv 8287 global ignore
xfenix · Jul 22, 2024 · 0e6e0e9 · 0e6e0e9
2 parents 1f5f151 + 53d3f4d
commit 0e6e0e9
Show file tree

Hide file tree

Showing 6 changed files with 51 additions and 27 deletions.
diff --git a/README.md b/README.md
@@ -35,7 +35,7 @@ You can change config of the service by changing the environment variables. Here
 * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`.
 * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`.
 * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`.
-* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignored by default(string separated by comma). Default value is empty string.
+* `SPELLCHECK_EXCLUSION_WORDS_STR` comma-separated string of words that will be ignored on every request to the /api/check endpoint. Example: `'foo, bar'`. Default value is empty string.
 
 ### Deployment
 Note: all docker & docker-compose variants use named volumes to store user dictionaries.

diff --git a/tests/test_spell.py b/tests/test_spell.py
@@ -1,7 +1,11 @@
+import typing
+
 import pytest
 
 from tests._fixtures import COMMON_TEXT_MESSAGE
+from tests.test_spell_views import RU_LANG
 from whole_app import models
+from whole_app.settings import SETTINGS
 from whole_app.spell import SpellCheckService
 
 
@@ -44,3 +48,23 @@ def test_urls_ignored(
         models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language="ru_RU", exclude_urls=True),
     ).run_check()
     assert not corrections
+
+
+@pytest.mark.parametrize(
+    ("wannabe_user_input", "excluded_words"),
+    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", {"шячло", "попячтса"})],
+)
+def test_default_excluded_words(
+    wannabe_user_input: str,
+    excluded_words: str,
+    monkeypatch: typing.Any,
+) -> None:
+    with monkeypatch.context() as patcher:
+        patcher.setattr(SETTINGS, "_exclusion_words_set", excluded_words)
+        fake_engine: SpellCheckService = SpellCheckService()
+        prepared = fake_engine.prepare(
+            models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG, exclude_urls=False),
+        )
+
+        corrections = prepared.run_check()
+        assert corrections == [], f"{corrections=} --- {prepared._exclusion_words=}"  # noqa: SLF001
diff --git a/tests/test_spell_views.py b/tests/test_spell_views.py
@@ -108,24 +108,3 @@ def parse_words(server_response: RequestsResponse) -> typing.Any:
     # and than check that excepted word not in the check output
     server_response = run_request()
     assert tested_word not in parse_words(server_response)
-
-
-@pytest.mark.parametrize(
-    ("wannabe_user_input", "excluded_words"),
-    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", "шЯчЛо, ПоПяЧтСа")],
-)
-def test_default_excluded_words(
-    app_client: "TestClient",
-    wannabe_user_input: str,
-    excluded_words: str,
-    monkeypatch: typing.Any,
-) -> None:
-    """Dead simple test."""
-    with monkeypatch.context() as patcher:
-        patcher.setattr(SETTINGS, "exclusion_words", excluded_words)
-        server_response: typing.Final = app_client.post(
-            f"{SETTINGS.api_prefix}/check/",
-            json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(),
-        )
-        assert server_response.status_code == 200
-        assert server_response.json()["corrections"] == [], f"{server_response.json()=} --- {excluded_words=}"
diff --git a/whole_app/settings.py b/whole_app/settings.py
@@ -5,6 +5,8 @@
 import pydantic
 import structlog
 import toml
+import typing_extensions
+from pydantic import computed_field
 from pydantic_settings import BaseSettings
 
 
@@ -195,12 +197,32 @@ class SettingsOfMicroservice(BaseSettings):
         ),
     ] = 60
     username_regex: str = r"^[a-zA-Z0-9-_]*$"
-    exclusion_words: typing.Annotated[
+    exclusion_words_str: typing.Annotated[
         str,
         pydantic.Field(
-            description="list of words which will ignored by default(string separated by comma)",
+            description="String with list of words which will be ignored in /api/check endpoint each request. "
+            "Example: `'foo, bar'`"
         ),
     ] = ""
+    _exclusion_words_set: typing.Annotated[
+        set[str],
+        pydantic.Field(
+            description="""set of words which will ignored by default(filled from exclusion_words_str).
+            Example: `'["foo", "bar"]'` """,
+        ),
+    ] = set()
+
+    @computed_field  # type: ignore[misc]
+    @property
+    def exclusion_words_set(self) -> set[str]:
+        return self._exclusion_words_set
+
+    @pydantic.model_validator(mode="after")
+    def _assemble_exclusion_words_set(self) -> "typing_extensions.Self":
+        self._exclusion_words_set = {
+            one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word
+        }
+        return self
 
     class Config:
         env_prefix: str = "spellcheck_"

diff --git a/whole_app/spell.py b/whole_app/spell.py
@@ -31,13 +31,13 @@ def prepare(
         """Initialize machinery."""
         self._input_text = request_payload.text
         self._exclusion_words = exclusion_words if exclusion_words else []
+        self._exclusion_words.extend(SETTINGS.exclusion_words_set)
 
         if request_payload.exclude_urls:
             for one_url in self._url_extractor.find_urls(self._input_text):
                 self._exclusion_words.extend(
-                    {word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)}
+                    {one_word.lower() for one_word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)}
                 )
-
         self._spellcheck_engine = SpellChecker(request_payload.language)
         return self
 

diff --git a/whole_app/views.py b/whole_app/views.py
@@ -63,7 +63,6 @@ async def spell_check_main_endpoint(
         exclusion_words = await storage_engine.prepare(
             request_payload.user_name,
         ).fetch_records()
-    exclusion_words.extend([one_word.strip().lower() for one_word in SETTINGS.exclusion_words.split(",")])
     return models.SpellCheckResponse(
         **request_payload.model_dump(),
         corrections=await to_thread.run_sync(