From b0b661a97cd6b72a48669046c017a169346d0cc0 Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <Nikita.KOZLOVTSEV@raiffeisen.ru>
Date: Fri, 12 Jul 2024 09:33:16 +0300
Subject: [PATCH 1/7] [NV-8287] global ignore

---
 README.md                 |  3 ++-
 tests/test_spell.py       | 26 +++++++++++++++++++++++++-
 tests/test_spell_views.py | 21 ---------------------
 whole_app/settings.py     | 16 +++++++++++++++-
 whole_app/spell.py        |  4 ++--
 whole_app/views.py        |  1 -
 6 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 042f8a8..7a453d1 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,8 @@ You can change config of the service by changing the environment variables. Here
 * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`.
 * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`.
 * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`.
-* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignored by default(string separated by comma). Default value is empty string.
+* `SPELLCHECK_EXCLUSION_WORDS_STR` list of words which will ignored by default(string separated by comma). Default value is empty string.
+* `SPELLCHECK_EXCLUSION_WORDS_SET` set of words which will ignored by default(filled from exclusion_words_str). Default value is `set()`.
 
 ### Deployment
 Note: all docker & docker-compose variants use named volumes to store user dictionaries.
diff --git a/tests/test_spell.py b/tests/test_spell.py
index 4c0e063..8f7e4ff 100644
--- a/tests/test_spell.py
+++ b/tests/test_spell.py
@@ -1,7 +1,11 @@
+import typing
+
 import pytest
 
 from tests._fixtures import COMMON_TEXT_MESSAGE
+from tests.test_spell_views import RU_LANG
 from whole_app import models
+from whole_app.settings import SETTINGS
 from whole_app.spell import SpellCheckService
 
 
@@ -41,6 +45,26 @@ def test_urls_ignored(
 ) -> None:
     fake_engine: SpellCheckService = SpellCheckService()
     corrections = fake_engine.prepare(
-        models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language="ru_RU", exclude_urls=True),
+        models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language=RU_LANG, exclude_urls=True),
     ).run_check()
     assert not corrections
+
+
+@pytest.mark.parametrize(
+    ("wannabe_user_input", "excluded_words"),
+    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", {"шячло", "попячтса"})],
+)
+def test_default_excluded_words(  # text made only of globally excluded words must yield no corrections
+    wannabe_user_input: str,
+    excluded_words: set[str],
+    monkeypatch: typing.Any,
+) -> None:
+    with monkeypatch.context() as patcher:
+        patcher.setattr(SETTINGS, "exclusion_words_set", excluded_words)
+        fake_engine: SpellCheckService = SpellCheckService()
+        prepared = fake_engine.prepare(
+            models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG, exclude_urls=False),
+        )
+
+        corrections = prepared.run_check()
+        assert corrections == [], f"{corrections=} --- {prepared._exclusion_words=}"  # noqa: SLF001
diff --git a/tests/test_spell_views.py b/tests/test_spell_views.py
index 5a7e78d..03a99fb 100644
--- a/tests/test_spell_views.py
+++ b/tests/test_spell_views.py
@@ -108,24 +108,3 @@ def parse_words(server_response: RequestsResponse) -> typing.Any:
     # and than check that excepted word not in the check output
     server_response = run_request()
     assert tested_word not in parse_words(server_response)
-
-
-@pytest.mark.parametrize(
-    ("wannabe_user_input", "excluded_words"),
-    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", "шЯчЛо, ПоПяЧтСа")],
-)
-def test_default_excluded_words(
-    app_client: "TestClient",
-    wannabe_user_input: str,
-    excluded_words: str,
-    monkeypatch: typing.Any,
-) -> None:
-    """Dead simple test."""
-    with monkeypatch.context() as patcher:
-        patcher.setattr(SETTINGS, "exclusion_words", excluded_words)
-        server_response: typing.Final = app_client.post(
-            f"{SETTINGS.api_prefix}/check/",
-            json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(),
-        )
-        assert server_response.status_code == 200
-        assert server_response.json()["corrections"] == [], f"{server_response.json()=} --- {excluded_words=}"
diff --git a/whole_app/settings.py b/whole_app/settings.py
index e1e3033..0a12576 100644
--- a/whole_app/settings.py
+++ b/whole_app/settings.py
@@ -5,6 +5,7 @@
 import pydantic
 import structlog
 import toml
+import typing_extensions
 from pydantic_settings import BaseSettings
 
 
@@ -195,12 +196,25 @@ class SettingsOfMicroservice(BaseSettings):
         ),
     ] = 60
     username_regex: str = r"^[a-zA-Z0-9-_]*$"
-    exclusion_words: typing.Annotated[
+    exclusion_words_str: typing.Annotated[
         str,
         pydantic.Field(
             description="list of words which will ignored by default(string separated by comma)",
         ),
     ] = ""
+    exclusion_words_set: typing.Annotated[
+        set[str],
+        pydantic.Field(
+            description="set of words which will ignored by default(filled from exclusion_words_str)",
+        ),
+    ] = set()
+
+    @pydantic.model_validator(mode="after")
+    def validate_block_structure(self) -> "typing_extensions.Self":
+        self.exclusion_words_set = {
+            one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word
+        }
+        return self
 
     class Config:
         env_prefix: str = "spellcheck_"
diff --git a/whole_app/spell.py b/whole_app/spell.py
index a11d534..9824fc1 100644
--- a/whole_app/spell.py
+++ b/whole_app/spell.py
@@ -31,13 +31,13 @@ def prepare(
         """Initialize machinery."""
         self._input_text = request_payload.text
         self._exclusion_words = exclusion_words if exclusion_words else []
+        self._exclusion_words.extend(SETTINGS.exclusion_words_set)
 
         if request_payload.exclude_urls:
             for one_url in self._url_extractor.find_urls(self._input_text):
                 self._exclusion_words.extend(
-                    [word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)]
+                    {one_word.lower() for one_word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)}
                 )
-
         self._spellcheck_engine = SpellChecker(request_payload.language)
         return self
 
diff --git a/whole_app/views.py b/whole_app/views.py
index 53b2a8c..89ef86c 100644
--- a/whole_app/views.py
+++ b/whole_app/views.py
@@ -63,7 +63,6 @@ async def spell_check_main_endpoint(
         exclusion_words = await storage_engine.prepare(
             request_payload.user_name,
         ).fetch_records()
-    exclusion_words.extend([one_word.strip().lower() for one_word in SETTINGS.exclusion_words.split(",")])
     return models.SpellCheckResponse(
         **request_payload.model_dump(),
         corrections=await to_thread.run_sync(

From 93718828e44de3d573d4d0b3b2229acdca33f899 Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <nikita.kozlovtcev@gmail.com>
Date: Fri, 12 Jul 2024 15:32:16 +0300
Subject: [PATCH 2/7] global ignore

---
 Taskfile.yml        | 20 ++++++++++++++++++++
 scripts/__main__.py |  4 ++--
 tests/test_spell.py |  2 +-
 3 files changed, 23 insertions(+), 3 deletions(-)
 create mode 100644 Taskfile.yml

diff --git a/Taskfile.yml b/Taskfile.yml
new file mode 100644
index 0000000..12da358
--- /dev/null
+++ b/Taskfile.yml
@@ -0,0 +1,20 @@
+version: "3"
+
+tasks:
+  build:
+    desc: "build application docker container"
+    cmds:
+      - docker build -t spellcheck-microservice .
+
+  tests:
+    desc: "run pytest (pass args after '--')"
+    cmds:
+      - task: build
+      - docker run -t spellcheck-microservice bash -c "COVERAGE_FILE=/tmp/junk.coverage pytest . {{.CLI_ARGS}}"
+
+  lint:
+    desc: "run linters"
+    cmds:
+      - poetry run ruff format .
+      - poetry run ruff check . --fix
+      - poetry run mypy .
diff --git a/scripts/__main__.py b/scripts/__main__.py
index 20b5a32..6086a54 100755
--- a/scripts/__main__.py
+++ b/scripts/__main__.py
@@ -22,7 +22,7 @@ def _update_dockerhub_readme() -> None:
         README_PATH.read_text(),
         flags=re.IGNORECASE | re.DOTALL,
     ).strip()
-    new_content = replace_tag_in_readme(new_content, parse_last_git_tag())
+    new_content = replace_tag_in_readme(new_content, "4.1.0")
     README_PATH.write_text(new_content + "\n")
 
 
@@ -61,7 +61,7 @@ def _update_readme() -> None:
         new_content,
         flags=re.IGNORECASE | re.MULTILINE | re.DOTALL,
     )
-    new_content = replace_tag_in_readme(new_content, parse_last_git_tag())
+    new_content = replace_tag_in_readme(new_content, "4.1.0")
     README_PATH.write_text(new_content)
 
 
diff --git a/tests/test_spell.py b/tests/test_spell.py
index 8f7e4ff..af4f035 100644
--- a/tests/test_spell.py
+++ b/tests/test_spell.py
@@ -45,7 +45,7 @@ def test_urls_ignored(
 ) -> None:
     fake_engine: SpellCheckService = SpellCheckService()
     corrections = fake_engine.prepare(
-        models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language=RU_LANG, exclude_urls=True),
+        models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language="ru_RU", exclude_urls=True),
     ).run_check()
     assert not corrections
 

From e35295a4b92f40d1ea180d75964ffc82bfd4668e Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <nikita.kozlovtcev@gmail.com>
Date: Fri, 12 Jul 2024 15:47:10 +0300
Subject: [PATCH 3/7] github email fixes

---
 Taskfile.yml | 20 --------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 Taskfile.yml

diff --git a/Taskfile.yml b/Taskfile.yml
deleted file mode 100644
index 12da358..0000000
--- a/Taskfile.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-version: "3"
-
-tasks:
-  build:
-    desc: "build application docker container"
-    cmds:
-      - docker build -t spellcheck-microservice .
-
-  tests:
-    desc: "run pytest (pass args after '--')"
-    cmds:
-      - task: build
-      - docker run -t spellcheck-microservice bash -c "COVERAGE_FILE=/tmp/junk.coverage pytest . {{.CLI_ARGS}}"
-
-  lint:
-    desc: "run linters"
-    cmds:
-      - poetry run ruff format .
-      - poetry run ruff check . --fix
-      - poetry run mypy .

From a9045e7d3a382ef325e7850bf0a0c10dca92f46c Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <nikita.kozlovtcev@gmail.com>
Date: Fri, 12 Jul 2024 16:15:28 +0300
Subject: [PATCH 4/7] update settings

---
 whole_app/settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/whole_app/settings.py b/whole_app/settings.py
index 0a12576..57b724a 100644
--- a/whole_app/settings.py
+++ b/whole_app/settings.py
@@ -210,7 +210,7 @@ class SettingsOfMicroservice(BaseSettings):
     ] = set()
 
     @pydantic.model_validator(mode="after")
-    def validate_block_structure(self) -> "typing_extensions.Self":
+    def assemble_exclusion_words_set(self) -> "typing_extensions.Self":
         self.exclusion_words_set = {
             one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word
         }

From fdb8073144ac2cfed73e9d866f83ad4261332d55 Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <nikita.kozlovtcev@gmail.com>
Date: Fri, 12 Jul 2024 16:15:28 +0300
Subject: [PATCH 5/7] update settings

---
 scripts/__main__.py   | 4 ++--
 whole_app/settings.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/__main__.py b/scripts/__main__.py
index 6086a54..20b5a32 100755
--- a/scripts/__main__.py
+++ b/scripts/__main__.py
@@ -22,7 +22,7 @@ def _update_dockerhub_readme() -> None:
         README_PATH.read_text(),
         flags=re.IGNORECASE | re.DOTALL,
     ).strip()
-    new_content = replace_tag_in_readme(new_content, "4.1.0")
+    new_content = replace_tag_in_readme(new_content, parse_last_git_tag())
     README_PATH.write_text(new_content + "\n")
 
 
@@ -61,7 +61,7 @@ def _update_readme() -> None:
         new_content,
         flags=re.IGNORECASE | re.MULTILINE | re.DOTALL,
     )
-    new_content = replace_tag_in_readme(new_content, "4.1.0")
+    new_content = replace_tag_in_readme(new_content, parse_last_git_tag())
     README_PATH.write_text(new_content)
 
 
diff --git a/whole_app/settings.py b/whole_app/settings.py
index 0a12576..57b724a 100644
--- a/whole_app/settings.py
+++ b/whole_app/settings.py
@@ -210,7 +210,7 @@ class SettingsOfMicroservice(BaseSettings):
     ] = set()
 
     @pydantic.model_validator(mode="after")
-    def validate_block_structure(self) -> "typing_extensions.Self":
+    def assemble_exclusion_words_set(self) -> "typing_extensions.Self":
         self.exclusion_words_set = {
             one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word
         }

From 1e564d8b503fcaf4675b4d228f5c1bf5941d3136 Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <nikita.kozlovtcev@gmail.com>
Date: Tue, 16 Jul 2024 09:32:31 +0300
Subject: [PATCH 6/7] CR fixes

---
 README.md             |  5 +++--
 whole_app/settings.py | 12 +++++++-----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 7a453d1..4f41043 100644
--- a/README.md
+++ b/README.md
@@ -35,8 +35,9 @@ You can change config of the service by changing the environment variables. Here
 * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`.
 * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`.
 * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`.
-* `SPELLCHECK_EXCLUSION_WORDS_STR` list of words which will ignored by default(string separated by comma). Default value is empty string.
-* `SPELLCHECK_EXCLUSION_WORDS_SET` set of words which will ignored by default(filled from exclusion_words_str). Default value is `set()`.
+* `SPELLCHECK_EXCLUSION_WORDS_STR` list of words which will ignored by default(string separated by comma). Example: 'foo, bar'. Default value is empty string.
+* `SPELLCHECK_EXCLUSION_WORDS_SET` set of words which will ignored by default(filled from exclusion_words_str).
+            Example: '["foo", "bar"]' . Default value is `set()`.
 
 ### Deployment
 Note: all docker & docker-compose variants use named volumes to store user dictionaries.
diff --git a/whole_app/settings.py b/whole_app/settings.py
index 57b724a..a1258e0 100644
--- a/whole_app/settings.py
+++ b/whole_app/settings.py
@@ -199,21 +199,23 @@ class SettingsOfMicroservice(BaseSettings):
     exclusion_words_str: typing.Annotated[
         str,
         pydantic.Field(
-            description="list of words which will ignored by default(string separated by comma)",
+            description="list of words which will ignored by default(string separated by comma). " "Example: 'foo, bar'"
         ),
     ] = ""
     exclusion_words_set: typing.Annotated[
         set[str],
         pydantic.Field(
-            description="set of words which will ignored by default(filled from exclusion_words_str)",
+            description="""set of words which will ignored by default(filled from exclusion_words_str).
+            Example: '["foo", "bar"]' """,
         ),
     ] = set()
 
     @pydantic.model_validator(mode="after")
     def assemble_exclusion_words_set(self) -> "typing_extensions.Self":
-        self.exclusion_words_set = {
-            one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word
-        }
+        if not self.exclusion_words_set:
+            self.exclusion_words_set = {
+                one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word
+            }
         return self
 
     class Config:

From 53d3f4d2f05f04837f92a3d767446cf95369e9e7 Mon Sep 17 00:00:00 2001
From: Nikita Kozlovtsev <nikita.kozlovtcev@gmail.com>
Date: Tue, 16 Jul 2024 13:17:16 +0300
Subject: [PATCH 7/7] CR fixes

---
 README.md             |  4 +---
 tests/test_spell.py   |  2 +-
 whole_app/settings.py | 22 ++++++++++++++--------
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 4f41043..bbdc2a3 100644
--- a/README.md
+++ b/README.md
@@ -35,9 +35,7 @@ You can change config of the service by changing the environment variables. Here
 * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`.
 * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`.
 * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`.
-* `SPELLCHECK_EXCLUSION_WORDS_STR` list of words which will ignored by default(string separated by comma). Example: 'foo, bar'. Default value is empty string.
-* `SPELLCHECK_EXCLUSION_WORDS_SET` set of words which will ignored by default(filled from exclusion_words_str).
-            Example: '["foo", "bar"]' . Default value is `set()`.
+* `SPELLCHECK_EXCLUSION_WORDS_STR` String with list of words which will be ignored in /api/check endpoint each request. Example: `'foo, bar'`. Default value is empty string.
 
 ### Deployment
 Note: all docker & docker-compose variants use named volumes to store user dictionaries.
diff --git a/tests/test_spell.py b/tests/test_spell.py
index af4f035..b5a9510 100644
--- a/tests/test_spell.py
+++ b/tests/test_spell.py
@@ -60,7 +60,7 @@ def test_default_excluded_words(
     monkeypatch: typing.Any,
 ) -> None:
     with monkeypatch.context() as patcher:
-        patcher.setattr(SETTINGS, "exclusion_words_set", excluded_words)
+        patcher.setattr(SETTINGS, "_exclusion_words_set", excluded_words)
         fake_engine: SpellCheckService = SpellCheckService()
         prepared = fake_engine.prepare(
             models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG, exclude_urls=False),
diff --git a/whole_app/settings.py b/whole_app/settings.py
index a1258e0..d682e50 100644
--- a/whole_app/settings.py
+++ b/whole_app/settings.py
@@ -6,6 +6,7 @@
 import structlog
 import toml
 import typing_extensions
+from pydantic import computed_field
 from pydantic_settings import BaseSettings
 
 
@@ -199,23 +200,28 @@ class SettingsOfMicroservice(BaseSettings):
     exclusion_words_str: typing.Annotated[
         str,
         pydantic.Field(
-            description="list of words which will ignored by default(string separated by comma). " "Example: 'foo, bar'"
+            description="String with list of words which will be ignored in /api/check endpoint each request. "
+            "Example: `'foo, bar'`"
         ),
     ] = ""
-    exclusion_words_set: typing.Annotated[
+    _exclusion_words_set: typing.Annotated[
         set[str],
         pydantic.Field(
             description="""set of words which will ignored by default(filled from exclusion_words_str).
-            Example: '["foo", "bar"]' """,
+            Example: `'["foo", "bar"]'` """,
         ),
     ] = set()
 
+    @computed_field  # type: ignore[misc]
+    @property
+    def exclusion_words_set(self) -> set[str]:
+        return self._exclusion_words_set
+
     @pydantic.model_validator(mode="after")
-    def assemble_exclusion_words_set(self) -> "typing_extensions.Self":
-        if not self.exclusion_words_set:
-            self.exclusion_words_set = {
-                one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word
-            }
+    def _assemble_exclusion_words_set(self) -> "typing_extensions.Self":
+        self._exclusion_words_set = {  # rebuilt from exclusion_words_str; blank items are dropped
+            one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word.strip()
+        }
         return self
 
     class Config: