diff --git a/README.md b/README.md index 042f8a8..bbdc2a3 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ You can change config of the service by changing the environment variables. Here * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`. * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`. * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`. -* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignored by default(string separated by comma). Default value is empty string. +* `SPELLCHECK_EXCLUSION_WORDS_STR` comma-separated string of words which will be ignored by the `/api/check` endpoint on every request. Example: `'foo, bar'`. Default value is empty string. ### Deployment Note: all docker & docker-compose variants use named volumes to store user dictionaries. diff --git a/tests/test_spell.py b/tests/test_spell.py index 4c0e063..b5a9510 100644 --- a/tests/test_spell.py +++ b/tests/test_spell.py @@ -1,7 +1,11 @@ +import typing + import pytest from tests._fixtures import COMMON_TEXT_MESSAGE +from tests.test_spell_views import RU_LANG from whole_app import models +from whole_app.settings import SETTINGS from whole_app.spell import SpellCheckService @@ -44,3 +48,23 @@ def test_urls_ignored( models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language="ru_RU", exclude_urls=True), ).run_check() assert not corrections + + +@pytest.mark.parametrize( + ("wannabe_user_input", "excluded_words"), + [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", {"шячло", "попячтса"})], +) +def test_default_excluded_words( + wannabe_user_input: str, + excluded_words: str, + monkeypatch: typing.Any, +) -> None: + with monkeypatch.context() as patcher: + patcher.setattr(SETTINGS, "_exclusion_words_set", excluded_words) + fake_engine: SpellCheckService = SpellCheckService() + prepared = fake_engine.prepare( + models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG, 
exclude_urls=False), + ) + + corrections = prepared.run_check() + assert corrections == [], f"{corrections=} --- {prepared._exclusion_words=}" # noqa: SLF001 diff --git a/tests/test_spell_views.py b/tests/test_spell_views.py index 5a7e78d..03a99fb 100644 --- a/tests/test_spell_views.py +++ b/tests/test_spell_views.py @@ -108,24 +108,3 @@ def parse_words(server_response: RequestsResponse) -> typing.Any: # and than check that excepted word not in the check output server_response = run_request() assert tested_word not in parse_words(server_response) - - -@pytest.mark.parametrize( - ("wannabe_user_input", "excluded_words"), - [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", "шЯчЛо, ПоПяЧтСа")], -) -def test_default_excluded_words( - app_client: "TestClient", - wannabe_user_input: str, - excluded_words: str, - monkeypatch: typing.Any, -) -> None: - """Dead simple test.""" - with monkeypatch.context() as patcher: - patcher.setattr(SETTINGS, "exclusion_words", excluded_words) - server_response: typing.Final = app_client.post( - f"{SETTINGS.api_prefix}/check/", - json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(), - ) - assert server_response.status_code == 200 - assert server_response.json()["corrections"] == [], f"{server_response.json()=} --- {excluded_words=}" diff --git a/whole_app/settings.py b/whole_app/settings.py index e1e3033..d682e50 100644 --- a/whole_app/settings.py +++ b/whole_app/settings.py @@ -5,6 +5,8 @@ import pydantic import structlog import toml +import typing_extensions +from pydantic import computed_field from pydantic_settings import BaseSettings @@ -195,12 +197,32 @@ class SettingsOfMicroservice(BaseSettings): ), ] = 60 username_regex: str = r"^[a-zA-Z0-9-_]*$" - exclusion_words: typing.Annotated[ + exclusion_words_str: typing.Annotated[ str, pydantic.Field( - description="list of words which will ignored by default(string separated by comma)", + description="String with list of words which will be ignored in /api/check endpoint each 
request. " + "Example: `'foo, bar'`" ), ] = "" + _exclusion_words_set: typing.Annotated[ + set[str], + pydantic.Field( + description="""set of words which will ignored by default(filled from exclusion_words_str). + Example: `'["foo", "bar"]'` """, + ), + ] = set() + + @computed_field # type: ignore[misc] + @property + def exclusion_words_set(self) -> set[str]: + return self._exclusion_words_set + + @pydantic.model_validator(mode="after") + def _assemble_exclusion_words_set(self) -> "typing_extensions.Self": + self._exclusion_words_set = { + one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word + } + return self class Config: env_prefix: str = "spellcheck_" diff --git a/whole_app/spell.py b/whole_app/spell.py index d4b11bd..9824fc1 100644 --- a/whole_app/spell.py +++ b/whole_app/spell.py @@ -31,13 +31,13 @@ def prepare( """Initialize machinery.""" self._input_text = request_payload.text self._exclusion_words = exclusion_words if exclusion_words else [] + self._exclusion_words.extend(SETTINGS.exclusion_words_set) if request_payload.exclude_urls: for one_url in self._url_extractor.find_urls(self._input_text): self._exclusion_words.extend( - {word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)} + {one_word.lower() for one_word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)} ) - self._spellcheck_engine = SpellChecker(request_payload.language) return self diff --git a/whole_app/views.py b/whole_app/views.py index 53b2a8c..89ef86c 100644 --- a/whole_app/views.py +++ b/whole_app/views.py @@ -63,7 +63,6 @@ async def spell_check_main_endpoint( exclusion_words = await storage_engine.prepare( request_payload.user_name, ).fetch_records() - exclusion_words.extend([one_word.strip().lower() for one_word in SETTINGS.exclusion_words.split(",")]) return models.SpellCheckResponse( **request_payload.model_dump(), corrections=await to_thread.run_sync(