From b0b661a97cd6b72a48669046c017a169346d0cc0 Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Fri, 12 Jul 2024 09:33:16 +0300 Subject: [PATCH 1/7] [NV-8287] global ignore --- README.md | 3 ++- tests/test_spell.py | 26 +++++++++++++++++++++++++- tests/test_spell_views.py | 21 --------------------- whole_app/settings.py | 16 +++++++++++++++- whole_app/spell.py | 4 ++-- whole_app/views.py | 1 - 6 files changed, 44 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 042f8a8..7a453d1 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,8 @@ You can change config of the service by changing the environment variables. Here * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`. * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`. * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`. -* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignored by default(string separated by comma). Default value is empty string. +* `SPELLCHECK_EXCLUSION_WORDS_STR` list of words which will ignored by default(string separated by comma). Default value is empty string. +* `SPELLCHECK_EXCLUSION_WORDS_SET` set of words which will ignored by default(filled from exclusion_words_str). Default value is `set()`. ### Deployment Note: all docker & docker-compose variants use named volumes to store user dictionaries. diff --git a/tests/test_spell.py b/tests/test_spell.py index 4c0e063..8f7e4ff 100644 --- a/tests/test_spell.py +++ b/tests/test_spell.py @@ -1,7 +1,11 @@ +import typing + import pytest from tests._fixtures import COMMON_TEXT_MESSAGE +from tests.test_spell_views import RU_LANG from whole_app import models +from whole_app.settings import SETTINGS from whole_app.spell import SpellCheckService @@ -41,6 +45,26 @@ def test_urls_ignored( ) -> None: fake_engine: SpellCheckService = SpellCheckService() corrections = fake_engine.prepare( - models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language="ru_RU", exclude_urls=True), + models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language=RU_LANG, exclude_urls=True), ).run_check() assert not corrections + + +@pytest.mark.parametrize( + ("wannabe_user_input", "excluded_words"), + [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", {"шячло", "попячтса"})], +) +def test_default_excluded_words( + wannabe_user_input: str, + excluded_words: str, + monkeypatch: typing.Any, +) -> None: + with monkeypatch.context() as patcher: + patcher.setattr(SETTINGS, "exclusion_words_set", excluded_words) + fake_engine: SpellCheckService = SpellCheckService() + prepared = fake_engine.prepare( + models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG, exclude_urls=False), + ) + + corrections = prepared.run_check() + assert corrections == [], f"{corrections=} --- {prepared._exclusion_words=}" # noqa: SLF001 diff --git a/tests/test_spell_views.py b/tests/test_spell_views.py index 5a7e78d..03a99fb 100644 --- a/tests/test_spell_views.py +++ b/tests/test_spell_views.py @@ -108,24 +108,3 @@ def parse_words(server_response: RequestsResponse) -> typing.Any: # and than check that excepted word not in the check output server_response = run_request() assert tested_word not in parse_words(server_response) - - -@pytest.mark.parametrize( - ("wannabe_user_input", "excluded_words"), - [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", "шЯчЛо, ПоПяЧтСа")], -) -def test_default_excluded_words( - app_client: "TestClient", - wannabe_user_input: str, - excluded_words: str, - monkeypatch: typing.Any, -) -> None: - """Dead simple test.""" - with monkeypatch.context() as patcher: - patcher.setattr(SETTINGS, "exclusion_words", excluded_words) - server_response: typing.Final = app_client.post( - f"{SETTINGS.api_prefix}/check/", - json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(), - ) - assert server_response.status_code == 200 - assert server_response.json()["corrections"] == [], f"{server_response.json()=} --- {excluded_words=}" diff --git a/whole_app/settings.py b/whole_app/settings.py index e1e3033..0a12576 100644 --- a/whole_app/settings.py +++ b/whole_app/settings.py @@ -5,6 +5,7 @@ import pydantic import structlog import toml +import typing_extensions from pydantic_settings import BaseSettings @@ -195,12 +196,25 @@ class SettingsOfMicroservice(BaseSettings): ), ] = 60 username_regex: str = r"^[a-zA-Z0-9-_]*$" - exclusion_words: typing.Annotated[ + exclusion_words_str: typing.Annotated[ str, pydantic.Field( description="list of words which will ignored by default(string separated by comma)", ), ] = "" + exclusion_words_set: typing.Annotated[ + set[str], + pydantic.Field( + description="set of words which will ignored by default(filled from exclusion_words_str)", + ), + ] = set() + + @pydantic.model_validator(mode="after") + def validate_block_structure(self) -> "typing_extensions.Self": + self.exclusion_words_set = { + one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word + } + return self class Config: env_prefix: str = "spellcheck_" diff --git a/whole_app/spell.py b/whole_app/spell.py index a11d534..9824fc1 100644 --- a/whole_app/spell.py +++ b/whole_app/spell.py @@ -31,13 +31,13 @@ def prepare( """Initialize machinery.""" self._input_text = request_payload.text self._exclusion_words = exclusion_words if exclusion_words else [] + self._exclusion_words.extend(SETTINGS.exclusion_words_set) if request_payload.exclude_urls: for one_url in self._url_extractor.find_urls(self._input_text): self._exclusion_words.extend( - [word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)] + {one_word.lower() for one_word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)} ) - self._spellcheck_engine = SpellChecker(request_payload.language) return self diff --git a/whole_app/views.py b/whole_app/views.py index 53b2a8c..89ef86c 100644 --- a/whole_app/views.py +++ b/whole_app/views.py @@ -63,7 +63,6 @@ async def spell_check_main_endpoint( exclusion_words = await storage_engine.prepare( request_payload.user_name, ).fetch_records() - exclusion_words.extend([one_word.strip().lower() for one_word in SETTINGS.exclusion_words.split(",")]) return models.SpellCheckResponse( **request_payload.model_dump(), corrections=await to_thread.run_sync( From 93718828e44de3d573d4d0b3b2229acdca33f899 Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Fri, 12 Jul 2024 15:32:16 +0300 Subject: [PATCH 2/7] global ignore --- Taskfile.yml | 20 ++++++++++++++++++++ scripts/__main__.py | 4 ++-- tests/test_spell.py | 2 +- 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 Taskfile.yml diff --git a/Taskfile.yml b/Taskfile.yml new file mode 100644 index 0000000..12da358 --- /dev/null +++ b/Taskfile.yml @@ -0,0 +1,20 @@ +version: "3" + +tasks: + build: + desc: "build application docker container" + cmds: + - docker build -t spellcheck-microservice . + + tests: + desc: "run pytest (pass args after '--')" + cmds: + - task: build + - docker run -t spellcheck-microservice bash -c "COVERAGE_FILE=/tmp/junk.coverage pytest . {{.CLI_ARGS}}" + + lint: + desc: "run linters" + cmds: + - poetry run ruff format . + - poetry run ruff check . --fix + - poetry run mypy . diff --git a/scripts/__main__.py b/scripts/__main__.py index 20b5a32..6086a54 100755 --- a/scripts/__main__.py +++ b/scripts/__main__.py @@ -22,7 +22,7 @@ def _update_dockerhub_readme() -> None: README_PATH.read_text(), flags=re.IGNORECASE | re.DOTALL, ).strip() - new_content = replace_tag_in_readme(new_content, parse_last_git_tag()) + new_content = replace_tag_in_readme(new_content, "4.1.0") README_PATH.write_text(new_content + "\n") @@ -61,7 +61,7 @@ def _update_readme() -> None: new_content, flags=re.IGNORECASE | re.MULTILINE | re.DOTALL, ) - new_content = replace_tag_in_readme(new_content, parse_last_git_tag()) + new_content = replace_tag_in_readme(new_content, "4.1.0") README_PATH.write_text(new_content) diff --git a/tests/test_spell.py b/tests/test_spell.py index 8f7e4ff..af4f035 100644 --- a/tests/test_spell.py +++ b/tests/test_spell.py @@ -45,7 +45,7 @@ def test_urls_ignored( ) -> None: fake_engine: SpellCheckService = SpellCheckService() corrections = fake_engine.prepare( - models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language=RU_LANG, exclude_urls=True), + models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language="ru_RU", exclude_urls=True), ).run_check() assert not corrections From e35295a4b92f40d1ea180d75964ffc82bfd4668e Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Fri, 12 Jul 2024 15:47:10 +0300 Subject: [PATCH 3/7] gihub email fixes --- Taskfile.yml | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 Taskfile.yml diff --git a/Taskfile.yml b/Taskfile.yml deleted file mode 100644 index 12da358..0000000 --- a/Taskfile.yml +++ /dev/null @@ -1,20 +0,0 @@ -version: "3" - -tasks: - build: - desc: "build application docker container" - cmds: - - docker build -t spellcheck-microservice . - - tests: - desc: "run pytest (pass args after '--')" - cmds: - - task: build - - docker run -t spellcheck-microservice bash -c "COVERAGE_FILE=/tmp/junk.coverage pytest . {{.CLI_ARGS}}" - - lint: - desc: "run linters" - cmds: - - poetry run ruff format . - - poetry run ruff check . --fix - - poetry run mypy . From a9045e7d3a382ef325e7850bf0a0c10dca92f46c Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Fri, 12 Jul 2024 16:15:28 +0300 Subject: [PATCH 4/7] update settings --- whole_app/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whole_app/settings.py b/whole_app/settings.py index 0a12576..57b724a 100644 --- a/whole_app/settings.py +++ b/whole_app/settings.py @@ -210,7 +210,7 @@ class SettingsOfMicroservice(BaseSettings): ] = set() @pydantic.model_validator(mode="after") - def validate_block_structure(self) -> "typing_extensions.Self": + def assemble_exclusion_words_set(self) -> "typing_extensions.Self": self.exclusion_words_set = { one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word } From fdb8073144ac2cfed73e9d866f83ad4261332d55 Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Fri, 12 Jul 2024 16:15:28 +0300 Subject: [PATCH 5/7] update settings --- scripts/__main__.py | 4 ++-- whole_app/settings.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/__main__.py b/scripts/__main__.py index 6086a54..20b5a32 100755 --- a/scripts/__main__.py +++ b/scripts/__main__.py @@ -22,7 +22,7 @@ def _update_dockerhub_readme() -> None: README_PATH.read_text(), flags=re.IGNORECASE | re.DOTALL, ).strip() - new_content = replace_tag_in_readme(new_content, "4.1.0") + new_content = replace_tag_in_readme(new_content, parse_last_git_tag()) README_PATH.write_text(new_content + "\n") @@ -61,7 +61,7 @@ def _update_readme() -> None: new_content, flags=re.IGNORECASE | re.MULTILINE | re.DOTALL, ) - new_content = replace_tag_in_readme(new_content, "4.1.0") + new_content = replace_tag_in_readme(new_content, parse_last_git_tag()) README_PATH.write_text(new_content) diff --git a/whole_app/settings.py b/whole_app/settings.py index 0a12576..57b724a 100644 --- a/whole_app/settings.py +++ b/whole_app/settings.py @@ -210,7 +210,7 @@ class SettingsOfMicroservice(BaseSettings): ] = set() @pydantic.model_validator(mode="after") - def validate_block_structure(self) -> "typing_extensions.Self": + def assemble_exclusion_words_set(self) -> "typing_extensions.Self": self.exclusion_words_set = { one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word } From 1e564d8b503fcaf4675b4d228f5c1bf5941d3136 Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Tue, 16 Jul 2024 09:32:31 +0300 Subject: [PATCH 6/7] CR fixes --- README.md | 5 +++-- whole_app/settings.py | 12 +++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7a453d1..4f41043 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,9 @@ You can change config of the service by changing the environment variables. Here * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`. * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`. * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`. -* `SPELLCHECK_EXCLUSION_WORDS_STR` list of words which will ignored by default(string separated by comma). Default value is empty string. -* `SPELLCHECK_EXCLUSION_WORDS_SET` set of words which will ignored by default(filled from exclusion_words_str). Default value is `set()`. +* `SPELLCHECK_EXCLUSION_WORDS_STR` list of words which will ignored by default(string separated by comma). Example: 'foo, bar'. Default value is empty string. +* `SPELLCHECK_EXCLUSION_WORDS_SET` set of words which will ignored by default(filled from exclusion_words_str). + Example: '["foo", "bar"]' . Default value is `set()`. ### Deployment Note: all docker & docker-compose variants use named volumes to store user dictionaries. diff --git a/whole_app/settings.py b/whole_app/settings.py index 57b724a..a1258e0 100644 --- a/whole_app/settings.py +++ b/whole_app/settings.py @@ -199,21 +199,23 @@ class SettingsOfMicroservice(BaseSettings): exclusion_words_str: typing.Annotated[ str, pydantic.Field( - description="list of words which will ignored by default(string separated by comma)", + description="list of words which will ignored by default(string separated by comma). " "Example: 'foo, bar'" ), ] = "" exclusion_words_set: typing.Annotated[ set[str], pydantic.Field( - description="set of words which will ignored by default(filled from exclusion_words_str)", + description="""set of words which will ignored by default(filled from exclusion_words_str). + Example: '["foo", "bar"]' """, ), ] = set() @pydantic.model_validator(mode="after") def assemble_exclusion_words_set(self) -> "typing_extensions.Self": - self.exclusion_words_set = { - one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word - } + if not self.exclusion_words_set: + self.exclusion_words_set = { + one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word + } return self class Config: From 53d3f4d2f05f04837f92a3d767446cf95369e9e7 Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Tue, 16 Jul 2024 13:17:16 +0300 Subject: [PATCH 7/7] CR fixes --- README.md | 4 +--- tests/test_spell.py | 2 +- whole_app/settings.py | 22 ++++++++++++++-------- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 4f41043..bbdc2a3 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,7 @@ You can change config of the service by changing the environment variables. Here * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`. * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`. * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`. -* `SPELLCHECK_EXCLUSION_WORDS_STR` list of words which will ignored by default(string separated by comma). Example: 'foo, bar'. Default value is empty string. -* `SPELLCHECK_EXCLUSION_WORDS_SET` set of words which will ignored by default(filled from exclusion_words_str). - Example: '["foo", "bar"]' . Default value is `set()`. +* `SPELLCHECK_EXCLUSION_WORDS_STR` String with list of words which will be ignored in /api/check endpoint each request. Example: `'foo, bar'`. Default value is empty string. ### Deployment Note: all docker & docker-compose variants use named volumes to store user dictionaries. diff --git a/tests/test_spell.py b/tests/test_spell.py index af4f035..b5a9510 100644 --- a/tests/test_spell.py +++ b/tests/test_spell.py @@ -60,7 +60,7 @@ def test_default_excluded_words( monkeypatch: typing.Any, ) -> None: with monkeypatch.context() as patcher: - patcher.setattr(SETTINGS, "exclusion_words_set", excluded_words) + patcher.setattr(SETTINGS, "_exclusion_words_set", excluded_words) fake_engine: SpellCheckService = SpellCheckService() prepared = fake_engine.prepare( models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG, exclude_urls=False), diff --git a/whole_app/settings.py b/whole_app/settings.py index a1258e0..d682e50 100644 --- a/whole_app/settings.py +++ b/whole_app/settings.py @@ -6,6 +6,7 @@ import structlog import toml import typing_extensions +from pydantic import computed_field from pydantic_settings import BaseSettings @@ -199,23 +200,28 @@ class SettingsOfMicroservice(BaseSettings): exclusion_words_str: typing.Annotated[ str, pydantic.Field( - description="list of words which will ignored by default(string separated by comma). " "Example: 'foo, bar'" + description="String with list of words which will be ignored in /api/check endpoint each request. " + "Example: `'foo, bar'`" ), ] = "" - exclusion_words_set: typing.Annotated[ + _exclusion_words_set: typing.Annotated[ set[str], pydantic.Field( description="""set of words which will ignored by default(filled from exclusion_words_str). - Example: '["foo", "bar"]' """, + Example: `'["foo", "bar"]'` """, ), ] = set() + @computed_field # type: ignore[misc] + @property + def exclusion_words_set(self) -> set[str]: + return self._exclusion_words_set + @pydantic.model_validator(mode="after") - def assemble_exclusion_words_set(self) -> "typing_extensions.Self": - if not self.exclusion_words_set: - self.exclusion_words_set = { - one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word - } + def _assemble_exclusion_words_set(self) -> "typing_extensions.Self": + self._exclusion_words_set = { + one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word + } return self class Config: