Skip to content

Commit

Permalink
Merge pull request #31 from NikitaKozlovtcev/feature/NV-8287-global-i…
Browse files Browse the repository at this point in the history
…gnore

Feature/nv 8287 global ignore
  • Loading branch information
xfenix authored Jul 22, 2024
2 parents 1f5f151 + 53d3f4d commit 0e6e0e9
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 27 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ You can change config of the service by changing the environment variables. Here
* `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`.
* `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`.
* `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`.
* `SPELLCHECK_EXCLUSION_WORDS` list of words which will be ignored by default (comma-separated string). Default value is an empty string.
* `SPELLCHECK_EXCLUSION_WORDS_STR` comma-separated string of words which will be ignored on every request to the `/api/check` endpoint. Example: `'foo, bar'`. Default value is an empty string.

### Deployment
Note: all docker & docker-compose variants use named volumes to store user dictionaries.
Expand Down
24 changes: 24 additions & 0 deletions tests/test_spell.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import typing

import pytest

from tests._fixtures import COMMON_TEXT_MESSAGE
from tests.test_spell_views import RU_LANG
from whole_app import models
from whole_app.settings import SETTINGS
from whole_app.spell import SpellCheckService


Expand Down Expand Up @@ -44,3 +48,23 @@ def test_urls_ignored(
models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language="ru_RU", exclude_urls=True),
).run_check()
assert not corrections


@pytest.mark.parametrize(
    ("wannabe_user_input", "excluded_words"),
    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", {"шячло", "попячтса"})],
)
def test_default_excluded_words(
    wannabe_user_input: str,
    excluded_words: set[str],
    monkeypatch: typing.Any,
) -> None:
    """Check that words from the settings-level exclusion set yield no corrections.

    Args:
        wannabe_user_input: text sent to the spell checker.
        excluded_words: lower-cased words patched into ``SETTINGS`` as the
            default exclusion set.
        monkeypatch: pytest fixture used to patch ``SETTINGS`` temporarily.
    """
    with monkeypatch.context() as patcher:
        # Patch the private attribute directly: the ``exclusion_words_set``
        # computed field has no setter and simply returns this value.
        patcher.setattr(SETTINGS, "_exclusion_words_set", excluded_words)
        fake_engine: SpellCheckService = SpellCheckService()
        # ``prepare`` must run while the patch is active, because that is
        # where ``SETTINGS.exclusion_words_set`` is read.
        prepared = fake_engine.prepare(
            models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG, exclude_urls=False),
        )

        corrections = prepared.run_check()
        assert corrections == [], f"{corrections=} --- {prepared._exclusion_words=}"  # noqa: SLF001
21 changes: 0 additions & 21 deletions tests/test_spell_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,24 +108,3 @@ def parse_words(server_response: RequestsResponse) -> typing.Any:
# and then check that the excepted word is not in the check output
server_response = run_request()
assert tested_word not in parse_words(server_response)


@pytest.mark.parametrize(
    ("wannabe_user_input", "excluded_words"),
    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", "шЯчЛо, ПоПяЧтСа")],
)
def test_default_excluded_words(
    app_client: "TestClient",
    wannabe_user_input: str,
    excluded_words: str,
    monkeypatch: typing.Any,
) -> None:
    """Post text made only of default-excluded words and expect no corrections."""
    with monkeypatch.context() as patcher:
        # The setting is patched as a raw comma-separated string.
        patcher.setattr(SETTINGS, "exclusion_words", excluded_words)
        check_url = f"{SETTINGS.api_prefix}/check/"
        request_body = models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump()
        server_response = app_client.post(check_url, json=request_body)
        assert server_response.status_code == 200
        found_corrections = server_response.json()["corrections"]
        assert found_corrections == [], f"{server_response.json()=} --- {excluded_words=}"
26 changes: 24 additions & 2 deletions whole_app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import pydantic
import structlog
import toml
import typing_extensions
from pydantic import computed_field
from pydantic_settings import BaseSettings


Expand Down Expand Up @@ -195,12 +197,32 @@ class SettingsOfMicroservice(BaseSettings):
),
] = 60
username_regex: str = r"^[a-zA-Z0-9-_]*$"
exclusion_words: typing.Annotated[
exclusion_words_str: typing.Annotated[
str,
pydantic.Field(
description="list of words which will ignored by default(string separated by comma)",
description="String with list of words which will be ignored in /api/check endpoint each request. "
"Example: `'foo, bar'`"
),
] = ""
_exclusion_words_set: typing.Annotated[
set[str],
pydantic.Field(
description="""set of words which will ignored by default(filled from exclusion_words_str).
Example: `'["foo", "bar"]'` """,
),
] = set()

# Read-only view of the private ``_exclusion_words_set`` attribute.
# Documented with comments on purpose: a docstring here would be picked up
# by ``computed_field`` as the field description.
@computed_field  # type: ignore[misc]
@property
def exclusion_words_set(self) -> set[str]:
    prepared_words: set[str] = self._exclusion_words_set
    return prepared_words

@pydantic.model_validator(mode="after")
def _assemble_exclusion_words_set(self) -> "typing_extensions.Self":
    """Fill ``_exclusion_words_set`` from the comma-separated ``exclusion_words_str``.

    Every entry is stripped of surrounding whitespace and lower-cased.
    Entries that are empty after stripping (for example the middle one in
    ``"foo, , bar"``) are dropped, so the resulting set never contains ``""``.

    Returns:
        This same settings instance.
    """
    normalized_words = (one_word.strip().lower() for one_word in self.exclusion_words_str.split(","))
    # Filter after stripping: a whitespace-only chunk is truthy before the strip.
    self._exclusion_words_set = {one_word for one_word in normalized_words if one_word}
    return self

class Config:
env_prefix: str = "spellcheck_"
Expand Down
4 changes: 2 additions & 2 deletions whole_app/spell.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ def prepare(
"""Initialize machinery."""
self._input_text = request_payload.text
self._exclusion_words = exclusion_words if exclusion_words else []
self._exclusion_words.extend(SETTINGS.exclusion_words_set)

if request_payload.exclude_urls:
for one_url in self._url_extractor.find_urls(self._input_text):
self._exclusion_words.extend(
{word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)}
{one_word.lower() for one_word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)}
)

self._spellcheck_engine = SpellChecker(request_payload.language)
return self

Expand Down
1 change: 0 additions & 1 deletion whole_app/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ async def spell_check_main_endpoint(
exclusion_words = await storage_engine.prepare(
request_payload.user_name,
).fetch_records()
exclusion_words.extend([one_word.strip().lower() for one_word in SETTINGS.exclusion_words.split(",")])
return models.SpellCheckResponse(
**request_payload.model_dump(),
corrections=await to_thread.run_sync(
Expand Down

0 comments on commit 0e6e0e9

Please sign in to comment.