Skip to content

Commit

Permalink
Merge pull request #30 from NikitaKozlovtcev/feature/NV-8287-added-ex…
Browse files Browse the repository at this point in the history
…cluded-words-from-envs

[NV-8287] added excluded words from envs
  • Loading branch information
xfenix authored Jul 15, 2024
2 parents aaf0cc5 + 7a05f23 commit 1f5f151
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 15 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ It runs blazingly fast due to the use of pychant in its kernel, LRU cache usage
It also supports a feature called «user dictionaries» — a user can add their own word exceptions to a personal dictionary.

## Quickstart
* `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.0.0`
* `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.1.0`
* check http://localhost:10113/docs/ for full REST documentation
* the main REST endpoint you will need is http://localhost:10113/api/check/ (it is available without authorization)

Expand All @@ -35,18 +35,19 @@ You can change config of the service by changing the environment variables. Here
* `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`.
* `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`.
* `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`.
* `SPELLCHECK_EXCLUSION_WORDS` comma-separated list of words that will be ignored by default. Default value is an empty string.

### Deployment
Note: all docker & docker-compose variants use named volumes to store user dictionaries.
#### Plain docker
`docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.0.0`
`docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.1.0`
#### Docker-compose
* Save this example configuration as `docker-compose.yml`:
```yml
version: "3.9"
services:
spellcheck:
image: xfenix/spellcheck-microservice:4.0.0
image: xfenix/spellcheck-microservice:4.1.0
ports:
- "10113:10113"
volumes:
Expand Down
12 changes: 6 additions & 6 deletions tests/test_dict_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_add_to_dict(
json=models.UserDictionaryRequestWithWord(
user_name=fake_user_name,
exception_word=fake_exc_word,
).dict(),
).model_dump(),
)
assert server_response.status_code == 201
if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
Expand Down Expand Up @@ -70,7 +70,7 @@ def test_remove_from_user_dict(
json=models.UserDictionaryRequestWithWord(
user_name=fake_user_name,
exception_word=fake_exc_word,
).dict(),
).model_dump(),
)
assert server_response.status_code == 200
if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
Expand All @@ -92,7 +92,7 @@ def test_dummy_provider_init(
json=models.UserDictionaryRequestWithWord(
user_name=faker_obj.user_name(),
exception_word=faker_obj.word(),
).dict(),
).model_dump(),
)
assert server_response.status_code == 201

Expand All @@ -111,7 +111,7 @@ def test_disabled_dictionary_views(
json=models.UserDictionaryRequestWithWord(
user_name="test",
exception_word="test",
).dict(),
).model_dump(),
)
assert server_response.status_code == 404
# restore back api state to ensure other tests wont break
Expand All @@ -124,7 +124,7 @@ def test_empty_auth_key(self: "TestVarious", api_key: str) -> None:
json=models.UserDictionaryRequestWithWord(
user_name="test",
exception_word="test",
).dict(),
).model_dump(),
headers={} if api_key is None else {SETTINGS.api_key_header_name: ""},
)
assert server_response.status_code == 403
Expand All @@ -135,7 +135,7 @@ def test_wrong_api_key(self: "TestVarious") -> None:
json=models.UserDictionaryRequestWithWord(
user_name="test",
exception_word="test",
).dict(),
).model_dump(),
headers={
SETTINGS.api_key_header_name: SETTINGS.api_key + "wrongTrashKekJunk --- 5000",
},
Expand Down
29 changes: 25 additions & 4 deletions tests/test_spell_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_no_corrections(app_client: "TestClient", wannabe_user_input: str) -> No
"""Dead simple test."""
server_response: typing.Final = app_client.post(
f"{SETTINGS.api_prefix}/check/",
json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).dict(),
json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(),
)
assert server_response.status_code == 200

Expand All @@ -53,7 +53,7 @@ def test_with_corrections_simple(
text=wannabe_user_input,
language=RU_LANG,
user_name=faker_obj.user_name(),
).dict(),
).model_dump(),
)
assert server_response.status_code == 200

Expand Down Expand Up @@ -87,7 +87,7 @@ def run_request() -> typing.Any:
text=wannabe_user_input,
language=RU_LANG,
user_name=user_name,
).dict(),
).model_dump(),
)

def parse_words(server_response: RequestsResponse) -> typing.Any:
Expand All @@ -103,8 +103,29 @@ def parse_words(server_response: RequestsResponse) -> typing.Any:
json=models.UserDictionaryRequestWithWord(
user_name=user_name,
exception_word=tested_word,
).dict(),
).model_dump(),
)
# and then check that the excepted word is not in the check output
server_response = run_request()
assert tested_word not in parse_words(server_response)


@pytest.mark.parametrize(
    ("wannabe_user_input", "excluded_words"),
    [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", "шЯчЛо, ПоПяЧтСа")],
)
def test_default_excluded_words(
    app_client: "TestClient",
    wannabe_user_input: str,
    excluded_words: str,
    monkeypatch: typing.Any,
) -> None:
    """Check that words listed in ``SETTINGS.exclusion_words`` yield no corrections.

    The setting is patched to ``excluded_words`` and ``wannabe_user_input`` is
    posted to the ``/check/`` endpoint; the response must be HTTP 200 with an
    empty ``corrections`` list.

    The single parametrized case uses upper-case input, while the excluded
    words are given in mixed case with a space after the comma — presumably to
    exercise case-insensitive matching and whitespace trimming of the setting
    (confirm against the endpoint implementation).
    """
    # Patch the setting inside a monkeypatch context so the override is scoped
    # to this request and is undone when the context exits.
    with monkeypatch.context() as patcher:
        patcher.setattr(SETTINGS, "exclusion_words", excluded_words)
        server_response: typing.Final = app_client.post(
            f"{SETTINGS.api_prefix}/check/",
            json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(),
        )
    assert server_response.status_code == 200
    # Every word of the input is expected to be excluded, so nothing should be reported.
    assert server_response.json()["corrections"] == [], f"{server_response.json()=} --- {excluded_words=}"
6 changes: 6 additions & 0 deletions whole_app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,12 @@ class SettingsOfMicroservice(BaseSettings):
),
] = 60
username_regex: str = r"^[a-zA-Z0-9-_]*$"
exclusion_words: typing.Annotated[
str,
pydantic.Field(
description="list of words which will ignored by default(string separated by comma)",
),
] = ""

class Config:
env_prefix: str = "spellcheck_"
Expand Down
2 changes: 1 addition & 1 deletion whole_app/spell.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def prepare(
if request_payload.exclude_urls:
for one_url in self._url_extractor.find_urls(self._input_text):
self._exclusion_words.extend(
[word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)]
{word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)}
)

self._spellcheck_engine = SpellChecker(request_payload.language)
Expand Down
3 changes: 2 additions & 1 deletion whole_app/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ async def spell_check_main_endpoint(
exclusion_words = await storage_engine.prepare(
request_payload.user_name,
).fetch_records()
exclusion_words.extend([one_word.strip().lower() for one_word in SETTINGS.exclusion_words.split(",")])
return models.SpellCheckResponse(
**request_payload.dict(),
**request_payload.model_dump(),
corrections=await to_thread.run_sync(
spell_service.prepare(request_payload, exclusion_words).run_check,
),
Expand Down

0 comments on commit 1f5f151

Please sign in to comment.