diff --git a/README.md b/README.md index 10794a6..042f8a8 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ It runs blazingly fast due to the use of pychant in its kernel, LRU cache usage Also it supports feature called «user dictionaries» — user can add his own word-exceptions to personal dictionary. ## Quickstart -* `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.0.0` +* `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.1.0` * check http://localhost:10113/docs/ for full REST documentation * main REST endpoint you will be needed is http://localhost:10113/api/check/ (this will be available without authorization) @@ -35,18 +35,19 @@ You can change config of the service by changing the environment variables. Here * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`. * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`. * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`. +* `SPELLCHECK_EXCLUSION_WORDS` list of words which will be ignored by default (comma-separated string). Default value is an empty string. ### Deployment Note: all docker & docker-compose variants use named volumes to store user dictionaries. 
#### Plain docker -`docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.0.0` +`docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.1.0` #### Docker-compose * Save this example configuration as `docker-compose.yml`: ```yml version: "3.9" services: spellcheck: - image: xfenix/spellcheck-microservice:4.0.0 + image: xfenix/spellcheck-microservice:4.1.0 ports: - "10113:10113" volumes: diff --git a/tests/test_dict_views.py b/tests/test_dict_views.py index 8aeb01f..9e3b0c5 100644 --- a/tests/test_dict_views.py +++ b/tests/test_dict_views.py @@ -42,7 +42,7 @@ def test_add_to_dict( json=models.UserDictionaryRequestWithWord( user_name=fake_user_name, exception_word=fake_exc_word, - ).dict(), + ).model_dump(), ) assert server_response.status_code == 201 if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE: @@ -70,7 +70,7 @@ def test_remove_from_user_dict( json=models.UserDictionaryRequestWithWord( user_name=fake_user_name, exception_word=fake_exc_word, - ).dict(), + ).model_dump(), ) assert server_response.status_code == 200 if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE: @@ -92,7 +92,7 @@ def test_dummy_provider_init( json=models.UserDictionaryRequestWithWord( user_name=faker_obj.user_name(), exception_word=faker_obj.word(), - ).dict(), + ).model_dump(), ) assert server_response.status_code == 201 @@ -111,7 +111,7 @@ def test_disabled_dictionary_views( json=models.UserDictionaryRequestWithWord( user_name="test", exception_word="test", - ).dict(), + ).model_dump(), ) assert server_response.status_code == 404 # restore back api state to ensure other tests wont break @@ -124,7 +124,7 @@ def test_empty_auth_key(self: "TestVarious", api_key: str) -> None: json=models.UserDictionaryRequestWithWord( user_name="test", exception_word="test", - ).dict(), + ).model_dump(), headers={} if api_key is None else {SETTINGS.api_key_header_name: ""}, 
) assert server_response.status_code == 403 @@ -135,7 +135,7 @@ def test_wrong_api_key(self: "TestVarious") -> None: json=models.UserDictionaryRequestWithWord( user_name="test", exception_word="test", - ).dict(), + ).model_dump(), headers={ SETTINGS.api_key_header_name: SETTINGS.api_key + "wrongTrashKekJunk --- 5000", }, diff --git a/tests/test_spell_views.py b/tests/test_spell_views.py index 93f7eb2..5a7e78d 100644 --- a/tests/test_spell_views.py +++ b/tests/test_spell_views.py @@ -27,7 +27,7 @@ def test_no_corrections(app_client: "TestClient", wannabe_user_input: str) -> No """Dead simple test.""" server_response: typing.Final = app_client.post( f"{SETTINGS.api_prefix}/check/", - json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).dict(), + json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(), ) assert server_response.status_code == 200 @@ -53,7 +53,7 @@ def test_with_corrections_simple( text=wannabe_user_input, language=RU_LANG, user_name=faker_obj.user_name(), - ).dict(), + ).model_dump(), ) assert server_response.status_code == 200 @@ -87,7 +87,7 @@ def run_request() -> typing.Any: text=wannabe_user_input, language=RU_LANG, user_name=user_name, - ).dict(), + ).model_dump(), ) def parse_words(server_response: RequestsResponse) -> typing.Any: @@ -103,8 +103,29 @@ def parse_words(server_response: RequestsResponse) -> typing.Any: json=models.UserDictionaryRequestWithWord( user_name=user_name, exception_word=tested_word, - ).dict(), + ).model_dump(), ) # and than check that excepted word not in the check output server_response = run_request() assert tested_word not in parse_words(server_response) + + +@pytest.mark.parametrize( + ("wannabe_user_input", "excluded_words"), + [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", "шЯчЛо, ПоПяЧтСа")], +) +def test_default_excluded_words( + app_client: "TestClient", + wannabe_user_input: str, + excluded_words: str, + monkeypatch: typing.Any, +) -> None: + """Dead simple test.""" + with 
monkeypatch.context() as patcher: + patcher.setattr(SETTINGS, "exclusion_words", excluded_words) + server_response: typing.Final = app_client.post( + f"{SETTINGS.api_prefix}/check/", + json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(), + ) + assert server_response.status_code == 200 + assert server_response.json()["corrections"] == [], f"{server_response.json()=} --- {excluded_words=}" diff --git a/whole_app/settings.py b/whole_app/settings.py index eceb423..e1e3033 100644 --- a/whole_app/settings.py +++ b/whole_app/settings.py @@ -195,6 +195,12 @@ class SettingsOfMicroservice(BaseSettings): ), ] = 60 username_regex: str = r"^[a-zA-Z0-9-_]*$" + exclusion_words: typing.Annotated[ + str, + pydantic.Field( + description="list of words which will ignored by default(string separated by comma)", + ), + ] = "" class Config: env_prefix: str = "spellcheck_" diff --git a/whole_app/spell.py b/whole_app/spell.py index a11d534..d4b11bd 100644 --- a/whole_app/spell.py +++ b/whole_app/spell.py @@ -35,7 +35,7 @@ def prepare( if request_payload.exclude_urls: for one_url in self._url_extractor.find_urls(self._input_text): self._exclusion_words.extend( - [word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)] + {word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)} ) self._spellcheck_engine = SpellChecker(request_payload.language) diff --git a/whole_app/views.py b/whole_app/views.py index 0668bfd..53b2a8c 100644 --- a/whole_app/views.py +++ b/whole_app/views.py @@ -63,8 +63,9 @@ async def spell_check_main_endpoint( exclusion_words = await storage_engine.prepare( request_payload.user_name, ).fetch_records() + exclusion_words.extend([one_word.strip().lower() for one_word in SETTINGS.exclusion_words.split(",")]) return models.SpellCheckResponse( - **request_payload.dict(), + **request_payload.model_dump(), corrections=await to_thread.run_sync( spell_service.prepare(request_payload, 
exclusion_words).run_check, ),