From 0114a57cc347fb81a9b1b9e9fc0a3e5ce6eafeda Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Mon, 8 Jul 2024 19:12:20 +0300 Subject: [PATCH 1/4] [NV-8287] added excluded words from envs --- README.md | 1 + tests/test_dict_views.py | 12 ++++++------ tests/test_spell_views.py | 29 +++++++++++++++++++++++++---- whole_app/settings.py | 1 + whole_app/views.py | 3 ++- 5 files changed, 35 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 10794a6..6c73e0d 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ You can change config of the service by changing the environment variables. Here * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`. * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`. * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`. +* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignore by default `'["Foo", "bar"]'` ### Deployment Note: all docker & docker-compose variants use named volumes to store user dictionaries. 
diff --git a/tests/test_dict_views.py b/tests/test_dict_views.py index 8aeb01f..9e3b0c5 100644 --- a/tests/test_dict_views.py +++ b/tests/test_dict_views.py @@ -42,7 +42,7 @@ def test_add_to_dict( json=models.UserDictionaryRequestWithWord( user_name=fake_user_name, exception_word=fake_exc_word, - ).dict(), + ).model_dump(), ) assert server_response.status_code == 201 if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE: @@ -70,7 +70,7 @@ def test_remove_from_user_dict( json=models.UserDictionaryRequestWithWord( user_name=fake_user_name, exception_word=fake_exc_word, - ).dict(), + ).model_dump(), ) assert server_response.status_code == 200 if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE: @@ -92,7 +92,7 @@ def test_dummy_provider_init( json=models.UserDictionaryRequestWithWord( user_name=faker_obj.user_name(), exception_word=faker_obj.word(), - ).dict(), + ).model_dump(), ) assert server_response.status_code == 201 @@ -111,7 +111,7 @@ def test_disabled_dictionary_views( json=models.UserDictionaryRequestWithWord( user_name="test", exception_word="test", - ).dict(), + ).model_dump(), ) assert server_response.status_code == 404 # restore back api state to ensure other tests wont break @@ -124,7 +124,7 @@ def test_empty_auth_key(self: "TestVarious", api_key: str) -> None: json=models.UserDictionaryRequestWithWord( user_name="test", exception_word="test", - ).dict(), + ).model_dump(), headers={} if api_key is None else {SETTINGS.api_key_header_name: ""}, ) assert server_response.status_code == 403 @@ -135,7 +135,7 @@ def test_wrong_api_key(self: "TestVarious") -> None: json=models.UserDictionaryRequestWithWord( user_name="test", exception_word="test", - ).dict(), + ).model_dump(), headers={ SETTINGS.api_key_header_name: SETTINGS.api_key + "wrongTrashKekJunk --- 5000", }, diff --git a/tests/test_spell_views.py b/tests/test_spell_views.py index 93f7eb2..e3269d3 100644 --- a/tests/test_spell_views.py +++ b/tests/test_spell_views.py @@ -27,7 
+27,7 @@ def test_no_corrections(app_client: "TestClient", wannabe_user_input: str) -> No """Dead simple test.""" server_response: typing.Final = app_client.post( f"{SETTINGS.api_prefix}/check/", - json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).dict(), + json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(), ) assert server_response.status_code == 200 @@ -53,7 +53,7 @@ def test_with_corrections_simple( text=wannabe_user_input, language=RU_LANG, user_name=faker_obj.user_name(), - ).dict(), + ).model_dump(), ) assert server_response.status_code == 200 @@ -87,7 +87,7 @@ def run_request() -> typing.Any: text=wannabe_user_input, language=RU_LANG, user_name=user_name, - ).dict(), + ).model_dump(), ) def parse_words(server_response: RequestsResponse) -> typing.Any: @@ -103,8 +103,29 @@ def parse_words(server_response: RequestsResponse) -> typing.Any: json=models.UserDictionaryRequestWithWord( user_name=user_name, exception_word=tested_word, - ).dict(), + ).model_dump(), ) # and than check that excepted word not in the check output server_response = run_request() assert tested_word not in parse_words(server_response) + + +@pytest.mark.parametrize( + ("wannabe_user_input", "excluded_words"), + [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", ["шячло", "попячтса"])], +) +def test_default_excluded_words( + app_client: "TestClient", + wannabe_user_input: str, + excluded_words: list[str], + monkeypatch: typing.Any, +) -> None: + """Dead simple test.""" + with monkeypatch.context() as patcher: + patcher.setattr(SETTINGS, "exclusion_words", excluded_words) + server_response: typing.Final = app_client.post( + f"{SETTINGS.api_prefix}/check/", + json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(), + ) + assert server_response.status_code == 200 + assert server_response.json()["corrections"] == [], f"{server_response.json()=} --- {excluded_words=}" diff --git a/whole_app/settings.py b/whole_app/settings.py index 
eceb423..83da6ca 100644 --- a/whole_app/settings.py +++ b/whole_app/settings.py @@ -195,6 +195,7 @@ class SettingsOfMicroservice(BaseSettings): ), ] = 60 username_regex: str = r"^[a-zA-Z0-9-_]*$" + exclusion_words: list[str] = [] class Config: env_prefix: str = "spellcheck_" diff --git a/whole_app/views.py b/whole_app/views.py index 0668bfd..f4496b8 100644 --- a/whole_app/views.py +++ b/whole_app/views.py @@ -63,8 +63,9 @@ async def spell_check_main_endpoint( exclusion_words = await storage_engine.prepare( request_payload.user_name, ).fetch_records() + exclusion_words.extend(SETTINGS.exclusion_words) return models.SpellCheckResponse( - **request_payload.dict(), + **request_payload.model_dump(), corrections=await to_thread.run_sync( spell_service.prepare(request_payload, exclusion_words).run_check, ), From 7f0495459c979b006c2daeedf221048f7f21c35f Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Mon, 8 Jul 2024 19:59:26 +0300 Subject: [PATCH 2/4] [NV-8287] CR fixes, switch list[str] to str --- README.md | 2 +- tests/test_spell_views.py | 4 ++-- whole_app/settings.py | 2 +- whole_app/views.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6c73e0d..30ed1e2 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ You can change config of the service by changing the environment variables. Here * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`. * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`. * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`. -* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignore by default `'["Foo", "bar"]'` +* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignore by default(string separated by comma) `"Foo, bar"` ### Deployment Note: all docker & docker-compose variants use named volumes to store user dictionaries. 
diff --git a/tests/test_spell_views.py b/tests/test_spell_views.py index e3269d3..5a7e78d 100644 --- a/tests/test_spell_views.py +++ b/tests/test_spell_views.py @@ -112,12 +112,12 @@ def parse_words(server_response: RequestsResponse) -> typing.Any: @pytest.mark.parametrize( ("wannabe_user_input", "excluded_words"), - [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", ["шячло", "попячтса"])], + [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", "шЯчЛо, ПоПяЧтСа")], ) def test_default_excluded_words( app_client: "TestClient", wannabe_user_input: str, - excluded_words: list[str], + excluded_words: str, monkeypatch: typing.Any, ) -> None: """Dead simple test.""" diff --git a/whole_app/settings.py b/whole_app/settings.py index 83da6ca..adfa265 100644 --- a/whole_app/settings.py +++ b/whole_app/settings.py @@ -195,7 +195,7 @@ class SettingsOfMicroservice(BaseSettings): ), ] = 60 username_regex: str = r"^[a-zA-Z0-9-_]*$" - exclusion_words: list[str] = [] + exclusion_words: str = "" class Config: env_prefix: str = "spellcheck_" diff --git a/whole_app/views.py b/whole_app/views.py index f4496b8..53b2a8c 100644 --- a/whole_app/views.py +++ b/whole_app/views.py @@ -63,7 +63,7 @@ async def spell_check_main_endpoint( exclusion_words = await storage_engine.prepare( request_payload.user_name, ).fetch_records() - exclusion_words.extend(SETTINGS.exclusion_words) + exclusion_words.extend([one_word.strip().lower() for one_word in SETTINGS.exclusion_words.split(",")]) return models.SpellCheckResponse( **request_payload.model_dump(), corrections=await to_thread.run_sync( From cc9f5bb59c4fab39d3b40bb71621a01b23a68704 Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Wed, 10 Jul 2024 14:03:41 +0300 Subject: [PATCH 3/4] [NV-8287] CR fixes --- README.md | 8 ++++---- whole_app/settings.py | 7 ++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 30ed1e2..042f8a8 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ It runs blazingly fast due to the use of pychant in its kernel, LRU 
cache usage Also it supports feature called «user dictionaries» — user can add his own word-exceptions to personal dictionary. ## Quickstart -* `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.0.0` +* `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.1.0` * check http://localhost:10113/docs/ for full REST documentation * main REST endpoint you will be needed is http://localhost:10113/api/check/ (this will be available without authorization) @@ -35,19 +35,19 @@ You can change config of the service by changing the environment variables. Here * `SPELLCHECK_DICTIONARIES_DISABLED` switches off user dictionaries API no matter what. Default value is `False`. * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of username. Default value is `3`. * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of username. Default value is `60`. -* `SPELLCHECK_EXCLUSION_WORDS` list of words which will ignore by default(string separated by comma) `"Foo, bar"` +* `SPELLCHECK_EXCLUSION_WORDS` comma-separated list of words that will be ignored by default. Default value is an empty string. ### Deployment Note: all docker & docker-compose variants use named volumes to store user dictionaries. 
#### Plain docker -`docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.0.0` +`docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:4.1.0` #### Docker-compose * Save this example configuration as `docker-compose.yml`: ```yml version: "3.9" services: spellcheck: - image: xfenix/spellcheck-microservice:4.0.0 + image: xfenix/spellcheck-microservice:4.1.0 ports: - "10113:10113" volumes: diff --git a/whole_app/settings.py b/whole_app/settings.py index adfa265..e1e3033 100644 --- a/whole_app/settings.py +++ b/whole_app/settings.py @@ -195,7 +195,12 @@ class SettingsOfMicroservice(BaseSettings): ), ] = 60 username_regex: str = r"^[a-zA-Z0-9-_]*$" - exclusion_words: str = "" + exclusion_words: typing.Annotated[ + str, + pydantic.Field( + description="list of words which will ignored by default(string separated by comma)", + ), + ] = "" class Config: env_prefix: str = "spellcheck_" From 7a05f23f41f9102319ab04ebc7a3b86056b6890e Mon Sep 17 00:00:00 2001 From: Nikita Kozlovtsev Date: Fri, 12 Jul 2024 16:06:52 +0300 Subject: [PATCH 4/4] fix github --- whole_app/spell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whole_app/spell.py b/whole_app/spell.py index a11d534..d4b11bd 100644 --- a/whole_app/spell.py +++ b/whole_app/spell.py @@ -35,7 +35,7 @@ def prepare( if request_payload.exclude_urls: for one_url in self._url_extractor.find_urls(self._input_text): self._exclusion_words.extend( - [word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)] + {word.lower() for word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)} ) self._spellcheck_engine = SpellChecker(request_payload.language)