From 8720d46f8f8bacc38d4f6029e9f2dfb4247eeb0e Mon Sep 17 00:00:00 2001 From: Diwank Singh Tomer Date: Fri, 18 Oct 2024 14:51:04 -0400 Subject: [PATCH] fix(agents-api): Fix search stuff (#695) Signed-off-by: Diwank Singh Tomer ---- > [!IMPORTANT] > Add `clean` option to `extract_keywords` and filter empty queries in `nlp.py`; update imports and defaults in `utils.py`. > > - **Behavior**: > - Add `clean` parameter to `extract_keywords()` in `nlp.py` to optionally strip non-alphanumeric characters. > - Filter out empty queries in `paragraph_to_custom_queries()` in `nlp.py`. > - **Imports**: > - Add `debug` to imports in `utils.py`. > - **Function Defaults**: > - Change default `only_on_error` to `True` in `cozo_query()` in `utils.py`. > > This description was created by [Ellipsis](https://www.ellipsis.dev?ref=julep-ai%2Fjulep&utm_source=github&utm_medium=referral) for ca38891eea7402f6b7f5fd3f57b90c5533b3ab3c. It will automatically update as commits are pushed. Signed-off-by: Diwank Singh Tomer --- agents-api/agents_api/common/nlp.py | 7 ++++++- agents-api/agents_api/models/utils.py | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/agents-api/agents_api/common/nlp.py b/agents-api/agents_api/common/nlp.py index 2fec52421..bc4d33383 100644 --- a/agents-api/agents_api/common/nlp.py +++ b/agents-api/agents_api/common/nlp.py @@ -8,13 +8,14 @@ nlp = spacy.load("en_core_web_sm") -def extract_keywords(text: str, top_n: int = 10) -> list[str]: +def extract_keywords(text: str, top_n: int = 10, clean: bool = True) -> list[str]: """ Extracts significant keywords and phrases from the text. Args: text (str): The input text to process. top_n (int): Number of top keywords to extract based on frequency. + clean (bool): Strip non-alphanumeric characters from keywords. Returns: List[str]: A list of extracted keywords/phrases. @@ -46,6 +47,9 @@ def extract_keywords(text: str, top_n: int = 10) -> list[str]: # Get top_n keywords keywords = [item for item, count in freq.most_common(top_n)] + if clean: + keywords = [re.sub(r"[^\w\s\-_]+", "", kw) for kw in keywords] + return keywords @@ -212,5 +216,6 @@ def paragraph_to_custom_queries(paragraph: str) -> list[str]: """ queries = [text_to_custom_query(sentence.text) for sentence in nlp(paragraph).sents] + queries = [q for q in queries if q] return queries diff --git a/agents-api/agents_api/models/utils.py b/agents-api/agents_api/models/utils.py index e182de077..0b0c41edd 100644 --- a/agents-api/agents_api/models/utils.py +++ b/agents-api/agents_api/models/utils.py @@ -8,7 +8,7 @@ from pydantic import BaseModel from ..common.utils.cozo import uuid_int_list_to_uuid4 -from ..env import do_verify_developer, do_verify_developer_owns_resource +from ..env import debug, do_verify_developer, do_verify_developer_owns_resource P = ParamSpec("P") T = TypeVar("T") @@ -185,8 +185,8 @@ def make_cozo_json_query(fields): def cozo_query( func: Callable[P, tuple[str | list[str | None], dict]] | None = None, - debug: bool | None = None, - only_on_error: bool = False, + debug: bool | None = debug, + only_on_error: bool = True, ): def cozo_query_dec(func: Callable[P, tuple[str | list[Any], dict]]): """