diff --git a/agents-api/agents_api/queries/docs/delete_doc.py b/agents-api/agents_api/queries/docs/delete_doc.py index d1e02faf1..9d2075600 100644 --- a/agents-api/agents_api/queries/docs/delete_doc.py +++ b/agents-api/agents_api/queries/docs/delete_doc.py @@ -1,6 +1,7 @@ """ Timescale-based deletion of a doc record. """ + from typing import Literal from uuid import UUID diff --git a/agents-api/agents_api/queries/docs/get_doc.py b/agents-api/agents_api/queries/docs/get_doc.py index a0345f5e3..35d692c84 100644 --- a/agents-api/agents_api/queries/docs/get_doc.py +++ b/agents-api/agents_api/queries/docs/get_doc.py @@ -1,6 +1,7 @@ """ Timescale-based retrieval of a single doc record. """ + from typing import Literal from uuid import UUID @@ -41,7 +42,7 @@ async def get_doc( developer_id: UUID, doc_id: UUID, owner_type: Literal["user", "agent", "org"] | None = None, - owner_id: UUID | None = None + owner_id: UUID | None = None, ) -> tuple[str, list]: """ Fetch a single doc, optionally constrained to a given owner. diff --git a/agents-api/agents_api/queries/docs/list_docs.py b/agents-api/agents_api/queries/docs/list_docs.py index b145a1cbc..678c1a5e6 100644 --- a/agents-api/agents_api/queries/docs/list_docs.py +++ b/agents-api/agents_api/queries/docs/list_docs.py @@ -1,6 +1,7 @@ """ Timescale-based listing of docs with optional owner filter and pagination. """ + from typing import Literal from uuid import UUID diff --git a/agents-api/agents_api/queries/docs/search_docs_by_embedding.py b/agents-api/agents_api/queries/docs/search_docs_by_embedding.py index c62188b61..af89cc1b8 100644 --- a/agents-api/agents_api/queries/docs/search_docs_by_embedding.py +++ b/agents-api/agents_api/queries/docs/search_docs_by_embedding.py @@ -2,10 +2,10 @@ Timescale-based doc embedding search using the `embedding` column. """ -import asyncpg -from typing import Literal, List +from typing import List, Literal from uuid import UUID +import asyncpg from beartype import beartype from fastapi import HTTPException from sqlglot import parse_one @@ -32,6 +32,7 @@ LIMIT $2; """).sql(pretty=True) + @wrap_in_class( Doc, one=False, diff --git a/agents-api/agents_api/queries/docs/search_docs_by_text.py b/agents-api/agents_api/queries/docs/search_docs_by_text.py index c9a5a93e2..eed74e54b 100644 --- a/agents-api/agents_api/queries/docs/search_docs_by_text.py +++ b/agents-api/agents_api/queries/docs/search_docs_by_text.py @@ -2,10 +2,10 @@ Timescale-based doc text search using the `search_tsv` column. """ -import asyncpg from typing import Literal from uuid import UUID +import asyncpg from beartype import beartype from fastapi import HTTPException from sqlglot import parse_one diff --git a/agents-api/agents_api/queries/docs/search_docs_hybrid.py b/agents-api/agents_api/queries/docs/search_docs_hybrid.py index 9e8d84dc7..ae107419d 100644 --- a/agents-api/agents_api/queries/docs/search_docs_hybrid.py +++ b/agents-api/agents_api/queries/docs/search_docs_hybrid.py @@ -3,7 +3,7 @@ via a simple distribution-based score fusion or direct weighting in Python. """ -from typing import Literal, List +from typing import List, Literal from uuid import UUID from beartype import beartype @@ -11,8 +11,9 @@ from ...autogen.openapi_model import Doc from ..utils import run_concurrently -from .search_docs_by_text import search_docs_by_text from .search_docs_by_embedding import search_docs_by_embedding +from .search_docs_by_text import search_docs_by_text + def dbsf_normalize(scores: List[float]) -> List[float]: """ @@ -20,19 +21,23 @@ def dbsf_normalize(scores: List[float]) -> List[float]: from (mean - 3*stddev) to (mean + 3*stddev) and scale to 0..1 """ import statistics + if len(scores) < 2: return scores m = statistics.mean(scores) sd = statistics.pstdev(scores) # population std if sd == 0: return scores - upper = m + 3*sd - lower = m - 3*sd + upper = m + 3 * sd + lower = m - 3 * sd + def clamp_scale(v): c = min(upper, max(lower, v)) return (c - lower) / (upper - lower) + return [clamp_scale(s) for s in scores] + @beartype def fuse_results( text_docs: List[Doc], embedding_docs: List[Doc], alpha: float @@ -151,6 +156,7 @@ async def search_docs_hybrid( # text_results, embed_results = await run_concurrently([task1, task2]) # Otherwise just do them in parallel with e.g. asyncio.gather: from asyncio import gather + text_results, embed_results = await gather(*tasks) # fuse them