diff --git a/agents-api/agents_api/queries/docs/search_docs_by_embedding.py b/agents-api/agents_api/queries/docs/search_docs_by_embedding.py index 61a0506f2..e9e3380e1 100644 --- a/agents-api/agents_api/queries/docs/search_docs_by_embedding.py +++ b/agents-api/agents_api/queries/docs/search_docs_by_embedding.py @@ -78,4 +78,3 @@ async def search_docs_by_embedding( metadata_filter, ], ) - diff --git a/agents-api/tests/test_docs_queries.py b/agents-api/tests/test_docs_queries.py index dfd48858b..03ef50aba 100644 --- a/agents-api/tests/test_docs_queries.py +++ b/agents-api/tests/test_docs_queries.py @@ -22,34 +22,36 @@ import math + def make_vector_with_similarity(n: int, d: float): """ Returns a list `v` of length `n` such that the cosine similarity between `v` and the all-ones vector of length `n` is approximately d. """ if not -1.0 <= d <= 1.0: - raise ValueError("d must lie in [-1, 1].") - + msg = "d must lie in [-1, 1]." + raise ValueError(msg) + # Handle special cases exactly: if abs(d - 1.0) < 1e-12: # d ~ +1 return [1.0] * n if abs(d + 1.0) < 1e-12: # d ~ -1 return [-1.0] * n - if abs(d) < 1e-12: # d ~ 0 - v = [0.0]*n + if abs(d) < 1e-12: # d ~ 0 + v = [0.0] * n if n >= 2: v[0] = 1.0 v[1] = -1.0 return v sign_d = 1.0 if d >= 0 else -1.0 - + # Base part: sign(d)*[1,1,...,1] - base = [sign_d]*n - + base = [sign_d] * n + # Orthogonal unit vector u with sum(u)=0; for simplicity: # u = [1/sqrt(2), -1/sqrt(2), 0, 0, ..., 0] - u = [0.0]*n + u = [0.0] * n if n >= 2: u[0] = 1.0 / math.sqrt(2) u[1] = -1.0 / math.sqrt(2) @@ -57,15 +59,16 @@ def make_vector_with_similarity(n: int, d: float): # Solve for alpha: # alpha^2 = n*(1 - d^2)/d^2 - alpha = math.sqrt(n*(1 - d*d)) / abs(d) + alpha = math.sqrt(n * (1 - d * d)) / abs(d) # Construct v - v = [0.0]*n + v = [0.0] * n for i in range(n): v[i] = base[i] + alpha * u[i] - + return v + @test("query: create user doc") async def _(dsn=pg_dsn, developer=test_developer, user=test_user): pool = await create_db_pool(dsn=dsn) @@ -329,7 +332,10 @@ async def _( @test("query: search docs by embedding with confidence") async def _( - dsn=pg_dsn, agent=test_agent, developer=test_developer, doc=test_doc_with_embedding, + dsn=pg_dsn, + agent=test_agent, + developer=test_developer, + doc=test_doc_with_embedding, ): pool = await create_db_pool(dsn=dsn) @@ -344,7 +350,7 @@ async def _( developer_id=developer.id, owners=[("agent", agent.id)], embedding=query_embedding, - confidence=confidence*0.9, + confidence=confidence * 0.9, k=3, # Add k parameter metadata_filter={"test": "test"}, # Add metadata filter connection_pool=pool, @@ -358,7 +364,7 @@ async def _( developer_id=developer.id, owners=[("agent", agent.id)], embedding=query_embedding, - confidence=confidence*1.1, + confidence=confidence * 1.1, k=3, # Add k parameter metadata_filter={"test": "test"}, # Add metadata filter connection_pool=pool,