diff --git a/agents-api/agents_api/autogen/Docs.py b/agents-api/agents_api/autogen/Docs.py index ffed27c1d..af5f60d6a 100644 --- a/agents-api/agents_api/autogen/Docs.py +++ b/agents-api/agents_api/autogen/Docs.py @@ -73,6 +73,30 @@ class Doc(BaseModel): """ Embeddings for the document """ + modality: Annotated[str | None, Field(json_schema_extra={"readOnly": True})] = None + """ + Modality of the document + """ + language: Annotated[str | None, Field(json_schema_extra={"readOnly": True})] = None + """ + Language of the document + """ + index: Annotated[int | None, Field(json_schema_extra={"readOnly": True})] = None + """ + Index of the document + """ + embedding_model: Annotated[ + str | None, Field(json_schema_extra={"readOnly": True}) + ] = None + """ + Embedding model to use for the document + """ + embedding_dimensions: Annotated[ + int | None, Field(json_schema_extra={"readOnly": True}) + ] = None + """ + Dimensions of the embedding model + """ class DocOwner(BaseModel): diff --git a/agents-api/agents_api/queries/docs/__init__.py b/agents-api/agents_api/queries/docs/__init__.py index 0ba3db0d4..f7c207bf2 100644 --- a/agents-api/agents_api/queries/docs/__init__.py +++ b/agents-api/agents_api/queries/docs/__init__.py @@ -18,8 +18,15 @@ from .create_doc import create_doc from .delete_doc import delete_doc -from .embed_snippets import embed_snippets from .get_doc import get_doc from .list_docs import list_docs -from .search_docs_by_embedding import search_docs_by_embedding -from .search_docs_by_text import search_docs_by_text +# from .search_docs_by_embedding import search_docs_by_embedding +# from .search_docs_by_text import search_docs_by_text + +__all__ = [ + "create_doc", + "delete_doc", + "get_doc", + "list_docs", + # "search_docs_by_embct", +] diff --git a/agents-api/agents_api/queries/docs/create_doc.py b/agents-api/agents_api/queries/docs/create_doc.py index 57be43bdf..4528e9fc5 100644 --- a/agents-api/agents_api/queries/docs/create_doc.py +++ b/agents-api/agents_api/queries/docs/create_doc.py @@ -1,12 +1,4 @@ -""" -Timescale-based creation of docs. - -Mirrors the structure of create_file.py, but uses the docs/doc_owners tables. -""" - -import base64 -import hashlib -from typing import Any, Literal +from typing import Literal from uuid import UUID import asyncpg @@ -15,6 +7,9 @@ from sqlglot import parse_one from uuid_extensions import uuid7 +import ast + + from ...autogen.openapi_model import CreateDocRequest, Doc from ...metrics.counters import increase_counter from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class @@ -91,7 +86,7 @@ transform=lambda d: { **d, "id": d["doc_id"], - # You could optionally return a computed hash or partial content if desired + "content": ast.literal_eval(d["content"])[0] if len(ast.literal_eval(d["content"])) == 1 else ast.literal_eval(d["content"]), }, ) @increase_counter("create_doc") @@ -102,26 +97,35 @@ async def create_doc( developer_id: UUID, doc_id: UUID | None = None, data: CreateDocRequest, - owner_type: Literal["user", "agent", "org"] | None = None, + owner_type: Literal["user", "agent"] | None = None, owner_id: UUID | None = None, -) -> list[tuple[str, list]]: + modality: Literal["text", "image", "mixed"] | None = "text", + embedding_model: str | None = "voyage-3", + embedding_dimensions: int | None = 1024, + language: str | None = "english", + index: int | None = 0, +) -> list[tuple[str, list] | tuple[str, list, str]]: """ Insert a new doc record into Timescale and optionally associate it with an owner. """ # Generate a UUID if not provided doc_id = doc_id or uuid7() + # check if content is a string + if isinstance(data.content, str): + data.content = [data.content] + # Create the doc record doc_params = [ developer_id, doc_id, data.title, - data.content, - data.index or 0, # fallback if no snippet index - data.modality or "text", - data.embedding_model or "none", - data.embedding_dimensions or 0, - data.language or "english", + str(data.content), + index, + modality, + embedding_model, + embedding_dimensions, + language, data.metadata or {}, ] diff --git a/agents-api/agents_api/queries/docs/delete_doc.py b/agents-api/agents_api/queries/docs/delete_doc.py index 9d2075600..adeb09bd8 100644 --- a/agents-api/agents_api/queries/docs/delete_doc.py +++ b/agents-api/agents_api/queries/docs/delete_doc.py @@ -1,7 +1,3 @@ -""" -Timescale-based deletion of a doc record. -""" - from typing import Literal from uuid import UUID @@ -65,7 +61,7 @@ async def delete_doc( *, developer_id: UUID, doc_id: UUID, - owner_type: Literal["user", "agent", "org"] | None = None, + owner_type: Literal["user", "agent"] | None = None, owner_id: UUID | None = None, ) -> tuple[str, list]: """ diff --git a/agents-api/agents_api/queries/docs/get_doc.py b/agents-api/agents_api/queries/docs/get_doc.py index 35d692c84..9155f500a 100644 --- a/agents-api/agents_api/queries/docs/get_doc.py +++ b/agents-api/agents_api/queries/docs/get_doc.py @@ -1,14 +1,9 @@ -""" -Timescale-based retrieval of a single doc record. -""" - from typing import Literal from uuid import UUID -import asyncpg from beartype import beartype -from fastapi import HTTPException from sqlglot import parse_one +import ast from ...autogen.openapi_model import Doc from ..utils import pg_query, wrap_in_class @@ -16,12 +11,12 @@ doc_query = parse_one(""" SELECT d.* FROM docs d -LEFT JOIN doc_owners do ON d.developer_id = do.developer_id AND d.doc_id = do.doc_id +LEFT JOIN doc_owners doc_own ON d.developer_id = doc_own.developer_id AND d.doc_id = doc_own.doc_id WHERE d.developer_id = $1 AND d.doc_id = $2 AND ( ($3::text IS NULL AND $4::uuid IS NULL) - OR (do.owner_type = $3 AND do.owner_id = $4) + OR (doc_own.owner_type = $3 AND doc_own.owner_id = $4) ) LIMIT 1; """).sql(pretty=True) @@ -33,6 +28,8 @@ transform=lambda d: { **d, "id": d["doc_id"], + "content": ast.literal_eval(d["content"])[0] if len(ast.literal_eval(d["content"])) == 1 else ast.literal_eval(d["content"]), + # "embeddings": d["embeddings"], }, ) @pg_query @@ -41,7 +38,7 @@ async def get_doc( *, developer_id: UUID, doc_id: UUID, - owner_type: Literal["user", "agent", "org"] | None = None, + owner_type: Literal["user", "agent"] | None = None, owner_id: UUID | None = None, ) -> tuple[str, list]: """ diff --git a/agents-api/agents_api/queries/docs/list_docs.py b/agents-api/agents_api/queries/docs/list_docs.py index 678c1a5e6..a4df08e73 100644 --- a/agents-api/agents_api/queries/docs/list_docs.py +++ b/agents-api/agents_api/queries/docs/list_docs.py @@ -1,52 +1,20 @@ -""" -Timescale-based listing of docs with optional owner filter and pagination. -""" - -from typing import Literal +from typing import Any, Literal from uuid import UUID -import asyncpg from beartype import beartype from fastapi import HTTPException from sqlglot import parse_one +import ast from ...autogen.openapi_model import Doc from ..utils import pg_query, wrap_in_class -# Basic listing for all docs by developer -developer_docs_query = parse_one(""" +# Base query for listing docs +base_docs_query = parse_one(""" SELECT d.* FROM docs d -LEFT JOIN doc_owners do ON d.developer_id = do.developer_id AND d.doc_id = do.doc_id +LEFT JOIN doc_owners doc_own ON d.developer_id = doc_own.developer_id AND d.doc_id = doc_own.doc_id WHERE d.developer_id = $1 -ORDER BY -CASE - WHEN $4 = 'created_at' AND $5 = 'asc' THEN d.created_at - WHEN $4 = 'created_at' AND $5 = 'desc' THEN d.created_at - WHEN $4 = 'updated_at' AND $5 = 'asc' THEN d.updated_at - WHEN $4 = 'updated_at' AND $5 = 'desc' THEN d.updated_at -END DESC NULLS LAST -LIMIT $2 -OFFSET $3; -""").sql(pretty=True) - -# Listing for docs associated with a specific owner -owner_docs_query = parse_one(""" -SELECT d.* -FROM docs d -JOIN doc_owners do ON d.developer_id = do.developer_id AND d.doc_id = do.doc_id -WHERE do.developer_id = $1 - AND do.owner_id = $6 - AND do.owner_type = $7 -ORDER BY -CASE - WHEN $4 = 'created_at' AND $5 = 'asc' THEN d.created_at - WHEN $4 = 'created_at' AND $5 = 'desc' THEN d.created_at - WHEN $4 = 'updated_at' AND $5 = 'asc' THEN d.updated_at - WHEN $4 = 'updated_at' AND $5 = 'desc' THEN d.updated_at -END DESC NULLS LAST -LIMIT $2 -OFFSET $3; """).sql(pretty=True) @@ -56,6 +24,8 @@ transform=lambda d: { **d, "id": d["doc_id"], + "content": ast.literal_eval(d["content"])[0] if len(ast.literal_eval(d["content"])) == 1 else ast.literal_eval(d["content"]), + # "embeddings": d["embeddings"], }, ) @pg_query @@ -64,11 +34,13 @@ async def list_docs( *, developer_id: UUID, owner_id: UUID | None = None, - owner_type: Literal["user", "agent", "org"] | None = None, + owner_type: Literal["user", "agent"] | None = None, limit: int = 100, offset: int = 0, sort_by: Literal["created_at", "updated_at"] = "created_at", direction: Literal["asc", "desc"] = "desc", + metadata_filter: dict[str, Any] = {}, + include_without_embeddings: bool = False, ) -> tuple[str, list]: """ Lists docs with optional owner filtering, pagination, and sorting. @@ -76,17 +48,36 @@ async def list_docs( if direction.lower() not in ["asc", "desc"]: raise HTTPException(status_code=400, detail="Invalid sort direction") + if sort_by not in ["created_at", "updated_at"]: + raise HTTPException(status_code=400, detail="Invalid sort field") + if limit > 100 or limit < 1: raise HTTPException(status_code=400, detail="Limit must be between 1 and 100") if offset < 0: raise HTTPException(status_code=400, detail="Offset must be >= 0") - params = [developer_id, limit, offset, sort_by, direction] - if owner_id and owner_type: - params.extend([owner_id, owner_type]) - query = owner_docs_query - else: - query = developer_docs_query + # Start with the base query + query = base_docs_query + params = [developer_id] + + # Add owner filtering + if owner_type and owner_id: + query += " AND doc_own.owner_type = $2 AND doc_own.owner_id = $3" + params.extend([owner_type, owner_id]) + + # Add metadata filtering + if metadata_filter: + for key, value in metadata_filter.items(): + query += f" AND d.metadata->>'{key}' = ${len(params) + 1}" + params.append(value) + + # Include or exclude documents without embeddings + # if not include_without_embeddings: + # query += " AND d.embeddings IS NOT NULL" + + # Add sorting and pagination + query += f" ORDER BY {sort_by} {direction} LIMIT ${len(params) + 1} OFFSET ${len(params) + 2}" + params.extend([limit, offset]) - return (query, params) + return query, params diff --git a/agents-api/agents_api/queries/docs/search_docs_by_embedding.py b/agents-api/agents_api/queries/docs/search_docs_by_embedding.py index af89cc1b8..e3120bd36 100644 --- a/agents-api/agents_api/queries/docs/search_docs_by_embedding.py +++ b/agents-api/agents_api/queries/docs/search_docs_by_embedding.py @@ -5,7 +5,6 @@ from typing import List, Literal from uuid import UUID -import asyncpg from beartype import beartype from fastapi import HTTPException from sqlglot import parse_one diff --git a/agents-api/agents_api/queries/docs/search_docs_by_text.py b/agents-api/agents_api/queries/docs/search_docs_by_text.py index eed74e54b..9f434d438 100644 --- a/agents-api/agents_api/queries/docs/search_docs_by_text.py +++ b/agents-api/agents_api/queries/docs/search_docs_by_text.py @@ -5,7 +5,6 @@ from typing import Literal from uuid import UUID -import asyncpg from beartype import beartype from fastapi import HTTPException from sqlglot import parse_one @@ -22,7 +21,7 @@ AND d.doc_id = do.doc_id WHERE d.developer_id = $1 AND ( - ($4::text IS NULL AND $5::uuid IS NULL) + ($4 IS NULL AND $5 IS NULL) OR (do.owner_type = $4 AND do.owner_id = $5) ) AND d.search_tsv @@ websearch_to_tsquery($3) diff --git a/agents-api/agents_api/queries/docs/search_docs_hybrid.py b/agents-api/agents_api/queries/docs/search_docs_hybrid.py index ae107419d..a879e3b6b 100644 --- a/agents-api/agents_api/queries/docs/search_docs_hybrid.py +++ b/agents-api/agents_api/queries/docs/search_docs_hybrid.py @@ -7,10 +7,8 @@ from uuid import UUID from beartype import beartype -from fastapi import HTTPException from ...autogen.openapi_model import Doc -from ..utils import run_concurrently from .search_docs_by_embedding import search_docs_by_embedding from .search_docs_by_text import search_docs_by_text diff --git a/agents-api/agents_api/queries/entries/get_history.py b/agents-api/agents_api/queries/entries/get_history.py index e6967a6cc..ffa0746c0 100644 --- a/agents-api/agents_api/queries/entries/get_history.py +++ b/agents-api/agents_api/queries/entries/get_history.py @@ -1,5 +1,4 @@ import json -from typing import Any, List, Tuple from uuid import UUID import asyncpg diff --git a/agents-api/agents_api/queries/files/get_file.py b/agents-api/agents_api/queries/files/get_file.py index 4d5dca4c0..5ccb08d86 100644 --- a/agents-api/agents_api/queries/files/get_file.py +++ b/agents-api/agents_api/queries/files/get_file.py @@ -6,13 +6,11 @@ from typing import Literal from uuid import UUID -import asyncpg from beartype import beartype -from fastapi import HTTPException from sqlglot import parse_one from ...autogen.openapi_model import File -from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class +from ..utils import pg_query, wrap_in_class # Define the raw SQL query file_query = parse_one(""" @@ -47,8 +45,8 @@ File, one=True, transform=lambda d: { - "id": d["file_id"], **d, + "id": d["file_id"], "hash": d["hash"].hex(), "content": "DUMMY: NEED TO FETCH CONTENT FROM BLOB STORAGE", }, diff --git a/agents-api/agents_api/queries/files/list_files.py b/agents-api/agents_api/queries/files/list_files.py index 2bc42f842..7c8b67887 100644 --- a/agents-api/agents_api/queries/files/list_files.py +++ b/agents-api/agents_api/queries/files/list_files.py @@ -3,51 +3,21 @@ It constructs and executes SQL queries to fetch a list of files based on developer ID with pagination. """ -from typing import Any, Literal +from typing import Literal from uuid import UUID -import asyncpg from beartype import beartype from fastapi import HTTPException from sqlglot import parse_one - from ...autogen.openapi_model import File -from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class +from ..utils import pg_query, wrap_in_class -# Query to list all files for a developer (uses developer_id index) -developer_files_query = parse_one(""" +# Base query for listing files +base_files_query = parse_one(""" SELECT f.* FROM files f LEFT JOIN file_owners fo ON f.developer_id = fo.developer_id AND f.file_id = fo.file_id WHERE f.developer_id = $1 -ORDER BY - CASE - WHEN $4 = 'created_at' AND $5 = 'asc' THEN created_at - WHEN $4 = 'created_at' AND $5 = 'desc' THEN created_at - WHEN $4 = 'updated_at' AND $5 = 'asc' THEN updated_at - WHEN $4 = 'updated_at' AND $5 = 'desc' THEN updated_at - END DESC NULLS LAST -LIMIT $2 -OFFSET $3; -""").sql(pretty=True) - -# Query to list files for a specific owner (uses composite indexes) -owner_files_query = parse_one(""" -SELECT f.* -FROM files f -JOIN file_owners fo ON f.developer_id = fo.developer_id AND f.file_id = fo.file_id -WHERE fo.developer_id = $1 -AND fo.owner_id = $6 -AND fo.owner_type = $7 -ORDER BY - CASE - WHEN $4 = 'created_at' AND $5 = 'asc' THEN created_at - WHEN $4 = 'created_at' AND $5 = 'desc' THEN created_at - WHEN $4 = 'updated_at' AND $5 = 'asc' THEN updated_at - WHEN $4 = 'updated_at' AND $5 = 'desc' THEN updated_at - END DESC NULLS LAST -LIMIT $2 -OFFSET $3; """).sql(pretty=True) @@ -74,49 +44,32 @@ async def list_files( direction: Literal["asc", "desc"] = "desc", ) -> tuple[str, list]: """ - Lists files with optimized queries for two cases: - 1. Owner specified: Returns files associated with that owner - 2. No owner: Returns all files for the developer - - Args: - developer_id: UUID of the developer - owner_id: Optional UUID of the owner (user or agent) - owner_type: Optional type of owner ("user" or "agent") - limit: Maximum number of records to return (1-100) - offset: Number of records to skip - sort_by: Field to sort by - direction: Sort direction ('asc' or 'desc') - - Returns: - Tuple of (query, params) - - Raises: - HTTPException: If parameters are invalid + Lists files with optional owner filtering, pagination, and sorting. """ # Validate parameters if direction.lower() not in ["asc", "desc"]: raise HTTPException(status_code=400, detail="Invalid sort direction") + if sort_by not in ["created_at", "updated_at"]: + raise HTTPException(status_code=400, detail="Invalid sort field") + if limit > 100 or limit < 1: raise HTTPException(status_code=400, detail="Limit must be between 1 and 100") if offset < 0: raise HTTPException(status_code=400, detail="Offset must be non-negative") - # Base parameters used in all queries - params = [ - developer_id, - limit, - offset, - sort_by, - direction, - ] + # Start with the base query + query = base_files_query + params = [developer_id] + + # Add owner filtering + if owner_type and owner_id: + query += " AND fo.owner_type = $2 AND fo.owner_id = $3" + params.extend([owner_type, owner_id]) - # Choose appropriate query based on owner details - if owner_id and owner_type: - params.extend([owner_id, owner_type]) # Add owner_id as $6 and owner_type as $7 - query = owner_files_query # Use single query with owner_type parameter - else: - query = developer_files_query + # Add sorting and pagination + query += f" ORDER BY {sort_by} {direction} LIMIT ${len(params) + 1} OFFSET ${len(params) + 2}" + params.extend([limit, offset]) - return (query, params) + return query, params diff --git a/agents-api/agents_api/queries/sessions/create_session.py b/agents-api/agents_api/queries/sessions/create_session.py index 63fbdc940..058462cf8 100644 --- a/agents-api/agents_api/queries/sessions/create_session.py +++ b/agents-api/agents_api/queries/sessions/create_session.py @@ -8,10 +8,8 @@ from ...autogen.openapi_model import ( CreateSessionRequest, - ResourceCreatedResponse, Session, ) -from ...common.utils.datetime import utcnow from ...metrics.counters import increase_counter from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class diff --git a/agents-api/tests/fixtures.py b/agents-api/tests/fixtures.py index 286fd10fb..6689137d7 100644 --- a/agents-api/tests/fixtures.py +++ b/agents-api/tests/fixtures.py @@ -1,6 +1,5 @@ import random import string -import time from uuid import UUID from fastapi.testclient import TestClient @@ -12,6 +11,7 @@ CreateFileRequest, CreateSessionRequest, CreateUserRequest, + CreateDocRequest, ) from agents_api.clients.pg import create_db_pool from agents_api.env import api_key, api_key_header_name, multi_tenant_mode @@ -21,7 +21,8 @@ # from agents_api.queries.agents.delete_agent import delete_agent from agents_api.queries.developers.get_developer import get_developer -# from agents_api.queries.docs.create_doc import create_doc +from agents_api.queries.docs.create_doc import create_doc + # from agents_api.queries.docs.delete_doc import delete_doc # from agents_api.queries.execution.create_execution import create_execution # from agents_api.queries.execution.create_execution_transition import create_execution_transition @@ -149,6 +150,22 @@ async def test_file(dsn=pg_dsn, developer=test_developer, user=test_user): return file +@fixture(scope="test") +async def test_doc(dsn=pg_dsn, developer=test_developer): + pool = await create_db_pool(dsn=dsn) + doc = await create_doc( + developer_id=developer.id, + data=CreateDocRequest( + title="Hello", + content=["World"], + metadata={"test": "test"}, + embed_instruction="Embed the document", + ), + connection_pool=pool, + ) + return doc + + @fixture(scope="test") async def random_email(): return f"{"".join([random.choice(string.ascii_lowercase) for _ in range(10)])}@mail.com" diff --git a/agents-api/tests/test_docs_queries.py b/agents-api/tests/test_docs_queries.py index f2ff2c786..d6af42e57 100644 --- a/agents-api/tests/test_docs_queries.py +++ b/agents-api/tests/test_docs_queries.py @@ -1,163 +1,249 @@ -# # Tests for entry queries +from ward import test -# import asyncio +from agents_api.autogen.openapi_model import CreateDocRequest +from agents_api.clients.pg import create_db_pool +from agents_api.queries.docs.create_doc import create_doc +from agents_api.queries.docs.delete_doc import delete_doc +from agents_api.queries.docs.get_doc import get_doc +from agents_api.queries.docs.list_docs import list_docs -# from ward import test - -# from agents_api.autogen.openapi_model import CreateDocRequest -# from agents_api.queries.docs.create_doc import create_doc -# from agents_api.queries.docs.delete_doc import delete_doc -# from agents_api.queries.docs.embed_snippets import embed_snippets -# from agents_api.queries.docs.get_doc import get_doc -# from agents_api.queries.docs.list_docs import list_docs -# from agents_api.queries.docs.search_docs_by_embedding import search_docs_by_embedding +# If you wish to test text/embedding/hybrid search, import them: # from agents_api.queries.docs.search_docs_by_text import search_docs_by_text -# from tests.fixtures import ( -# EMBEDDING_SIZE, -# cozo_client, -# test_agent, -# test_developer_id, -# test_doc, -# test_user, -# ) - - -# @test("query: create docs") -# def _( -# client=cozo_client, developer_id=test_developer_id, agent=test_agent, user=test_user -# ): -# create_doc( -# developer_id=developer_id, -# owner_type="agent", -# owner_id=agent.id, -# data=CreateDocRequest(title="Hello", content=["World"]), -# client=client, -# ) - -# create_doc( -# developer_id=developer_id, -# owner_type="user", -# owner_id=user.id, -# data=CreateDocRequest(title="Hello", content=["World"]), -# client=client, -# ) - - -# @test("query: get docs") -# def _(client=cozo_client, doc=test_doc, developer_id=test_developer_id): -# get_doc( -# developer_id=developer_id, -# doc_id=doc.id, -# client=client, -# ) - - -# @test("query: delete doc") -# def _(client=cozo_client, developer_id=test_developer_id, agent=test_agent): -# doc = create_doc( -# developer_id=developer_id, -# owner_type="agent", -# owner_id=agent.id, -# data=CreateDocRequest(title="Hello", content=["World"]), -# client=client, -# ) - -# delete_doc( -# developer_id=developer_id, -# doc_id=doc.id, -# owner_type="agent", -# owner_id=agent.id, -# client=client, -# ) - - -# @test("query: list docs") -# def _( -# client=cozo_client, developer_id=test_developer_id, doc=test_doc, agent=test_agent -# ): -# result = list_docs( -# developer_id=developer_id, -# owner_type="agent", -# owner_id=agent.id, -# client=client, -# include_without_embeddings=True, -# ) - -# assert len(result) >= 1 - - -# @test("query: search docs by text") -# async def _(client=cozo_client, agent=test_agent, developer_id=test_developer_id): -# create_doc( -# developer_id=developer_id, -# owner_type="agent", -# owner_id=agent.id, -# data=CreateDocRequest( -# title="Hello", content=["The world is a funny little thing"] -# ), -# client=client, -# ) - -# await asyncio.sleep(1) - -# result = search_docs_by_text( -# developer_id=developer_id, -# owners=[("agent", agent.id)], -# query="funny", -# client=client, -# ) - -# assert len(result) >= 1 -# assert result[0].metadata is not None - - -# @test("query: search docs by embedding") -# async def _(client=cozo_client, agent=test_agent, developer_id=test_developer_id): -# doc = create_doc( -# developer_id=developer_id, -# owner_type="agent", -# owner_id=agent.id, -# data=CreateDocRequest(title="Hello", content=["World"]), -# client=client, -# ) - -# ### Add embedding to the snippet -# embed_snippets( -# developer_id=developer_id, -# doc_id=doc.id, -# snippet_indices=[0], -# embeddings=[[1.0] * EMBEDDING_SIZE], -# client=client, -# ) - -# await asyncio.sleep(1) - -# ### Search -# query_embedding = [0.99] * EMBEDDING_SIZE - -# result = search_docs_by_embedding( -# developer_id=developer_id, -# owners=[("agent", agent.id)], -# query_embedding=query_embedding, -# client=client, -# ) - -# assert len(result) >= 1 -# assert result[0].metadata is not None - - -# @test("query: embed snippets") -# def _(client=cozo_client, developer_id=test_developer_id, doc=test_doc): -# snippet_indices = [0] -# embeddings = [[1.0] * EMBEDDING_SIZE] - -# result = embed_snippets( -# developer_id=developer_id, -# doc_id=doc.id, -# snippet_indices=snippet_indices, -# embeddings=embeddings, -# client=client, -# ) - -# assert result is not None -# assert result.id == doc.id +# from agents_api.queries.docs.search_docs_by_embedding import search_docs_by_embedding +# from agents_api.queries.docs.search_docs_hybrid import search_docs_hybrid + +# You can rename or remove these imports to match your actual fixtures +from tests.fixtures import pg_dsn, test_agent, test_developer, test_user, test_doc + + +@test("query: create doc") +async def _(dsn=pg_dsn, developer=test_developer): + pool = await create_db_pool(dsn=dsn) + doc = await create_doc( + developer_id=developer.id, + data=CreateDocRequest( + title="Hello Doc", + content="This is sample doc content", + embed_instruction="Embed the document", + metadata={"test": "test"}, + ), + connection_pool=pool, + ) + + assert doc.title == "Hello Doc" + assert doc.content == "This is sample doc content" + assert doc.modality == "text" + assert doc.embedding_model == "voyage-3" + assert doc.embedding_dimensions == 1024 + assert doc.language == "english" + assert doc.index == 0 + +@test("query: create user doc") +async def _(dsn=pg_dsn, developer=test_developer, user=test_user): + pool = await create_db_pool(dsn=dsn) + doc = await create_doc( + developer_id=developer.id, + data=CreateDocRequest( + title="User Doc", + content="Docs for user testing", + metadata={"test": "test"}, + embed_instruction="Embed the document", + ), + owner_type="user", + owner_id=user.id, + connection_pool=pool, + ) + assert doc.title == "User Doc" + + # Verify doc appears in user's docs + docs_list = await list_docs( + developer_id=developer.id, + owner_type="user", + owner_id=user.id, + connection_pool=pool, + ) + assert any(d.id == doc.id for d in docs_list) + +@test("query: create agent doc") +async def _(dsn=pg_dsn, developer=test_developer, agent=test_agent): + pool = await create_db_pool(dsn=dsn) + doc = await create_doc( + developer_id=developer.id, + data=CreateDocRequest( + title="Agent Doc", + content="Docs for agent testing", + metadata={"test": "test"}, + embed_instruction="Embed the document", + ), + owner_type="agent", + owner_id=agent.id, + connection_pool=pool, + ) + assert doc.title == "Agent Doc" + + # Verify doc appears in agent's docs + docs_list = await list_docs( + developer_id=developer.id, + owner_type="agent", + owner_id=agent.id, + connection_pool=pool, + ) + assert any(d.id == doc.id for d in docs_list) + +@test("model: get doc") +async def _(dsn=pg_dsn, developer=test_developer, doc=test_doc): + pool = await create_db_pool(dsn=dsn) + doc_test = await get_doc( + developer_id=developer.id, + doc_id=doc.id, + connection_pool=pool, + ) + assert doc_test.id == doc.id + assert doc_test.title == doc.title + +@test("query: list docs") +async def _(dsn=pg_dsn, developer=test_developer, doc=test_doc): + pool = await create_db_pool(dsn=dsn) + docs_list = await list_docs( + developer_id=developer.id, + connection_pool=pool, + ) + assert len(docs_list) >= 1 + assert any(d.id == doc.id for d in docs_list) + +@test("query: list user docs") +async def _(dsn=pg_dsn, developer=test_developer, user=test_user): + pool = await create_db_pool(dsn=dsn) + + # Create a doc owned by the user + doc_user = await create_doc( + developer_id=developer.id, + data=CreateDocRequest( + title="User List Test", + content="Some user doc content", + metadata={"test": "test"}, + embed_instruction="Embed the document", + ), + owner_type="user", + owner_id=user.id, + connection_pool=pool, + ) + + # List user's docs + docs_list = await list_docs( + developer_id=developer.id, + owner_type="user", + owner_id=user.id, + connection_pool=pool, + ) + assert len(docs_list) >= 1 + assert any(d.id == doc_user.id for d in docs_list) + +@test("query: list agent docs") +async def _(dsn=pg_dsn, developer=test_developer, agent=test_agent): + pool = await create_db_pool(dsn=dsn) + + # Create a doc owned by the agent + doc_agent = await create_doc( + developer_id=developer.id, + data=CreateDocRequest( + title="Agent List Test", + content="Some agent doc content", + metadata={"test": "test"}, + embed_instruction="Embed the document", + ), + owner_type="agent", + owner_id=agent.id, + connection_pool=pool, + ) + + # List agent's docs + docs_list = await list_docs( + developer_id=developer.id, + owner_type="agent", + owner_id=agent.id, + connection_pool=pool, + ) + assert len(docs_list) >= 1 + assert any(d.id == doc_agent.id for d in docs_list) + +@test("query: delete user doc") +async def _(dsn=pg_dsn, developer=test_developer, user=test_user): + pool = await create_db_pool(dsn=dsn) + + # Create a doc owned by the user + doc_user = await create_doc( + developer_id=developer.id, + data=CreateDocRequest( + title="User Delete Test", + content="Doc for user deletion test", + metadata={"test": "test"}, + embed_instruction="Embed the document", + ), + owner_type="user", + owner_id=user.id, + connection_pool=pool, + ) + + # Delete the doc + await delete_doc( + developer_id=developer.id, + doc_id=doc_user.id, + owner_type="user", + owner_id=user.id, + connection_pool=pool, + ) + + # Verify doc is no longer in user's docs + docs_list = await list_docs( + developer_id=developer.id, + owner_type="user", + owner_id=user.id, + connection_pool=pool, + ) + assert not any(d.id == doc_user.id for d in docs_list) + +@test("query: delete agent doc") +async def _(dsn=pg_dsn, developer=test_developer, agent=test_agent): + pool = await create_db_pool(dsn=dsn) + + # Create a doc owned by the agent + doc_agent = await create_doc( + developer_id=developer.id, + data=CreateDocRequest( + title="Agent Delete Test", + content="Doc for agent deletion test", + metadata={"test": "test"}, + embed_instruction="Embed the document", + ), + owner_type="agent", + owner_id=agent.id, + connection_pool=pool, + ) + + # Delete the doc + await delete_doc( + developer_id=developer.id, + doc_id=doc_agent.id, + owner_type="agent", + owner_id=agent.id, + connection_pool=pool, + ) + + # Verify doc is no longer in agent's docs + docs_list = await list_docs( + developer_id=developer.id, + owner_type="agent", + owner_id=agent.id, + connection_pool=pool, + ) + assert not any(d.id == doc_agent.id for d in docs_list) + +@test("query: delete doc") +async def _(dsn=pg_dsn, developer=test_developer, doc=test_doc): + pool = await create_db_pool(dsn=dsn) + await delete_doc( + developer_id=developer.id, + doc_id=doc.id, + connection_pool=pool, + ) diff --git a/agents-api/tests/test_entry_queries.py b/agents-api/tests/test_entry_queries.py index 706185c7b..2a9746ef1 100644 --- a/agents-api/tests/test_entry_queries.py +++ b/agents-api/tests/test_entry_queries.py @@ -3,7 +3,6 @@ It verifies the functionality of adding, retrieving, and processing entries as defined in the schema. """ -from uuid import UUID from fastapi import HTTPException from uuid_extensions import uuid7 diff --git a/agents-api/tests/test_files_queries.py b/agents-api/tests/test_files_queries.py index 92b52d733..c83c7a6f6 100644 --- a/agents-api/tests/test_files_queries.py +++ b/agents-api/tests/test_files_queries.py @@ -1,9 +1,7 @@ # # Tests for entry queries -from fastapi import HTTPException -from uuid_extensions import uuid7 -from ward import raises, test +from ward import test from agents_api.autogen.openapi_model import CreateFileRequest from agents_api.clients.pg import create_db_pool diff --git a/agents-api/tests/test_session_queries.py b/agents-api/tests/test_session_queries.py index 171e56aa8..4673d6fc5 100644 --- a/agents-api/tests/test_session_queries.py +++ b/agents-api/tests/test_session_queries.py @@ -10,7 +10,6 @@ CreateOrUpdateSessionRequest, CreateSessionRequest, PatchSessionRequest, - ResourceCreatedResponse, ResourceDeletedResponse, ResourceUpdatedResponse, Session, diff --git a/integrations-service/integrations/autogen/Docs.py b/integrations-service/integrations/autogen/Docs.py index ffed27c1d..af5f60d6a 100644 --- a/integrations-service/integrations/autogen/Docs.py +++ b/integrations-service/integrations/autogen/Docs.py @@ -73,6 +73,30 @@ class Doc(BaseModel): """ Embeddings for the document """ + modality: Annotated[str | None, Field(json_schema_extra={"readOnly": True})] = None + """ + Modality of the document + """ + language: Annotated[str | None, Field(json_schema_extra={"readOnly": True})] = None + """ + Language of the document + """ + index: Annotated[int | None, Field(json_schema_extra={"readOnly": True})] = None + """ + Index of the document + """ + embedding_model: Annotated[ + str | None, Field(json_schema_extra={"readOnly": True}) + ] = None + """ + Embedding model to use for the document + """ + embedding_dimensions: Annotated[ + int | None, Field(json_schema_extra={"readOnly": True}) + ] = None + """ + Dimensions of the embedding model + """ class DocOwner(BaseModel): diff --git a/typespec/docs/models.tsp b/typespec/docs/models.tsp index 055fc2003..f4d16cbd5 100644 --- a/typespec/docs/models.tsp +++ b/typespec/docs/models.tsp @@ -27,6 +27,26 @@ model Doc { /** Embeddings for the document */ @visibility("read") embeddings?: float32[] | float32[][]; + + @visibility("read") + /** Modality of the document */ + modality?: string; + + @visibility("read") + /** Language of the document */ + language?: string; + + @visibility("read") + /** Index of the document */ + index?: uint16; + + @visibility("read") + /** Embedding model to use for the document */ + embedding_model?: string; + + @visibility("read") + /** Dimensions of the embedding model */ + embedding_dimensions?: uint16; } /** Payload for creating a doc */ diff --git a/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml b/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml index d4835a695..c19bc4ed2 100644 --- a/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml +++ b/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml @@ -2876,6 +2876,28 @@ components: format: float description: Embeddings for the document readOnly: true + modality: + type: string + description: Modality of the document + readOnly: true + language: + type: string + description: Language of the document + readOnly: true + index: + type: integer + format: uint16 + description: Index of the document + readOnly: true + embedding_model: + type: string + description: Embedding model to use for the document + readOnly: true + embedding_dimensions: + type: integer + format: uint16 + description: Dimensions of the embedding model + readOnly: true Docs.DocOwner: type: object required: