Skip to content

Commit

Permalink
wip(agents-api): Doc queries
Browse files Browse the repository at this point in the history
  • Loading branch information
Vedantsahai18 committed Dec 20, 2024
1 parent 41739ee commit 6c77490
Show file tree
Hide file tree
Showing 10 changed files with 783 additions and 0 deletions.
25 changes: 25 additions & 0 deletions agents-api/agents_api/queries/docs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Module: agents_api/queries/docs
This module is responsible for managing document-related operations within the application, particularly for agents and possibly other entities. It serves as a core component of the document management system, enabling features such as document creation, listing, deletion, and embedding of snippets for enhanced search and retrieval capabilities.
Main functionalities include:
- Creating new documents and associating them with agents or users.
- Listing documents based on various criteria, including ownership and metadata filters.
- Deleting documents by their unique identifiers.
- Embedding document snippets for retrieval purposes.
The module interacts with other parts of the application, such as the agents and users modules, to provide a comprehensive document management system. Its role is crucial in enabling document search, retrieval, and management features within the context of agents and users.
This documentation aims to provide clear, concise, and sufficient context for new developers or contributors to understand the module's role without needing to dive deep into the code immediately.
"""

# ruff: noqa: F401, F403, F405

from .create_doc import create_doc
from .delete_doc import delete_doc
from .embed_snippets import embed_snippets
from .get_doc import get_doc
from .list_docs import list_docs
from .search_docs_by_embedding import search_docs_by_embedding
from .search_docs_by_text import search_docs_by_text
135 changes: 135 additions & 0 deletions agents-api/agents_api/queries/docs/create_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""
Timescale-based creation of docs.
Mirrors the structure of create_file.py, but uses the docs/doc_owners tables.
"""

import base64
import hashlib
from typing import Any, Literal
from uuid import UUID

import asyncpg
from beartype import beartype
from fastapi import HTTPException
from sqlglot import parse_one
from uuid_extensions import uuid7

from ...autogen.openapi_model import CreateDocRequest, Doc
from ...metrics.counters import increase_counter
from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class

# Base INSERT for docs
#
# Parameterized statement that inserts one row into `docs` and returns the
# inserted row (`RETURNING *`). The SQL text is parsed by sqlglot's `parse_one`
# and re-rendered with `.sql(pretty=True)` once, at import time, so
# `doc_query` holds the rendered SQL rather than the literal below.
#
# Positional parameters $1..$10 must be supplied in the order annotated in the
# VALUES clause; `create_doc` builds that list as `doc_params`.
doc_query = parse_one("""
INSERT INTO docs (
developer_id,
doc_id,
title,
content,
index,
modality,
embedding_model,
embedding_dimensions,
language,
metadata
)
VALUES (
$1, -- developer_id
$2, -- doc_id
$3, -- title
$4, -- content
$5, -- index
$6, -- modality
$7, -- embedding_model
$8, -- embedding_dimensions
$9, -- language
$10 -- metadata (JSONB)
)
RETURNING *;
""").sql(pretty=True)

# Owner association query for doc_owners
#
# Inserts one row into `doc_owners` inside a CTE, then joins the CTE's
# returned `doc_id` back to `docs` so the statement yields the doc row.
#
# Positional parameters (see `owner_params` in `create_doc`):
#   $1 developer_id, $2 doc_id, $3 owner_type, $4 owner_id
#
# NOTE(review): the join back to `docs` matches on `doc_id` only, not on
# `developer_id` -- confirm doc_id is unique across developers.
doc_owner_query = parse_one("""
WITH inserted_owner AS (
INSERT INTO doc_owners (
developer_id,
doc_id,
owner_type,
owner_id
)
VALUES ($1, $2, $3, $4)
RETURNING doc_id
)
SELECT d.*
FROM inserted_owner io
JOIN docs d ON d.doc_id = io.doc_id;
""").sql(pretty=True)


@rewrap_exceptions(
    {
        asyncpg.UniqueViolationError: partialclass(
            HTTPException,
            status_code=409,
            detail="A document with this ID already exists for this developer",
        ),
        asyncpg.NoDataFoundError: partialclass(
            HTTPException,
            status_code=404,
            detail="The specified owner does not exist",
        ),
        asyncpg.ForeignKeyViolationError: partialclass(
            HTTPException,
            status_code=404,
            detail="Developer or doc owner not found",
        ),
    }
)
@wrap_in_class(
    Doc,
    one=True,
    # Expose the row's `doc_id` column under the `id` key as well.
    transform=lambda row: {**row, "id": row["doc_id"]},
)
@increase_counter("create_doc")
@pg_query
@beartype
async def create_doc(
    *,
    developer_id: UUID,
    doc_id: UUID | None = None,
    data: CreateDocRequest,
    owner_type: Literal["user", "agent", "org"] | None = None,
    owner_id: UUID | None = None,
) -> list[tuple[str, list]]:
    """
    Build the statement(s) that insert a new doc and optionally link an owner.

    Args:
        developer_id: Developer the doc belongs to ($1 of both statements).
        doc_id: ID for the new doc; a fresh `uuid7()` is generated when falsy.
        data: Request payload; its title, content, index, modality,
            embedding_model, embedding_dimensions, language and metadata
            attributes are read, with fallbacks applied to the falsy ones
            (title and content are passed through unchanged).
        owner_type: Kind of owner to associate with the doc.
        owner_id: ID of the owner to associate with the doc.

    Returns:
        A list of `(sql, params)` pairs: always the `docs` INSERT, followed by
        the `doc_owners` INSERT only when BOTH `owner_type` and `owner_id`
        are truthy. Supplying just one of them adds no owner association.
        Execution and result wrapping are presumably handled by the
        decorators above (their implementation is not visible here).
    """
    resolved_doc_id = doc_id if doc_id else uuid7()

    # Order must match the $1..$10 placeholders of `doc_query`.
    insert_args = [
        developer_id,
        resolved_doc_id,
        data.title,
        data.content,
        data.index or 0,  # fallback if no snippet index
        data.modality or "text",
        data.embedding_model or "none",
        data.embedding_dimensions or 0,
        data.language or "english",
        data.metadata or {},
    ]

    # Without a complete owner specification only the doc itself is inserted.
    if not (owner_type and owner_id):
        return [(doc_query, insert_args)]

    ownership_args = [developer_id, resolved_doc_id, owner_type, owner_id]
    return [
        (doc_query, insert_args),
        (doc_owner_query, ownership_args),
    ]
77 changes: 77 additions & 0 deletions agents-api/agents_api/queries/docs/delete_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
Timescale-based deletion of a doc record.
"""
from typing import Literal
from uuid import UUID

import asyncpg
from beartype import beartype
from fastapi import HTTPException
from sqlglot import parse_one

from ...autogen.openapi_model import ResourceDeletedResponse
from ...common.utils.datetime import utcnow
from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class

# Delete doc query + ownership check
#
# Positional parameters (see `delete_doc`):
#   $1 developer_id, $2 doc_id, $3 owner_type (nullable), $4 owner_id (nullable)
#
# The `deleted_owners` CTE removes `doc_owners` rows for the doc: every row
# when both $3 and $4 are NULL, otherwise only rows matching the given owner.
# The main statement then deletes the `docs` row when $3 is NULL or when a
# matching `doc_owners` row exists, and returns the deleted `doc_id`.
#
# NOTE(review): the EXISTS subquery reads `doc_owners` in the same statement
# whose CTE deletes from it; this presumably relies on PostgreSQL evaluating
# the CTE and the main query against the same snapshot -- confirm.
# NOTE(review): when $3 is NULL but $4 is not, the CTE matches no owner rows
# while the `docs` row is still deleted -- confirm this is intended.
delete_doc_query = parse_one("""
WITH deleted_owners AS (
DELETE FROM doc_owners
WHERE developer_id = $1
AND doc_id = $2
AND (
($3::text IS NULL AND $4::uuid IS NULL)
OR (owner_type = $3 AND owner_id = $4)
)
)
DELETE FROM docs
WHERE developer_id = $1
AND doc_id = $2
AND (
$3::text IS NULL OR EXISTS (
SELECT 1 FROM doc_owners
WHERE developer_id = $1
AND doc_id = $2
AND owner_type = $3
AND owner_id = $4
)
)
RETURNING doc_id;
""").sql(pretty=True)


@rewrap_exceptions(
    {
        asyncpg.NoDataFoundError: partialclass(
            HTTPException,
            status_code=404,
            detail="Doc not found",
        )
    }
)
@wrap_in_class(
    ResourceDeletedResponse,
    one=True,
    # Shape the returned `doc_id` row into the deletion-response fields.
    transform=lambda row: {
        "id": row["doc_id"],
        "deleted_at": utcnow(),
        "jobs": [],
    },
)
@pg_query
@beartype
async def delete_doc(
    *,
    developer_id: UUID,
    doc_id: UUID,
    owner_type: Literal["user", "agent", "org"] | None = None,
    owner_id: UUID | None = None,
) -> tuple[str, list]:
    """
    Build the statement that deletes a doc and its doc_owners rows.

    Args:
        developer_id: Developer the doc belongs to ($1).
        doc_id: ID of the doc to delete ($2).
        owner_type: Optional owner kind ($3); when given together with
            `owner_id`, the doc is only removed if that owner matches.
        owner_id: Optional owner ID ($4).

    Returns:
        A `(sql, params)` pair made of `delete_doc_query` and its four
        positional parameters.
    """
    # Order must match the $1..$4 placeholders of `delete_doc_query`.
    query_args = [developer_id, doc_id, owner_type, owner_id]
    return delete_doc_query, query_args
Empty file.
52 changes: 52 additions & 0 deletions agents-api/agents_api/queries/docs/get_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Timescale-based retrieval of a single doc record.
"""
from typing import Literal
from uuid import UUID

import asyncpg
from beartype import beartype
from fastapi import HTTPException
from sqlglot import parse_one

from ...autogen.openapi_model import Doc
from ..utils import pg_query, wrap_in_class

# Fetch a single doc by (developer_id, doc_id), optionally restricted to an
# owner.
#
# Positional parameters (see `get_doc`):
#   $1 developer_id, $2 doc_id, $3 owner_type (nullable), $4 owner_id (nullable)
#
# When both $3 and $4 are NULL the owner filter is skipped; otherwise a
# `doc_owners` row with exactly that owner_type/owner_id must exist.
#
# The `doc_owners` alias is `doc_own` rather than `do`: DO is a reserved key
# word in PostgreSQL and is rejected as an unquoted table alias.
doc_query = parse_one("""
SELECT d.*
FROM docs d
LEFT JOIN doc_owners doc_own
    ON d.developer_id = doc_own.developer_id AND d.doc_id = doc_own.doc_id
WHERE d.developer_id = $1
    AND d.doc_id = $2
    AND (
        ($3::text IS NULL AND $4::uuid IS NULL)
        OR (doc_own.owner_type = $3 AND doc_own.owner_id = $4)
    )
LIMIT 1;
""").sql(pretty=True)


@wrap_in_class(
    Doc,
    one=True,
    # Expose the row's `doc_id` column under the `id` key as well.
    transform=lambda row: {**row, "id": row["doc_id"]},
)
@pg_query
@beartype
async def get_doc(
    *,
    developer_id: UUID,
    doc_id: UUID,
    owner_type: Literal["user", "agent", "org"] | None = None,
    owner_id: UUID | None = None,
) -> tuple[str, list]:
    """
    Build the statement that fetches one doc, optionally scoped to an owner.

    Args:
        developer_id: Developer the doc belongs to ($1).
        doc_id: ID of the doc to fetch ($2).
        owner_type: Optional owner kind to constrain the lookup ($3).
        owner_id: Optional owner ID to constrain the lookup ($4).

    Returns:
        A `(sql, params)` pair made of `doc_query` and its four positional
        parameters.
    """
    # Order must match the $1..$4 placeholders of `doc_query`.
    query_args = [developer_id, doc_id, owner_type, owner_id]
    return doc_query, query_args
91 changes: 91 additions & 0 deletions agents-api/agents_api/queries/docs/list_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
Timescale-based listing of docs with optional owner filter and pagination.
"""
from typing import Literal
from uuid import UUID

import asyncpg
from beartype import beartype
from fastapi import HTTPException
from sqlglot import parse_one

from ...autogen.openapi_model import Doc
from ..utils import pg_query, wrap_in_class

# Basic listing for all docs by developer
#
# Positional parameters (see `list_docs`):
#   $1 developer_id, $2 limit, $3 offset, $4 sort_by, $5 direction
#
# Sorting: a direction (ASC/DESC) cannot be bound as a query parameter, so
# each (sort_by, direction) pair gets its own ORDER BY key. Exactly one CASE
# yields the column value; the others evaluate to NULL for every row and so
# do not influence the ordering. (A single CASE followed by a fixed DESC
# would sort descending even when 'asc' was requested.)
#
# No join against `doc_owners` is needed here: only `docs` columns are
# selected and no owner filter applies, and joining would repeat a doc once
# per owner row.
developer_docs_query = parse_one("""
SELECT d.*
FROM docs d
WHERE d.developer_id = $1
ORDER BY
    CASE WHEN $4 = 'created_at' AND $5 = 'asc' THEN d.created_at END ASC NULLS LAST,
    CASE WHEN $4 = 'created_at' AND $5 = 'desc' THEN d.created_at END DESC NULLS LAST,
    CASE WHEN $4 = 'updated_at' AND $5 = 'asc' THEN d.updated_at END ASC NULLS LAST,
    CASE WHEN $4 = 'updated_at' AND $5 = 'desc' THEN d.updated_at END DESC NULLS LAST
LIMIT $2
OFFSET $3;
""").sql(pretty=True)

# Listing for docs associated with a specific owner
#
# Positional parameters (see `list_docs`):
#   $1 developer_id, $2 limit, $3 offset, $4 sort_by, $5 direction,
#   $6 owner_id, $7 owner_type
#
# Sorting: a direction (ASC/DESC) cannot be bound as a query parameter, so
# each (sort_by, direction) pair gets its own ORDER BY key. Exactly one CASE
# yields the column value; the others evaluate to NULL for every row and so
# do not influence the ordering. (A single CASE followed by a fixed DESC
# would sort descending even when 'asc' was requested.)
#
# The `doc_owners` alias is `doc_own` rather than `do`: DO is a reserved key
# word in PostgreSQL and is rejected as an unquoted table alias.
owner_docs_query = parse_one("""
SELECT d.*
FROM docs d
JOIN doc_owners doc_own
    ON d.developer_id = doc_own.developer_id AND d.doc_id = doc_own.doc_id
WHERE doc_own.developer_id = $1
    AND doc_own.owner_id = $6
    AND doc_own.owner_type = $7
ORDER BY
    CASE WHEN $4 = 'created_at' AND $5 = 'asc' THEN d.created_at END ASC NULLS LAST,
    CASE WHEN $4 = 'created_at' AND $5 = 'desc' THEN d.created_at END DESC NULLS LAST,
    CASE WHEN $4 = 'updated_at' AND $5 = 'asc' THEN d.updated_at END ASC NULLS LAST,
    CASE WHEN $4 = 'updated_at' AND $5 = 'desc' THEN d.updated_at END DESC NULLS LAST
LIMIT $2
OFFSET $3;
""").sql(pretty=True)


@wrap_in_class(
    Doc,
    one=False,
    # Expose each row's `doc_id` column under the `id` key as well.
    transform=lambda row: {**row, "id": row["doc_id"]},
)
@pg_query
@beartype
async def list_docs(
    *,
    developer_id: UUID,
    owner_id: UUID | None = None,
    owner_type: Literal["user", "agent", "org"] | None = None,
    limit: int = 100,
    offset: int = 0,
    sort_by: Literal["created_at", "updated_at"] = "created_at",
    direction: Literal["asc", "desc"] = "desc",
) -> tuple[str, list]:
    """
    Build the statement that lists docs for a developer.

    Args:
        developer_id: Developer whose docs are listed ($1).
        owner_id: Optional owner ID; only used together with `owner_type`.
        owner_type: Optional owner kind; only used together with `owner_id`.
        limit: Page size, 1..100 inclusive ($2).
        offset: Number of rows to skip, >= 0 ($3).
        sort_by: Sort column name, passed to the query as $4.
        direction: Sort direction, passed to the query as $5.

    Returns:
        A `(sql, params)` pair. `owner_docs_query` (with owner_id and
        owner_type appended as $6 and $7) is used when both owner arguments
        are truthy; otherwise `developer_docs_query` is used.

    Raises:
        HTTPException: 400 for an invalid direction, limit or offset.
    """
    # Validation order is significant: the first failing check wins.
    if direction.lower() not in ("asc", "desc"):
        raise HTTPException(status_code=400, detail="Invalid sort direction")

    if not (1 <= limit <= 100):
        raise HTTPException(status_code=400, detail="Limit must be between 1 and 100")

    if offset < 0:
        raise HTTPException(status_code=400, detail="Offset must be >= 0")

    # Shared $1..$5 arguments for both listing queries.
    base_args = [developer_id, limit, offset, sort_by, direction]

    # An incomplete owner specification falls back to the developer-wide list.
    if not (owner_id and owner_type):
        return (developer_docs_query, base_args)

    return (owner_docs_query, [*base_args, owner_id, owner_type])
Loading

0 comments on commit 6c77490

Please sign in to comment.