-
Notifications
You must be signed in to change notification settings - Fork 900
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
41739ee
commit 6c77490
Showing
10 changed files
with
783 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
""" | ||
Module: agents_api/models/docs | ||
This module manages document-related operations for agents and users. It is the core of the document management system, providing document creation, retrieval, listing, deletion, and search, as well as embedding of snippets for search and retrieval. | ||
Main functionalities include: | ||
- Creating new documents and associating them with agents or users. | ||
- Listing documents based on various criteria, including ownership and metadata filters. | ||
- Deleting documents by their unique identifiers. | ||
- Embedding document snippets for retrieval purposes. | ||
The module interacts with other parts of the application, such as the agents and users modules, to provide a comprehensive document management system. Its role is crucial in enabling document search, retrieval, and management features within the context of agents and users. | ||
This documentation aims to provide clear, concise, and sufficient context for new developers or contributors to understand the module's role without needing to dive deep into the code immediately. | ||
""" | ||
|
||
# ruff: noqa: F401, F403, F405 | ||
|
||
from .create_doc import create_doc | ||
from .delete_doc import delete_doc | ||
from .embed_snippets import embed_snippets | ||
from .get_doc import get_doc | ||
from .list_docs import list_docs | ||
from .search_docs_by_embedding import search_docs_by_embedding | ||
from .search_docs_by_text import search_docs_by_text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
""" | ||
Timescale-based creation of docs. | ||
Mirrors the structure of create_file.py, but uses the docs/doc_owners tables. | ||
""" | ||
|
||
import base64 | ||
import hashlib | ||
from typing import Any, Literal | ||
from uuid import UUID | ||
|
||
import asyncpg | ||
from beartype import beartype | ||
from fastapi import HTTPException | ||
from sqlglot import parse_one | ||
from uuid_extensions import uuid7 | ||
|
||
from ...autogen.openapi_model import CreateDocRequest, Doc | ||
from ...metrics.counters import increase_counter | ||
from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class | ||
|
||
# Parameterized INSERT that creates one row in `docs` and returns the stored
# row. Positional parameters $1..$10 follow the column list order.
_INSERT_DOC_SQL = """
INSERT INTO docs (
    developer_id,
    doc_id,
    title,
    content,
    index,
    modality,
    embedding_model,
    embedding_dimensions,
    language,
    metadata
)
VALUES (
    $1, -- developer_id
    $2, -- doc_id
    $3, -- title
    $4, -- content
    $5, -- index
    $6, -- modality
    $7, -- embedding_model
    $8, -- embedding_dimensions
    $9, -- language
    $10 -- metadata (JSONB)
)
RETURNING *;
"""

# The statement is parsed by sqlglot at import time and re-rendered in its
# pretty-printed form; `doc_query` holds the resulting SQL string.
doc_query = parse_one(_INSERT_DOC_SQL).sql(pretty=True)
|
||
# Owner association query for doc_owners.
#
# Inserts the (developer, doc, owner) link and returns the matching `docs` row.
# Parameters: $1 developer_id, $2 doc_id, $3 owner_type, $4 owner_id.
#
# The join back to `docs` matches on BOTH developer_id and doc_id. Docs are
# keyed per developer everywhere else in this package, so joining on doc_id
# alone could return a row that belongs to a different developer.
doc_owner_query = parse_one("""
WITH inserted_owner AS (
    INSERT INTO doc_owners (
        developer_id,
        doc_id,
        owner_type,
        owner_id
    )
    VALUES ($1, $2, $3, $4)
    RETURNING developer_id, doc_id
)
SELECT d.*
FROM inserted_owner io
JOIN docs d
    ON d.developer_id = io.developer_id
    AND d.doc_id = io.doc_id;
""").sql(pretty=True)
|
||
|
||
@rewrap_exceptions(
    {
        asyncpg.UniqueViolationError: partialclass(
            HTTPException,
            status_code=409,
            detail="A document with this ID already exists for this developer",
        ),
        asyncpg.NoDataFoundError: partialclass(
            HTTPException,
            status_code=404,
            detail="The specified owner does not exist",
        ),
        asyncpg.ForeignKeyViolationError: partialclass(
            HTTPException,
            status_code=404,
            detail="Developer or doc owner not found",
        ),
    }
)
@wrap_in_class(
    Doc,
    one=True,
    # Expose the row's `doc_id` under the `id` key as well.
    transform=lambda d: {
        **d,
        "id": d["doc_id"],
    },
)
@increase_counter("create_doc")
@pg_query
@beartype
async def create_doc(
    *,
    developer_id: UUID,
    doc_id: UUID | None = None,
    data: CreateDocRequest,
    owner_type: Literal["user", "agent", "org"] | None = None,
    owner_id: UUID | None = None,
) -> list[tuple[str, list]]:
    """
    Build the statements that insert a doc and, optionally, link it to an owner.

    Args:
        developer_id: Developer the doc belongs to.
        doc_id: ID for the new doc; a fresh UUIDv7 is generated when omitted.
        data: Request payload supplying the doc's fields.
        owner_type: Kind of owner to link the doc to.
        owner_id: ID of the owner to link the doc to.

    Returns:
        A list of ``(sql, params)`` pairs: always the docs INSERT, followed by
        the doc_owners INSERT when both ``owner_type`` and ``owner_id`` are set.
    """
    if not doc_id:
        doc_id = uuid7()

    # Falsy payload values are replaced by the defaults shown here.
    # NOTE(review): assumes CreateDocRequest exposes all of these attributes;
    # confirm against the generated model.
    statements = [
        (
            doc_query,
            [
                developer_id,
                doc_id,
                data.title,
                data.content,
                data.index or 0,
                data.modality or "text",
                data.embedding_model or "none",
                data.embedding_dimensions or 0,
                data.language or "english",
                data.metadata or {},
            ],
        )
    ]

    # The ownership link is only added when the owner is fully specified;
    # supplying just one of the two values creates the doc without an owner.
    if owner_type and owner_id:
        statements.append(
            (doc_owner_query, [developer_id, doc_id, owner_type, owner_id])
        )

    return statements
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
""" | ||
Timescale-based deletion of a doc record. | ||
""" | ||
from typing import Literal | ||
from uuid import UUID | ||
|
||
import asyncpg | ||
from beartype import beartype | ||
from fastapi import HTTPException | ||
from sqlglot import parse_one | ||
|
||
from ...autogen.openapi_model import ResourceDeletedResponse | ||
from ...common.utils.datetime import utcnow | ||
from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class | ||
|
||
# Delete doc query + ownership check.
#
# Parameters: $1 developer_id, $2 doc_id, $3 owner_type, $4 owner_id.
#
# Both DELETEs use the same rule for the owner filter: it is skipped only when
# owner_type AND owner_id are both NULL; otherwise the (owner_type, owner_id)
# pair must match a doc_owners row. Previously the outer DELETE skipped the
# check whenever owner_type alone was NULL, so a call carrying only an
# owner_id removed the doc without any ownership check and left its
# doc_owners rows behind.
#
# The EXISTS check relies on PostgreSQL running the CTE and the outer
# statement against the same snapshot, so the outer DELETE still sees the
# owner rows that the CTE removes.
delete_doc_query = parse_one("""
WITH deleted_owners AS (
    DELETE FROM doc_owners
    WHERE developer_id = $1
        AND doc_id = $2
        AND (
            ($3::text IS NULL AND $4::uuid IS NULL)
            OR (owner_type = $3 AND owner_id = $4)
        )
)
DELETE FROM docs
WHERE developer_id = $1
    AND doc_id = $2
    AND (
        ($3::text IS NULL AND $4::uuid IS NULL)
        OR EXISTS (
            SELECT 1 FROM doc_owners
            WHERE developer_id = $1
                AND doc_id = $2
                AND owner_type = $3
                AND owner_id = $4
        )
    )
RETURNING doc_id;
""").sql(pretty=True)
|
||
|
||
@rewrap_exceptions(
    {
        asyncpg.NoDataFoundError: partialclass(
            HTTPException,
            status_code=404,
            detail="Doc not found",
        )
    }
)
@wrap_in_class(
    ResourceDeletedResponse,
    one=True,
    # Build the response from the returned doc_id, stamped with the current
    # time from utcnow(); no jobs are attached to a doc deletion.
    transform=lambda d: {
        "id": d["doc_id"],
        "deleted_at": utcnow(),
        "jobs": [],
    },
)
@pg_query
@beartype
async def delete_doc(
    *,
    developer_id: UUID,
    doc_id: UUID,
    owner_type: Literal["user", "agent", "org"] | None = None,
    owner_id: UUID | None = None,
) -> tuple[str, list]:
    """
    Build the statement that deletes a doc and its doc_owners rows.

    Args:
        developer_id: Developer the doc belongs to.
        doc_id: ID of the doc to delete.
        owner_type: Optional owner kind the doc must be linked to.
        owner_id: Optional owner ID the doc must be linked to.

    Returns:
        ``(sql, params)`` where params are bound to $1..$4 in the order
        developer_id, doc_id, owner_type, owner_id.
    """
    bound_params = [developer_id, doc_id, owner_type, owner_id]
    return (delete_doc_query, bound_params)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
""" | ||
Timescale-based retrieval of a single doc record. | ||
""" | ||
from typing import Literal | ||
from uuid import UUID | ||
|
||
import asyncpg | ||
from beartype import beartype | ||
from fastapi import HTTPException | ||
from sqlglot import parse_one | ||
|
||
from ...autogen.openapi_model import Doc | ||
from ..utils import pg_query, wrap_in_class | ||
|
||
# Fetch one doc by (developer_id, doc_id), optionally restricted to an owner.
#
# Parameters: $1 developer_id, $2 doc_id, $3 owner_type, $4 owner_id.
# The owner filter is skipped only when $3 and $4 are both NULL.
#
# The doc_owners alias is `owners` rather than `do`: DO is a reserved word in
# PostgreSQL and cannot be used as an unquoted table alias.
doc_query = parse_one("""
SELECT d.*
FROM docs d
LEFT JOIN doc_owners owners
    ON d.developer_id = owners.developer_id
    AND d.doc_id = owners.doc_id
WHERE d.developer_id = $1
    AND d.doc_id = $2
    AND (
        ($3::text IS NULL AND $4::uuid IS NULL)
        OR (owners.owner_type = $3 AND owners.owner_id = $4)
    )
LIMIT 1;
""").sql(pretty=True)
|
||
|
||
@wrap_in_class(
    Doc,
    one=True,
    # Expose the row's `doc_id` under the `id` key as well.
    transform=lambda d: {
        **d,
        "id": d["doc_id"],
    },
)
@pg_query
@beartype
async def get_doc(
    *,
    developer_id: UUID,
    doc_id: UUID,
    owner_type: Literal["user", "agent", "org"] | None = None,
    owner_id: UUID | None = None,
) -> tuple[str, list]:
    """
    Build the statement that fetches a single doc.

    Args:
        developer_id: Developer the doc belongs to.
        doc_id: ID of the doc to fetch.
        owner_type: Optional owner kind to restrict the lookup to.
        owner_id: Optional owner ID to restrict the lookup to.

    Returns:
        ``(sql, params)`` where params are bound to $1..$4 in the order
        developer_id, doc_id, owner_type, owner_id. When both owner values
        are None the query does not filter by owner.
    """
    bound_params = [developer_id, doc_id, owner_type, owner_id]
    return (doc_query, bound_params)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
""" | ||
Timescale-based listing of docs with optional owner filter and pagination. | ||
""" | ||
from typing import Literal | ||
from uuid import UUID | ||
|
||
import asyncpg | ||
from beartype import beartype | ||
from fastapi import HTTPException | ||
from sqlglot import parse_one | ||
|
||
from ...autogen.openapi_model import Doc | ||
from ..utils import pg_query, wrap_in_class | ||
|
||
# Basic listing for all docs by developer.
#
# Parameters: $1 developer_id, $2 limit, $3 offset, $4 sort column
# ('created_at' | 'updated_at'), $5 direction ('asc' | 'desc').
#
# Sorting uses one CASE per (column, direction) pair, because ASC/DESC is part
# of each ORDER BY term and cannot be chosen by a parameter. Exactly one CASE
# yields a non-NULL key for a given $4/$5; the others are NULL for every row
# and do not affect the order. The earlier single `CASE ... END DESC` form
# always sorted descending, so 'asc' was ignored.
#
# doc_owners is deliberately not joined here: no owner column is selected or
# filtered on, and the join returned one duplicate row per additional owner,
# which skewed LIMIT/OFFSET pagination.
developer_docs_query = parse_one("""
SELECT d.*
FROM docs d
WHERE d.developer_id = $1
ORDER BY
    CASE WHEN $4 = 'created_at' AND $5 = 'asc' THEN d.created_at END ASC NULLS LAST,
    CASE WHEN $4 = 'created_at' AND $5 = 'desc' THEN d.created_at END DESC NULLS LAST,
    CASE WHEN $4 = 'updated_at' AND $5 = 'asc' THEN d.updated_at END ASC NULLS LAST,
    CASE WHEN $4 = 'updated_at' AND $5 = 'desc' THEN d.updated_at END DESC NULLS LAST
LIMIT $2
OFFSET $3;
""").sql(pretty=True)
|
||
# Listing for docs associated with a specific owner.
#
# Parameters: $1 developer_id, $2 limit, $3 offset, $4 sort column
# ('created_at' | 'updated_at'), $5 direction ('asc' | 'desc'),
# $6 owner_id, $7 owner_type.
#
# Sorting uses one CASE per (column, direction) pair, because ASC/DESC is part
# of each ORDER BY term and cannot be chosen by a parameter. Exactly one CASE
# yields a non-NULL key for a given $4/$5; the others are NULL for every row
# and do not affect the order. The earlier single `CASE ... END DESC` form
# always sorted descending, so 'asc' was ignored.
#
# The doc_owners alias is `owners` rather than `do`: DO is a reserved word in
# PostgreSQL and cannot be used as an unquoted table alias.
owner_docs_query = parse_one("""
SELECT d.*
FROM docs d
JOIN doc_owners owners
    ON d.developer_id = owners.developer_id
    AND d.doc_id = owners.doc_id
WHERE owners.developer_id = $1
    AND owners.owner_id = $6
    AND owners.owner_type = $7
ORDER BY
    CASE WHEN $4 = 'created_at' AND $5 = 'asc' THEN d.created_at END ASC NULLS LAST,
    CASE WHEN $4 = 'created_at' AND $5 = 'desc' THEN d.created_at END DESC NULLS LAST,
    CASE WHEN $4 = 'updated_at' AND $5 = 'asc' THEN d.updated_at END ASC NULLS LAST,
    CASE WHEN $4 = 'updated_at' AND $5 = 'desc' THEN d.updated_at END DESC NULLS LAST
LIMIT $2
OFFSET $3;
""").sql(pretty=True)
|
||
|
||
@wrap_in_class(
    Doc,
    one=False,
    # Expose each row's `doc_id` under the `id` key as well.
    transform=lambda d: {
        **d,
        "id": d["doc_id"],
    },
)
@pg_query
@beartype
async def list_docs(
    *,
    developer_id: UUID,
    owner_id: UUID | None = None,
    owner_type: Literal["user", "agent", "org"] | None = None,
    limit: int = 100,
    offset: int = 0,
    sort_by: Literal["created_at", "updated_at"] = "created_at",
    direction: Literal["asc", "desc"] = "desc",
) -> tuple[str, list]:
    """
    Build the statement that lists a developer's docs.

    Args:
        developer_id: Developer whose docs are listed.
        owner_id: Optional owner ID to filter by.
        owner_type: Optional owner kind to filter by.
        limit: Page size; must be between 1 and 100.
        offset: Number of rows to skip; must be >= 0.
        sort_by: Column name forwarded to the query as $4.
        direction: Sort direction forwarded to the query as $5.

    Returns:
        ``(sql, params)``. The owner-filtered query is used only when both
        ``owner_id`` and ``owner_type`` are set; otherwise all of the
        developer's docs are listed.

    Raises:
        HTTPException: 400 when direction, limit or offset is out of range.
    """
    if direction.lower() not in ("asc", "desc"):
        raise HTTPException(status_code=400, detail="Invalid sort direction")

    if not 1 <= limit <= 100:
        raise HTTPException(status_code=400, detail="Limit must be between 1 and 100")

    if offset < 0:
        raise HTTPException(status_code=400, detail="Offset must be >= 0")

    # Parameters shared by both queries ($1..$5).
    common_params = [developer_id, limit, offset, sort_by, direction]

    if owner_id and owner_type:
        # The owner query additionally binds $6 (owner_id) and $7 (owner_type).
        return (owner_docs_query, [*common_params, owner_id, owner_type])

    return (developer_docs_query, common_params)
Oops, something went wrong.