From 116edf8d3c57558ea57409521996f018b163712a Mon Sep 17 00:00:00 2001 From: vedantsahai18 Date: Wed, 18 Dec 2024 23:43:07 -0500 Subject: [PATCH] wip(agents-api): Add file sql queries --- .../agents_api/queries/files/__init__.py | 21 +++ .../agents_api/queries/files/create_file.py | 150 ++++++++++++++++ .../agents_api/queries/files/delete_file.py | 118 +++++++++++++ .../agents_api/queries/files/get_file.py | 69 ++++++++ .../agents_api/queries/files/list_files.py | 161 ++++++++++++++++++ agents-api/tests/test_files_queries.py | 73 +++++--- 6 files changed, 567 insertions(+), 25 deletions(-) create mode 100644 agents-api/agents_api/queries/files/__init__.py create mode 100644 agents-api/agents_api/queries/files/create_file.py create mode 100644 agents-api/agents_api/queries/files/delete_file.py create mode 100644 agents-api/agents_api/queries/files/get_file.py create mode 100644 agents-api/agents_api/queries/files/list_files.py diff --git a/agents-api/agents_api/queries/files/__init__.py b/agents-api/agents_api/queries/files/__init__.py new file mode 100644 index 000000000..1da09114a --- /dev/null +++ b/agents-api/agents_api/queries/files/__init__.py @@ -0,0 +1,21 @@ +""" +The `files` module within the `queries` package provides SQL query functions for managing files +in the PostgreSQL database. This includes operations for: + +- Creating new files +- Retrieving file details +- Listing files with filtering and pagination +- Deleting files and their associations +""" + +from .create_file import create_file +from .delete_file import delete_file +from .get_file import get_file +from .list_files import list_files + +__all__ = [ + "create_file", + "delete_file", + "get_file", + "list_files" +] \ No newline at end of file diff --git a/agents-api/agents_api/queries/files/create_file.py b/agents-api/agents_api/queries/files/create_file.py new file mode 100644 index 000000000..77e065433 --- /dev/null +++ b/agents-api/agents_api/queries/files/create_file.py @@ -0,0 +1,150 @@ +""" +This module contains the functionality for creating files in the PostgreSQL database. +It includes functions to construct and execute SQL queries for inserting new file records. +""" + +from typing import Any, Literal +from uuid import UUID + +from beartype import beartype +from sqlglot import parse_one +from uuid_extensions import uuid7 +import asyncpg +from fastapi import HTTPException +import base64 +import hashlib + +from ...autogen.openapi_model import CreateFileRequest, File +from ...metrics.counters import increase_counter +from ..utils import pg_query, rewrap_exceptions, wrap_in_class, partialclass + +# Create file +file_query = parse_one(""" +INSERT INTO files ( + developer_id, + file_id, + name, + description, + mime_type, + size, + hash, +) +VALUES ( + $1, -- developer_id + $2, -- file_id + $3, -- name + $4, -- description + $5, -- mime_type + $6, -- size + $7, -- hash +) +RETURNING *; +""").sql(pretty=True) + +# Create user file association +user_file_query = parse_one(""" +INSERT INTO user_files ( + developer_id, + user_id, + file_id +) +VALUES ($1, $2, $3) +ON CONFLICT (developer_id, user_id, file_id) DO NOTHING; -- Uses primary key index +""").sql(pretty=True) + +# Create agent file association +agent_file_query = parse_one(""" +INSERT INTO agent_files ( + developer_id, + agent_id, + file_id +) +VALUES ($1, $2, $3) +ON CONFLICT (developer_id, agent_id, file_id) DO NOTHING; -- Uses primary key index +""").sql(pretty=True) + +# Add error handling decorator +# @rewrap_exceptions( +# { +# asyncpg.UniqueViolationError: partialclass( +# HTTPException, +# status_code=409, +# detail="A file with this name already exists for this developer", +# ), +# asyncpg.NoDataFoundError: partialclass( +# HTTPException, +# status_code=404, +# detail="The specified owner does not exist", +# ), +# asyncpg.ForeignKeyViolationError: partialclass( +# HTTPException, +# status_code=404, +# detail="The specified developer does not exist", +# ), +# } +# ) +@wrap_in_class( + File, + one=True, + transform=lambda d: { + **d, + "id": d["file_id"], + "content": "DUMMY: NEED TO FETCH CONTENT FROM BLOB STORAGE", + }, +) +@increase_counter("create_file") +@pg_query +@beartype +async def create_file( + *, + developer_id: UUID, + file_id: UUID | None = None, + data: CreateFileRequest, + owner_type: Literal["user", "agent"] | None = None, + owner_id: UUID | None = None, +) -> list[tuple[str, list] | tuple[str, list, str]]: + """ + Constructs and executes SQL queries to create a new file and optionally associate it with an owner. + + Parameters: + developer_id (UUID): The unique identifier for the developer. + file_id (UUID | None): Optional unique identifier for the file. + data (CreateFileRequest): The file data to insert. + owner_type (Literal["user", "agent"] | None): Optional type of owner + owner_id (UUID | None): Optional ID of the owner + + Returns: + list[tuple[str, list] | tuple[str, list, str]]: List of SQL queries, their parameters, and fetch type + """ + file_id = file_id or uuid7() + + # Calculate size and hash + content_bytes = base64.b64decode(data.content) + data.size = len(content_bytes) + data.hash = hashlib.sha256(content_bytes).digest() + + # Base file parameters + file_params = [ + developer_id, + file_id, + data.name, + data.description, + data.mime_type, + data.size, + data.hash, + ] + + queries = [] + + # Create the file + queries.append((file_query, file_params)) + + # Create the association only if both owner_type and owner_id are provided + if owner_type and owner_id: + assoc_params = [developer_id, owner_id, file_id] + if owner_type == "user": + queries.append((user_file_query, assoc_params)) + else: # agent + queries.append((agent_file_query, assoc_params)) + + return queries \ No newline at end of file diff --git a/agents-api/agents_api/queries/files/delete_file.py b/agents-api/agents_api/queries/files/delete_file.py new file mode 100644 index 000000000..d37e6f3e8 --- /dev/null +++ b/agents-api/agents_api/queries/files/delete_file.py @@ -0,0 +1,118 @@ +""" +This module contains the functionality for deleting files from the PostgreSQL database. +It constructs and executes SQL queries to remove file records and associated data. +""" + +from typing import Literal +from uuid import UUID + +from beartype import beartype +from sqlglot import parse_one +import asyncpg +from fastapi import HTTPException + +from ...autogen.openapi_model import ResourceDeletedResponse +from ...common.utils.datetime import utcnow +from ...metrics.counters import increase_counter +from ..utils import pg_query, rewrap_exceptions, wrap_in_class, partialclass + +# Simple query to delete file (when no associations exist) +delete_file_query = parse_one(""" +DELETE FROM files +WHERE developer_id = $1 +AND file_id = $2 +AND NOT EXISTS ( + SELECT 1 + FROM user_files uf + WHERE uf.file_id = $2 + LIMIT 1 +) +AND NOT EXISTS ( + SELECT 1 + FROM agent_files af + WHERE af.file_id = $2 + LIMIT 1 +) +RETURNING file_id; +""").sql(pretty=True) + +# Query to delete owner's association +delete_user_assoc_query = parse_one(""" +DELETE FROM user_files +WHERE developer_id = $1 +AND file_id = $2 +AND user_id = $3 +RETURNING file_id; +""").sql(pretty=True) + +delete_agent_assoc_query = parse_one(""" +DELETE FROM agent_files +WHERE developer_id = $1 +AND file_id = $2 +AND agent_id = $3 +RETURNING file_id; +""").sql(pretty=True) + + +# @rewrap_exceptions( +# { +# asyncpg.NoDataFoundError: partialclass( +# HTTPException, +# status_code=404, +# detail="File not found", +# ), +# } +# ) +@wrap_in_class( + ResourceDeletedResponse, + one=True, + transform=lambda d: { + "id": d["file_id"], + "deleted_at": utcnow(), + "jobs": [], + }, +) +@increase_counter("delete_file") +@pg_query +@beartype +async def delete_file( + *, + file_id: UUID, + developer_id: UUID, + owner_id: UUID | None = None, + owner_type: Literal["user", "agent"] | None = None, +) -> list[tuple[str, list] | tuple[str, list, str]]: + """ + Deletes a file and/or its association using simple, efficient queries. + + If owner details provided: + 1. Deletes the owner's association + 2. Checks for remaining associations + 3. Deletes file if no associations remain + If no owner details: + - Deletes file only if it has no associations + + Args: + file_id (UUID): The UUID of the file to be deleted. + developer_id (UUID): The UUID of the developer owning the file. + owner_id (UUID | None): Optional owner ID to verify ownership + owner_type (str | None): Optional owner type to verify ownership + + Returns: + list[tuple[str, list] | tuple[str, list, str]]: List of SQL queries, their parameters, and fetch type + """ + queries = [] + + if owner_id and owner_type: + # Delete specific association + assoc_params = [developer_id, file_id, owner_id] + assoc_query = delete_user_assoc_query if owner_type == "user" else delete_agent_assoc_query + queries.append((assoc_query, assoc_params)) + + # If no associations, delete file + queries.append((delete_file_query, [developer_id, file_id])) + else: + # Try to delete file if it has no associations + queries.append((delete_file_query, [developer_id, file_id])) + + return queries diff --git a/agents-api/agents_api/queries/files/get_file.py b/agents-api/agents_api/queries/files/get_file.py new file mode 100644 index 000000000..3143b8ff0 --- /dev/null +++ b/agents-api/agents_api/queries/files/get_file.py @@ -0,0 +1,69 @@ +""" +This module contains the functionality for retrieving a single file from the PostgreSQL database. +It constructs and executes SQL queries to fetch file details based on file ID and developer ID. +""" + +from uuid import UUID + +from beartype import beartype +from sqlglot import parse_one +from fastapi import HTTPException +import asyncpg + +from ...autogen.openapi_model import File +from ..utils import pg_query, rewrap_exceptions, wrap_in_class, partialclass + +# Define the raw SQL query +file_query = parse_one(""" +SELECT + file_id, -- Only select needed columns + developer_id, + name, + description, + mime_type, + size, + hash, + created_at, + updated_at +FROM files +WHERE developer_id = $1 -- Order matches composite index (developer_id, file_id) + AND file_id = $2 -- Using both parts of the index +LIMIT 1; -- Early termination once found +""").sql(pretty=True) + +@rewrap_exceptions( + { + asyncpg.NoDataFoundError: partialclass( + HTTPException, + status_code=404, + detail="File not found", + ), + asyncpg.ForeignKeyViolationError: partialclass( + HTTPException, + status_code=404, + detail="Developer not found", + ), + } +) +@wrap_in_class(File, one=True, transform=lambda d: {"id": d["file_id"], **d}) +@pg_query +@beartype +async def get_file(*, file_id: UUID, developer_id: UUID) -> tuple[str, list]: + """ + Constructs the SQL query to retrieve a file's details. + Uses composite index on (developer_id, file_id) for efficient lookup. + + Args: + file_id (UUID): The UUID of the file to retrieve. + developer_id (UUID): The UUID of the developer owning the file. + + Returns: + tuple[str, list]: A tuple containing the SQL query and its parameters. + + Raises: + HTTPException: If file or developer not found (404) + """ + return ( + file_query, + [developer_id, file_id], # Order matches index columns + ) \ No newline at end of file diff --git a/agents-api/agents_api/queries/files/list_files.py b/agents-api/agents_api/queries/files/list_files.py new file mode 100644 index 000000000..a01f74214 --- /dev/null +++ b/agents-api/agents_api/queries/files/list_files.py @@ -0,0 +1,161 @@ +""" +This module contains the functionality for listing files from the PostgreSQL database. +It constructs and executes SQL queries to fetch a list of files based on developer ID with pagination. +""" + +from typing import Any, Literal +from uuid import UUID +import asyncpg +from beartype import beartype +from fastapi import HTTPException +from sqlglot import parse_one + +from ...autogen.openapi_model import File +from ..utils import pg_query, rewrap_exceptions, wrap_in_class, partialclass + +# Query to list all files for a developer (uses developer_id index) +developer_files_query = parse_one(""" +SELECT + file_id, + developer_id, + name, + description, + mime_type, + size, + hash, + created_at, + updated_at +FROM files +WHERE developer_id = $1 +ORDER BY + CASE + WHEN $4 = 'created_at' AND $5 = 'asc' THEN created_at + WHEN $4 = 'created_at' AND $5 = 'desc' THEN created_at + WHEN $4 = 'updated_at' AND $5 = 'asc' THEN updated_at + WHEN $4 = 'updated_at' AND $5 = 'desc' THEN updated_at + END DESC NULLS LAST +LIMIT $2 +OFFSET $3; +""").sql(pretty=True) + +# Query to list files for a specific user (uses composite indexes) +user_files_query = parse_one(""" +SELECT + f.file_id, + f.developer_id, + f.name, + f.description, + f.mime_type, + f.size, + f.hash, + f.created_at, + f.updated_at +FROM user_files uf +JOIN files f USING (developer_id, file_id) +WHERE uf.developer_id = $1 +AND uf.user_id = $6 +ORDER BY + CASE + WHEN $4 = 'created_at' AND $5 = 'asc' THEN f.created_at + WHEN $4 = 'created_at' AND $5 = 'desc' THEN f.created_at + WHEN $4 = 'updated_at' AND $5 = 'asc' THEN f.updated_at + WHEN $4 = 'updated_at' AND $5 = 'desc' THEN f.updated_at + END DESC NULLS LAST +LIMIT $2 +OFFSET $3; +""").sql(pretty=True) + +# Query to list files for a specific agent (uses composite indexes) +agent_files_query = parse_one(""" +SELECT + f.file_id, + f.developer_id, + f.name, + f.description, + f.mime_type, + f.size, + f.hash, + f.created_at, + f.updated_at +FROM agent_files af +JOIN files f USING (developer_id, file_id) +WHERE af.developer_id = $1 +AND af.agent_id = $6 +ORDER BY + CASE + WHEN $4 = 'created_at' AND $5 = 'asc' THEN f.created_at + WHEN $4 = 'created_at' AND $5 = 'desc' THEN f.created_at + WHEN $4 = 'updated_at' AND $5 = 'asc' THEN f.updated_at + WHEN $4 = 'updated_at' AND $5 = 'desc' THEN f.updated_at + END DESC NULLS LAST +LIMIT $2 +OFFSET $3; +""").sql(pretty=True) + +@wrap_in_class( + File, + one=True, + transform=lambda d: { + **d, + "content": "DUMMY: NEED TO FETCH CONTENT FROM BLOB STORAGE", + }, +) +@pg_query +@beartype +async def list_files( + *, + developer_id: UUID, + owner_id: UUID | None = None, + owner_type: Literal["user", "agent"] | None = None, + limit: int = 100, + offset: int = 0, + sort_by: Literal["created_at", "updated_at"] = "created_at", + direction: Literal["asc", "desc"] = "desc", +) -> tuple[str, list]: + """ + Lists files with optimized queries for two cases: + 1. Owner specified: Returns files associated with that owner + 2. No owner: Returns all files for the developer + + Args: + developer_id: UUID of the developer + owner_id: Optional UUID of the owner (user or agent) + owner_type: Optional type of owner ("user" or "agent") + limit: Maximum number of records to return (1-100) + offset: Number of records to skip + sort_by: Field to sort by + direction: Sort direction ('asc' or 'desc') + + Returns: + Tuple of (query, params) + + Raises: + HTTPException: If parameters are invalid + """ + # Validate parameters + if direction.lower() not in ["asc", "desc"]: + raise HTTPException(status_code=400, detail="Invalid sort direction") + + if limit > 100 or limit < 1: + raise HTTPException(status_code=400, detail="Limit must be between 1 and 100") + + if offset < 0: + raise HTTPException(status_code=400, detail="Offset must be non-negative") + + # Base parameters used in all queries + params = [ + developer_id, + limit, + offset, + sort_by, + direction, + ] + + # Choose appropriate query based on owner details + if owner_id and owner_type: + params.append(owner_id) # Add owner_id as $6 + query = user_files_query if owner_type == "user" else agent_files_query + else: + query = developer_files_query + + return (query, params) diff --git a/agents-api/tests/test_files_queries.py b/agents-api/tests/test_files_queries.py index 367fcccd4..5565d4059 100644 --- a/agents-api/tests/test_files_queries.py +++ b/agents-api/tests/test_files_queries.py @@ -1,22 +1,36 @@ # # Tests for entry queries -# from ward import test - -# from agents_api.autogen.openapi_model import CreateFileRequest -# from agents_api.queries.files.create_file import create_file -# from agents_api.queries.files.delete_file import delete_file -# from agents_api.queries.files.get_file import get_file -# from tests.fixtures import ( -# cozo_client, -# test_developer_id, -# test_file, -# ) - - -# @test("query: create file") -# def _(client=cozo_client, developer_id=test_developer_id): -# create_file( +from uuid_extensions import uuid7 +from ward import raises, test +from fastapi import HTTPException +from agents_api.autogen.openapi_model import CreateFileRequest +from agents_api.queries.files.create_file import create_file +from agents_api.queries.files.delete_file import delete_file +from agents_api.queries.files.get_file import get_file +from tests.fixtures import pg_dsn, test_agent, test_developer_id +from agents_api.clients.pg import create_db_pool + + +@test("query: create file") +async def _(dsn=pg_dsn, developer_id=test_developer_id): + pool = await create_db_pool(dsn=dsn) + await create_file( + developer_id=developer_id, + data=CreateFileRequest( + name="Hello", + description="World", + mime_type="text/plain", + content="eyJzYW1wbGUiOiAidGVzdCJ9", + ), + connection_pool=pool, + ) + + +# @test("query: get file") +# async def _(dsn=pg_dsn, developer_id=test_developer_id): +# pool = await create_db_pool(dsn=dsn) +# file = create_file( # developer_id=developer_id, # data=CreateFileRequest( # name="Hello", @@ -24,21 +38,20 @@ # mime_type="text/plain", # content="eyJzYW1wbGUiOiAidGVzdCJ9", # ), -# client=client, +# connection_pool=pool, # ) - -# @test("query: get file") -# def _(client=cozo_client, file=test_file, developer_id=test_developer_id): -# get_file( +# get_file_result = get_file( # developer_id=developer_id, # file_id=file.id, -# client=client, +# connection_pool=pool, # ) +# assert file == get_file_result # @test("query: delete file") -# def _(client=cozo_client, developer_id=test_developer_id): +# async def _(dsn=pg_dsn, developer_id=test_developer_id): +# pool = await create_db_pool(dsn=dsn) # file = create_file( # developer_id=developer_id, # data=CreateFileRequest( @@ -47,11 +60,21 @@ # mime_type="text/plain", # content="eyJzYW1wbGUiOiAidGVzdCJ9", # ), -# client=client, +# connection_pool=pool, # ) # delete_file( # developer_id=developer_id, # file_id=file.id, -# client=client, +# connection_pool=pool, # ) + +# with raises(HTTPException) as e: +# get_file( +# developer_id=developer_id, +# file_id=file.id, +# connection_pool=pool, +# ) + +# assert e.value.status_code == 404 +# assert e.value.detail == "The specified file does not exist" \ No newline at end of file