Skip to content

Commit

Permalink
Dialectic Endpoint Improvements (#67)
Browse files Browse the repository at this point in the history
* feat(dialectic) Allow for batch questions and load session history

* feat(dialectic) parallelize facts and history queries

* feat(agent) Addresses dev-258 allow specifying additional collections

* feat(uv) switched from poetry to uv

* feat(deriver) Turn off derivations by editing session metadata with a deriver_disabled flag

* fix(tests) Clean up test logic

---------

Co-authored-by: Vineeth Voruganti <[email protected]>
  • Loading branch information
VVoruganti and Vineeth Voruganti authored Sep 14, 2024
1 parent 5cac118 commit 2522992
Show file tree
Hide file tree
Showing 16 changed files with 2,115 additions and 2,613 deletions.
2 changes: 1 addition & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ docker-compose.yml.example
.github/**
.vscode/**
data/**

.venv
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ api/docker-compose.yml
*.db
data
docker-compose.yml
compose.yml


# Byte-compiled / optimized / DLL files
Expand Down
44 changes: 23 additions & 21 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,41 @@
# https://testdriven.io/blog/docker-best-practices/
FROM python:3.11-slim-bullseye

RUN apt-get update && apt-get install -y build-essential
COPY --from=ghcr.io/astral-sh/uv:0.4.9 /uv /bin/uv

# Set Working directory
WORKDIR /app

# https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
ENV PYTHONFAULTHANDLER=1 \
PYTHONUNBUFFERED=1 \
PYTHONHASHSEED=random \
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
POETRY_VERSION=1.8.3
RUN addgroup --system app && adduser --system --group app
RUN chown -R app:app /app
USER app

RUN pip install "poetry==$POETRY_VERSION"
# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1

# Copy only requirements to cache them in docker layer
WORKDIR /app
COPY poetry.lock pyproject.toml /app/
# Copy from the cache instead of linking since it's a mounted volume
ENV UV_LINK_MODE=copy

# Project initialization:
RUN poetry config virtualenvs.create false \
&& poetry install --no-root --no-interaction --no-ansi
# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --frozen --no-install-project --no-dev

WORKDIR /app
# Copy only requirements to cache them in docker layer
COPY uv.lock pyproject.toml /app/

RUN addgroup --system app && adduser --system --group app
RUN chown -R app:app /app
USER app
# Sync the project
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-dev

# Place executables in the environment at the front of the path
ENV PATH="/app/.venv/bin:$PATH"

COPY --chown=app:app src/ /app/src/

EXPOSE 8000

# https://stackoverflow.com/questions/29663459/python-app-does-not-print-anything-when-running-detached-in-docker
CMD ["fastapi", "run", "src/main.py"]
CMD ["fastapi", "dev", "--host", "0.0.0.0", "src/main.py"]

3 changes: 1 addition & 2 deletions docker-compose.yml.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: "3.8"
services:
api:
image: honcho:latest
Expand All @@ -18,7 +17,7 @@ services:
build:
context: .
dockerfile: Dockerfile
entrypoint: ["python", "-m", "src.deriver"]
entrypoint: ["uv", "run", "python", "-m", "src.deriver"]
depends_on:
database:
condition: service_healthy
Expand Down
2,475 changes: 0 additions & 2,475 deletions poetry.lock

This file was deleted.

69 changes: 33 additions & 36 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,37 +1,39 @@
[tool.poetry]
[project]
name = "honcho"
version = "0.0.11"
version = "0.0.12"
description = "Honcho Server"
authors = ["Plastic Labs <[email protected]>"]
authors = [
{name = "Plastic Labs", email = "[email protected]"},
]
readme = "README.md"
package-mode = false

[tool.poetry.dependencies]
python = "^3.9"
fastapi = "^0.111.0"
python-dotenv = "^1.0.0"
sqlalchemy = "^2.0.30"
fastapi-pagination = "^0.12.24"
pgvector = "^0.2.5"
sentry-sdk = {extras = ["fastapi", "sqlalchemy"], version = "^2.3.1"}
greenlet = "^3.0.3"
psycopg = {extras= ["binary"], version="^3.1.19"}
httpx = "^0.27.0"
opentelemetry-instrumentation-fastapi = "^0.45b0"
opentelemetry-sdk = "^1.24.0"
opentelemetry-exporter-otlp = "^1.24.0"
opentelemetry-instrumentation-sqlalchemy = "^0.45b0"
opentelemetry-instrumentation-logging = "^0.45b0"
rich = "^13.7.1"
mirascope = "^0.18.0"
openai = "^1.43.0"

[tool.poetry.group.test.dependencies]
pytest = "^8.2.2"
sqlalchemy-utils = "^0.41.2"
pytest-asyncio = "^0.23.7"
coverage = "^7.6.0"
interrogate = "^1.7.0"
requires-python = ">=3.9"
dependencies = [
"fastapi[standard]>=0.111.0",
"python-dotenv>=1.0.0",
"sqlalchemy>=2.0.30",
"fastapi-pagination>=0.12.24",
"pgvector>=0.2.5",
"sentry-sdk[fastapi,sqlalchemy]>=2.3.1",
"greenlet>=3.0.3",
"psycopg[binary]>=3.1.19",
"httpx>=0.27.0",
"opentelemetry-instrumentation-fastapi>=0.45b0",
"opentelemetry-sdk>=1.24.0",
"opentelemetry-exporter-otlp>=1.24.0",
"opentelemetry-instrumentation-sqlalchemy>=0.45b0",
"opentelemetry-instrumentation-logging>=0.45b0",
"rich>=13.7.1",
"mirascope>=0.18.0",
"openai>=1.43.0",
]
[tool.uv]
dev-dependencies = [
"pytest>=8.2.2",
"sqlalchemy-utils>=0.41.2",
"pytest-asyncio>=0.23.7",
"coverage>=7.6.0",
"interrogate>=1.7.0",
]

[tool.ruff.lint]
# from https://docs.astral.sh/ruff/linter/#rule-selection example
Expand All @@ -54,10 +56,5 @@ ignore = ["E501"]
[tool.ruff.flake8-bugbear]
extend-immutable-calls = ["fastapi.Depends"]


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
asyncio_mode = "auto"
154 changes: 113 additions & 41 deletions src/agent.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,48 @@
import asyncio
import os
import uuid
from typing import Iterable, Set

from dotenv import load_dotenv
from mirascope.base import BaseConfig
from mirascope.openai import OpenAICall, OpenAICallParams, azure_client_wrapper
from sqlalchemy.ext.asyncio import AsyncSession

from . import crud, schemas
from src import crud, schemas
from src.db import SessionLocal

load_dotenv()


class AsyncSet:
def __init__(self):
self._set: Set[str] = set()
self._lock = asyncio.Lock()

async def add(self, item: str):
async with self._lock:
self._set.add(item)

async def update(self, items: Iterable[str]):
async with self._lock:
self._set.update(items)

def get_set(self) -> Set[str]:
return self._set.copy()


class Dialectic(OpenAICall):
prompt_template = """
You are tasked with responding to the query based on the context provided.
---
query: {agent_input}
context: {retrieved_facts}
conversation_history: {chat_history}
---
Provide a brief, matter-of-fact, and appropriate response to the query based on the context provided. If the context provided doesn't aid in addressing the query, return None.
"""
agent_input: str
retrieved_facts: str
chat_history: list[str]

configuration = BaseConfig(
client_wrappers=[
Expand All @@ -35,61 +56,112 @@ class Dialectic(OpenAICall):
call_params = OpenAICallParams(
model=os.getenv("AZURE_OPENAI_DEPLOYMENT"), temperature=1.2, top_p=0.5
)
# call_params = OpenAICallParams(model="gpt-4o-2024-05-13")


async def chat_history(
    app_id: uuid.UUID, user_id: uuid.UUID, session_id: uuid.UUID
) -> list[str]:
    """Load a session's messages as "role:content" strings, in query order.

    Opens its own DB session so it can run concurrently with fact retrieval.
    """
    async with SessionLocal() as db:
        stmt = await crud.get_messages(db, app_id, user_id, session_id)
        result = await db.execute(stmt)
        return [
            f"{'user' if m.is_user else 'assistant'}:{m.content}"
            for m in result.scalars()
        ]


async def prep_inference(
    app_id: uuid.UUID,
    user_id: uuid.UUID,
    query: str,
    collection_name: str,
) -> None | list[str]:
    """Retrieve facts relevant to *query* from one named collection.

    Opens its own DB session so multiple lookups can run concurrently.
    Returns the contents of up to 3 matching documents, or None when the
    collection does not exist or no documents match.
    """
    async with SessionLocal() as db:
        collection = await crud.get_collection_by_name(
            db, app_id, user_id, collection_name
        )
        retrieved_facts = None
        if collection:
            retrieved_documents = await crud.query_documents(
                db=db,
                app_id=app_id,
                user_id=user_id,
                collection_id=collection.id,
                query=query,
                top_k=3,
            )
            if len(retrieved_documents) > 0:
                retrieved_facts = [d.content for d in retrieved_documents]
    return retrieved_facts


async def chat(
async def generate_facts(
    app_id: uuid.UUID,
    user_id: uuid.UUID,
    fact_set: AsyncSet,
    collection_name: str,
    questions: list[str],
):
    """Query one collection for every question concurrently, accumulating
    all retrieved facts into the shared *fact_set*."""

    async def fetch_facts(query):
        # One retrieval per question; empty results are skipped.
        retrieved_facts = await prep_inference(app_id, user_id, query, collection_name)
        if retrieved_facts is not None:
            await fact_set.update(retrieved_facts)

    await asyncio.gather(*[fetch_facts(query) for query in questions])


async def stream(
async def fact_generator(
    app_id: uuid.UUID,
    user_id: uuid.UUID,
    collections: list[str],
    questions: list[str],
):
    """Gather facts for all *questions* across all *collections* in parallel.

    Returns the deduplicated facts joined with newlines, or the literal
    string "None" when nothing was retrieved (matching the prompt template's
    expectation of a plain-text context field).
    """
    fact_set = AsyncSet()
    fact_tasks = [
        generate_facts(app_id, user_id, fact_set, col, questions) for col in collections
    ]
    await asyncio.gather(*fact_tasks)
    fact_set_copy = fact_set.get_set()
    facts = "None"
    if fact_set_copy and len(fact_set_copy) > 0:
        facts = "\n".join(fact_set_copy)
    return facts


async def chat(
    app_id: uuid.UUID,
    user_id: uuid.UUID,
    session_id: uuid.UUID,
    query: schemas.AgentQuery,
    stream: bool = False,
):
    """Answer one or more dialectic queries using retrieved facts and history.

    Fact retrieval across all requested collections and the session-history
    load run concurrently; both feed the Dialectic call. Returns an async
    stream when *stream* is true, otherwise an AgentChat with the reply.
    """
    # Normalize single-string inputs into lists.
    if isinstance(query.queries, str):
        questions = [query.queries]
    else:
        questions = query.queries

    if isinstance(query.collections, str):
        collections = [query.collections]
    else:
        collections = query.collections

    final_query = questions[0] if len(questions) == 1 else "\n".join(questions)

    # Fact generation and history retrieval are independent — overlap them.
    facts, history = await asyncio.gather(
        fact_generator(app_id, user_id, collections, questions),
        chat_history(app_id, user_id, session_id),
    )

    chain = Dialectic(
        agent_input=final_query,
        retrieved_facts=facts,
        chat_history=history,
    )

    if stream:
        return chain.stream_async()
    return schemas.AgentChat(content=chain.call().content)
Loading

0 comments on commit 2522992

Please sign in to comment.