Skip to content

Commit

Permalink
Dialectic Endpoint Improvements (#67)
Browse files Browse the repository at this point in the history
* feat(dialectic) Allow for batch questions and load session history

* feat(dialectic) parallelize facts and history queries

* feat(agent) Addresses dev-258 allow specifying additional collections

* feat(uv) switched from poetry to uv

* feat(deriver) Turn off derivations by editing session metadata with a deriver_disabled flag

* fix(tests) Clean up test logic

---------

Co-authored-by: Vineeth Voruganti <[email protected]>
  • Loading branch information
VVoruganti and Vineeth Voruganti authored Sep 14, 2024
1 parent 5cac118 commit 2522992
Show file tree
Hide file tree
Showing 16 changed files with 2,115 additions and 2,613 deletions.
2 changes: 1 addition & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ docker-compose.yml.example
.github/**
.vscode/**
data/**

.venv
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ api/docker-compose.yml
*.db
data
docker-compose.yml
compose.yml


# Byte-compiled / optimized / DLL files
Expand Down
44 changes: 23 additions & 21 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,41 @@
# https://testdriven.io/blog/docker-best-practices/
FROM python:3.11-slim-bullseye

RUN apt-get update && apt-get install -y build-essential
COPY --from=ghcr.io/astral-sh/uv:0.4.9 /uv /bin/uv

# Set Working directory
WORKDIR /app

# https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
ENV PYTHONFAULTHANDLER=1 \
PYTHONUNBUFFERED=1 \
PYTHONHASHSEED=random \
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
POETRY_VERSION=1.8.3
RUN addgroup --system app && adduser --system --group app
RUN chown -R app:app /app
USER app

RUN pip install "poetry==$POETRY_VERSION"
# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1

# Copy only requirements to cache them in docker layer
WORKDIR /app
COPY poetry.lock pyproject.toml /app/
# Copy from the cache instead of linking since it's a mounted volume
ENV UV_LINK_MODE=copy

# Project initialization:
RUN poetry config virtualenvs.create false \
&& poetry install --no-root --no-interaction --no-ansi
# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --frozen --no-install-project --no-dev

WORKDIR /app
# Copy only requirements to cache them in docker layer
COPY uv.lock pyproject.toml /app/

RUN addgroup --system app && adduser --system --group app
RUN chown -R app:app /app
USER app
# Sync the project
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-dev

# Place executables in the environment at the front of the path
ENV PATH="/app/.venv/bin:$PATH"

COPY --chown=app:app src/ /app/src/

EXPOSE 8000

# https://stackoverflow.com/questions/29663459/python-app-does-not-print-anything-when-running-detached-in-docker
CMD ["fastapi", "run", "src/main.py"]
CMD ["fastapi", "dev", "--host", "0.0.0.0", "src/main.py"]

3 changes: 1 addition & 2 deletions docker-compose.yml.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: "3.8"
services:
api:
image: honcho:latest
Expand All @@ -18,7 +17,7 @@ services:
build:
context: .
dockerfile: Dockerfile
entrypoint: ["python", "-m", "src.deriver"]
entrypoint: ["uv", "run", "python", "-m", "src.deriver"]
depends_on:
database:
condition: service_healthy
Expand Down
2,475 changes: 0 additions & 2,475 deletions poetry.lock

This file was deleted.

69 changes: 33 additions & 36 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,37 +1,39 @@
[tool.poetry]
[project]
name = "honcho"
version = "0.0.11"
version = "0.0.12"
description = "Honcho Server"
authors = ["Plastic Labs <[email protected]>"]
authors = [
{name = "Plastic Labs", email = "[email protected]"},
]
readme = "README.md"
package-mode = false

[tool.poetry.dependencies]
python = "^3.9"
fastapi = "^0.111.0"
python-dotenv = "^1.0.0"
sqlalchemy = "^2.0.30"
fastapi-pagination = "^0.12.24"
pgvector = "^0.2.5"
sentry-sdk = {extras = ["fastapi", "sqlalchemy"], version = "^2.3.1"}
greenlet = "^3.0.3"
psycopg = {extras= ["binary"], version="^3.1.19"}
httpx = "^0.27.0"
opentelemetry-instrumentation-fastapi = "^0.45b0"
opentelemetry-sdk = "^1.24.0"
opentelemetry-exporter-otlp = "^1.24.0"
opentelemetry-instrumentation-sqlalchemy = "^0.45b0"
opentelemetry-instrumentation-logging = "^0.45b0"
rich = "^13.7.1"
mirascope = "^0.18.0"
openai = "^1.43.0"

[tool.poetry.group.test.dependencies]
pytest = "^8.2.2"
sqlalchemy-utils = "^0.41.2"
pytest-asyncio = "^0.23.7"
coverage = "^7.6.0"
interrogate = "^1.7.0"
requires-python = ">=3.9"
dependencies = [
"fastapi[standard]>=0.111.0",
"python-dotenv>=1.0.0",
"sqlalchemy>=2.0.30",
"fastapi-pagination>=0.12.24",
"pgvector>=0.2.5",
"sentry-sdk[fastapi,sqlalchemy]>=2.3.1",
"greenlet>=3.0.3",
"psycopg[binary]>=3.1.19",
"httpx>=0.27.0",
"opentelemetry-instrumentation-fastapi>=0.45b0",
"opentelemetry-sdk>=1.24.0",
"opentelemetry-exporter-otlp>=1.24.0",
"opentelemetry-instrumentation-sqlalchemy>=0.45b0",
"opentelemetry-instrumentation-logging>=0.45b0",
"rich>=13.7.1",
"mirascope>=0.18.0",
"openai>=1.43.0",
]
[tool.uv]
dev-dependencies = [
"pytest>=8.2.2",
"sqlalchemy-utils>=0.41.2",
"pytest-asyncio>=0.23.7",
"coverage>=7.6.0",
"interrogate>=1.7.0",
]

[tool.ruff.lint]
# from https://docs.astral.sh/ruff/linter/#rule-selection example
Expand All @@ -54,10 +56,5 @@ ignore = ["E501"]
[tool.ruff.flake8-bugbear]
extend-immutable-calls = ["fastapi.Depends"]


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
asyncio_mode = "auto"
154 changes: 113 additions & 41 deletions src/agent.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,48 @@
import asyncio
import os
import uuid
from typing import Iterable, Set

from dotenv import load_dotenv
from mirascope.base import BaseConfig
from mirascope.openai import OpenAICall, OpenAICallParams, azure_client_wrapper
from sqlalchemy.ext.asyncio import AsyncSession

from . import crud, schemas
from src import crud, schemas
from src.db import SessionLocal

load_dotenv()


class AsyncSet:
def __init__(self):
self._set: Set[str] = set()
self._lock = asyncio.Lock()

async def add(self, item: str):
async with self._lock:
self._set.add(item)

async def update(self, items: Iterable[str]):
async with self._lock:
self._set.update(items)

def get_set(self) -> Set[str]:
return self._set.copy()


class Dialectic(OpenAICall):
prompt_template = """
You are tasked with responding to the query based on the context provided.
---
query: {agent_input}
context: {retrieved_facts}
conversation_history: {chat_history}
---
Provide a brief, matter-of-fact, and appropriate response to the query based on the context provided. If the context provided doesn't aid in addressing the query, return None.
"""
agent_input: str
retrieved_facts: str
chat_history: list[str]

configuration = BaseConfig(
client_wrappers=[
Expand All @@ -35,61 +56,112 @@ class Dialectic(OpenAICall):
call_params = OpenAICallParams(
model=os.getenv("AZURE_OPENAI_DEPLOYMENT"), temperature=1.2, top_p=0.5
)
# call_params = OpenAICallParams(model="gpt-4o-2024-05-13")


async def chat_history(
    app_id: uuid.UUID, user_id: uuid.UUID, session_id: uuid.UUID
) -> list[str]:
    """Load a session's messages as "role:content" strings, in query order.

    Opens its own DB session so it can run concurrently with fact retrieval.
    """
    async with SessionLocal() as db:
        stmt = await crud.get_messages(db, app_id, user_id, session_id)
        result = await db.execute(stmt)
        return [
            f"{'user' if m.is_user else 'assistant'}:{m.content}"
            for m in result.scalars()
        ]


async def prep_inference(
    app_id: uuid.UUID,
    user_id: uuid.UUID,
    query: str,
    collection_name: str,
) -> None | list[str]:
    """Retrieve facts relevant to *query* from one named collection.

    Opens its own DB session so multiple lookups can run concurrently.
    Returns the contents of up to 3 matching documents, or None when the
    collection does not exist or no documents match.
    """
    async with SessionLocal() as db:
        collection = await crud.get_collection_by_name(
            db, app_id, user_id, collection_name
        )
        retrieved_facts = None
        if collection:
            retrieved_documents = await crud.query_documents(
                db=db,
                app_id=app_id,
                user_id=user_id,
                collection_id=collection.id,
                query=query,
                top_k=3,
            )
            if len(retrieved_documents) > 0:
                retrieved_facts = [d.content for d in retrieved_documents]
    return retrieved_facts


async def chat(
async def generate_facts(
    app_id: uuid.UUID,
    user_id: uuid.UUID,
    fact_set: AsyncSet,
    collection_name: str,
    questions: list[str],
):
    """Query one collection for every question concurrently, accumulating
    all retrieved facts into the shared *fact_set*."""

    async def fetch_facts(query):
        # One retrieval per question; empty results are skipped.
        retrieved_facts = await prep_inference(app_id, user_id, query, collection_name)
        if retrieved_facts is not None:
            await fact_set.update(retrieved_facts)

    await asyncio.gather(*[fetch_facts(query) for query in questions])


async def stream(
async def fact_generator(
    app_id: uuid.UUID,
    user_id: uuid.UUID,
    collections: list[str],
    questions: list[str],
):
    """Gather facts for all *questions* across all *collections* in parallel.

    Returns the deduplicated facts joined with newlines, or the literal
    string "None" when nothing was retrieved (matching the prompt template's
    expectation of a plain-text context field).
    """
    fact_set = AsyncSet()
    fact_tasks = [
        generate_facts(app_id, user_id, fact_set, col, questions) for col in collections
    ]
    await asyncio.gather(*fact_tasks)
    fact_set_copy = fact_set.get_set()
    facts = "None"
    if fact_set_copy and len(fact_set_copy) > 0:
        facts = "\n".join(fact_set_copy)
    return facts


async def chat(
    app_id: uuid.UUID,
    user_id: uuid.UUID,
    session_id: uuid.UUID,
    query: schemas.AgentQuery,
    stream: bool = False,
):
    """Answer one or more dialectic queries using retrieved facts and history.

    Fact retrieval across all requested collections and the session-history
    load run concurrently; both feed the Dialectic call. Returns an async
    stream when *stream* is true, otherwise an AgentChat with the reply.
    """
    # Normalize single-string inputs into lists.
    if isinstance(query.queries, str):
        questions = [query.queries]
    else:
        questions = query.queries

    if isinstance(query.collections, str):
        collections = [query.collections]
    else:
        collections = query.collections

    final_query = questions[0] if len(questions) == 1 else "\n".join(questions)

    # Fact generation and history retrieval are independent — overlap them.
    facts, history = await asyncio.gather(
        fact_generator(app_id, user_id, collections, questions),
        chat_history(app_id, user_id, session_id),
    )

    chain = Dialectic(
        agent_input=final_query,
        retrieved_facts=facts,
        chat_history=history,
    )

    if stream:
        return chain.stream_async()
    return schemas.AgentChat(content=chain.call().content)
Loading

0 comments on commit 2522992

Please sign in to comment.