Skip to content

Commit

Permalink
vineeth/dev 357 (#66)
Browse files Browse the repository at this point in the history
* fix(api) Remove slowapi dependency

* Found bug in mirascope that requires an empty OPENAI_API_KEY
  • Loading branch information
VVoruganti authored Sep 2, 2024
1 parent 658c52d commit 3acef51
Show file tree
Hide file tree
Showing 8 changed files with 668 additions and 611 deletions.
8 changes: 4 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# https://pythonspeed.com/articles/base-image-python-docker-images/
# https://testdriven.io/blog/docker-best-practices/
FROM python:3.10-slim-bullseye
FROM python:3.11-slim-bullseye

RUN apt-get update && apt-get install -y build-essential

Expand All @@ -13,7 +13,7 @@ ENV PYTHONFAULTHANDLER=1 \
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
POETRY_VERSION=1.4.1
POETRY_VERSION=1.8.3

RUN pip install "poetry==$POETRY_VERSION"

Expand All @@ -23,7 +23,7 @@ COPY poetry.lock pyproject.toml /app/

# Project initialization:
RUN poetry config virtualenvs.create false \
&& poetry install --no-root --no-interaction --no-ansi --without dev
&& poetry install --no-root --no-interaction --no-ansi

WORKDIR /app

Expand All @@ -36,5 +36,5 @@ COPY --chown=app:app src/ /app/src/
EXPOSE 8000

# https://stackoverflow.com/questions/29663459/python-app-does-not-print-anything-when-running-detached-in-docker
CMD ["python", "-m", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["fastapi", "run", "src/main.py"]

29 changes: 15 additions & 14 deletions fly.toml
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
# fly.toml app configuration file generated for honcho-restless-dew-484 on 2024-01-18T08:20:57-08:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#

app = "honcho"
kill_signal = "SIGINT"
kill_timeout = "5s"
app = 'honcho'
primary_region = 'ewr'
kill_signal = 'SIGINT'
kill_timeout = '5s'

[build]

[processes]
api = "python -m uvicorn src.main:app --host 0.0.0.0 --port 8000"
deriver = "python -m src.deriver"
api = 'fastapi run src/main.py'
deriver = 'python -m src.deriver'

[http_service]
internal_port = 8000
auto_stop_machines = false
auto_stop_machines = 'off'
auto_start_machines = true
min_machines_running = 1
processes = ["api"]
processes = ['api']

[http_service.concurrency]
type = "requests"
type = 'requests'
hard_limit = 250
soft_limit = 200

[[vm]]
cpu_kind = "shared"
memory = '512mb'
cpu_kind = 'shared'
cpus = 1
memory_mb = 512
processes = ["api", "deriver"]
processes = ['api', 'deriver']
1,185 changes: 629 additions & 556 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,27 @@ version = "0.0.11"
description = "Honcho Server"
authors = ["Plastic Labs <[email protected]>"]
readme = "README.md"
package-mode = false

[tool.poetry.dependencies]
python = "^3.9"
fastapi = "^0.111.0"
python-dotenv = "^1.0.0"
sqlalchemy = "^2.0.30"
slowapi = "^0.1.9"
fastapi-pagination = "^0.12.24"
pgvector = "^0.2.5"
openai = "^1.12.0"
sentry-sdk = {extras = ["fastapi", "sqlalchemy"], version = "^2.3.1"}
greenlet = "^3.0.3"
psycopg = {extras= ["binary"], version="^3.1.19"}
httpx = "^0.27.0"
mirascope = "^0.18.2"
opentelemetry-instrumentation-fastapi = "^0.45b0"
opentelemetry-sdk = "^1.24.0"
opentelemetry-exporter-otlp = "^1.24.0"
opentelemetry-instrumentation-sqlalchemy = "^0.45b0"
opentelemetry-instrumentation-logging = "^0.45b0"
rich = "^13.7.1"
mirascope = "^0.18.0"
openai = "^1.43.0"

[tool.poetry.group.test.dependencies]
pytest = "^8.2.2"
Expand Down
18 changes: 8 additions & 10 deletions src/deriver/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,15 @@ async def process_ai_message(
result = await db.execute(messages_stmt)
messages = result.scalars().all()[::-1]

chat_history_str = "\n".join(
[f"human: {m.content}" if m.is_user else f"ai: {m.content}" for m in messages]
)
chat_history_str = "\n".join([
f"human: {m.content}" if m.is_user else f"ai: {m.content}" for m in messages
])
# append current message to chat history
chat_history_str = f"{chat_history_str}\nai: {content}"

# user prediction thought
user_prediction_thought = UserPredictionThought(chat_history=chat_history_str)
user_prediction_thought_response = await user_prediction_thought.call_async()
user_prediction_thought_response = user_prediction_thought.call()

## query the collection to build the context
additional_data = re.findall(
Expand Down Expand Up @@ -132,9 +132,7 @@ async def process_ai_message(
retrieved_context=context_str,
chat_history=chat_history_str,
)
user_prediction_thought_revision_response = (
await user_prediction_thought_revision.call_async()
)
user_prediction_thought_revision_response = user_prediction_thought_revision.call()

if user_prediction_thought_revision_response.content == "None":
rprint("[blue]Model predicted no changes to the user prediction thought")
Expand Down Expand Up @@ -245,7 +243,7 @@ async def process_user_message(
voe_thought = VoeThought(
user_prediction_thought_revision=metamessage.content, actual=content
)
voe_thought_response = await voe_thought.call_async()
voe_thought_response = voe_thought.call()

# VoE derive facts
voe_derive_facts = VoeDeriveFacts(
Expand All @@ -254,7 +252,7 @@ async def process_user_message(
actual=content,
voe_thought=voe_thought_response.content,
)
voe_derive_facts_response = await voe_derive_facts.call_async()
voe_derive_facts_response = voe_derive_facts.call()

# debugging
rprint("[orange1]=================")
Expand Down Expand Up @@ -329,7 +327,7 @@ async def check_dups(

check_duplication.existing_facts = existing_facts
check_duplication.new_fact = fact
response = await check_duplication.call_async()
response = check_duplication.call()
rprint("[light_steel_blue]==================")
rprint(f"[light_steel_blue]Dedupe Responses: {response.content}")
rprint("[light_steel_blue]==================")
Expand Down
3 changes: 2 additions & 1 deletion src/deriver/queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ async def schedule_session(
async with semaphore, SessionLocal() as db:
try:
available_slots = semaphore._value
print(available_slots)
new_sessions = await get_available_sessions(db, available_slots)

if new_sessions:
Expand Down Expand Up @@ -152,6 +153,6 @@ async def main():
AsyncioIntegration(),
],
)
semaphore = asyncio.Semaphore(5) # Limit to 5 concurrent dequeuing operations
semaphore = asyncio.Semaphore(6) # Limit to 6 concurrent dequeuing operations
queue_empty_flag = asyncio.Event() # Event to signal when the queue is empty
await polling_loop(semaphore, queue_empty_flag)
2 changes: 2 additions & 0 deletions src/deriver/voe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@

from mirascope.base import BaseConfig
from mirascope.openai import OpenAICall, OpenAICallParams, azure_client_wrapper
from pydantic import ConfigDict


class HonchoCall(OpenAICall):
model_config = ConfigDict(arbitrary_types_allowed=True)
configuration = BaseConfig(
client_wrappers=[
azure_client_wrapper(
Expand Down
28 changes: 5 additions & 23 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@
ConsoleSpanExporter,
SimpleSpanProcessor,
)
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware
from slowapi.util import get_remote_address
from starlette.exceptions import HTTPException as StarletteHTTPException

from src.routers import (
Expand Down Expand Up @@ -230,32 +226,18 @@ async def lifespan(app: FastAPI):

router = APIRouter(prefix="/apps/{app_id}/users/{user_id}")

# Create a Limiter instance
limiter = Limiter(key_func=get_remote_address, default_limits=["100/minute"])

# Add SlowAPI middleware to the application
app.state.limiter = limiter
app.add_exception_handler(
exc_class_or_status_code=RateLimitExceeded,
handler=_rate_limit_exceeded_handler, # type: ignore
)
app.add_middleware(SlowAPIMiddleware)


add_pagination(app)


@app.exception_handler(StarletteHTTPException)
async def http_exception_handler(request, exc):
current_span = trace.get_current_span()
if (current_span is not None) and (current_span.is_recording()):
current_span.set_attributes(
{
"http.status_text": str(exc.detail),
"otel.status_description": f"{exc.status_code} / {str(exc.detail)}",
"otel.status_code": "ERROR",
}
)
current_span.set_attributes({
"http.status_text": str(exc.detail),
"otel.status_description": f"{exc.status_code} / {str(exc.detail)}",
"otel.status_code": "ERROR",
})
return PlainTextResponse(
json.dumps({"detail": str(exc.detail)}), status_code=exc.status_code
)
Expand Down

0 comments on commit 3acef51

Please sign in to comment.