Skip to content

Commit

Permalink
vineeth/dev 357 (#66)
Browse files Browse the repository at this point in the history
* fix(api) Remove slowapi dependency

* Found bug in mirascope that requires an empty OPENAI_API_KEY
  • Loading branch information
VVoruganti authored Sep 2, 2024
1 parent 658c52d commit 3acef51
Show file tree
Hide file tree
Showing 8 changed files with 668 additions and 611 deletions.
8 changes: 4 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# https://pythonspeed.com/articles/base-image-python-docker-images/
# https://testdriven.io/blog/docker-best-practices/
FROM python:3.10-slim-bullseye
FROM python:3.11-slim-bullseye

RUN apt-get update && apt-get install -y build-essential

Expand All @@ -13,7 +13,7 @@ ENV PYTHONFAULTHANDLER=1 \
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
POETRY_VERSION=1.4.1
POETRY_VERSION=1.8.3

RUN pip install "poetry==$POETRY_VERSION"

Expand All @@ -23,7 +23,7 @@ COPY poetry.lock pyproject.toml /app/

# Project initialization:
RUN poetry config virtualenvs.create false \
&& poetry install --no-root --no-interaction --no-ansi --without dev
&& poetry install --no-root --no-interaction --no-ansi

WORKDIR /app

Expand All @@ -36,5 +36,5 @@ COPY --chown=app:app src/ /app/src/
EXPOSE 8000

# https://stackoverflow.com/questions/29663459/python-app-does-not-print-anything-when-running-detached-in-docker
CMD ["python", "-m", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["fastapi", "run", "src/main.py"]

29 changes: 15 additions & 14 deletions fly.toml
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
# fly.toml app configuration file generated for honcho-restless-dew-484 on 2024-01-18T08:20:57-08:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#

app = "honcho"
kill_signal = "SIGINT"
kill_timeout = "5s"
app = 'honcho'
primary_region = 'ewr'
kill_signal = 'SIGINT'
kill_timeout = '5s'

[build]

[processes]
api = "python -m uvicorn src.main:app --host 0.0.0.0 --port 8000"
deriver = "python -m src.deriver"
api = 'fastapi run src/main.py'
deriver = 'python -m src.deriver'

[http_service]
internal_port = 8000
auto_stop_machines = false
auto_stop_machines = 'off'
auto_start_machines = true
min_machines_running = 1
processes = ["api"]
processes = ['api']

[http_service.concurrency]
type = "requests"
type = 'requests'
hard_limit = 250
soft_limit = 200

[[vm]]
cpu_kind = "shared"
memory = '512mb'
cpu_kind = 'shared'
cpus = 1
memory_mb = 512
processes = ["api", "deriver"]
processes = ['api', 'deriver']
1,185 changes: 629 additions & 556 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,27 @@ version = "0.0.11"
description = "Honcho Server"
authors = ["Plastic Labs <[email protected]>"]
readme = "README.md"
package-mode = false

[tool.poetry.dependencies]
python = "^3.9"
fastapi = "^0.111.0"
python-dotenv = "^1.0.0"
sqlalchemy = "^2.0.30"
slowapi = "^0.1.9"
fastapi-pagination = "^0.12.24"
pgvector = "^0.2.5"
openai = "^1.12.0"
sentry-sdk = {extras = ["fastapi", "sqlalchemy"], version = "^2.3.1"}
greenlet = "^3.0.3"
psycopg = {extras= ["binary"], version="^3.1.19"}
httpx = "^0.27.0"
mirascope = "^0.18.2"
opentelemetry-instrumentation-fastapi = "^0.45b0"
opentelemetry-sdk = "^1.24.0"
opentelemetry-exporter-otlp = "^1.24.0"
opentelemetry-instrumentation-sqlalchemy = "^0.45b0"
opentelemetry-instrumentation-logging = "^0.45b0"
rich = "^13.7.1"
mirascope = "^0.18.0"
openai = "^1.43.0"

[tool.poetry.group.test.dependencies]
pytest = "^8.2.2"
Expand Down
18 changes: 8 additions & 10 deletions src/deriver/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,15 @@ async def process_ai_message(
result = await db.execute(messages_stmt)
messages = result.scalars().all()[::-1]

chat_history_str = "\n".join(
[f"human: {m.content}" if m.is_user else f"ai: {m.content}" for m in messages]
)
chat_history_str = "\n".join([
f"human: {m.content}" if m.is_user else f"ai: {m.content}" for m in messages
])
# append current message to chat history
chat_history_str = f"{chat_history_str}\nai: {content}"

# user prediction thought
user_prediction_thought = UserPredictionThought(chat_history=chat_history_str)
user_prediction_thought_response = await user_prediction_thought.call_async()
user_prediction_thought_response = user_prediction_thought.call()

## query the collection to build the context
additional_data = re.findall(
Expand Down Expand Up @@ -132,9 +132,7 @@ async def process_ai_message(
retrieved_context=context_str,
chat_history=chat_history_str,
)
user_prediction_thought_revision_response = (
await user_prediction_thought_revision.call_async()
)
user_prediction_thought_revision_response = user_prediction_thought_revision.call()

if user_prediction_thought_revision_response.content == "None":
rprint("[blue]Model predicted no changes to the user prediction thought")
Expand Down Expand Up @@ -245,7 +243,7 @@ async def process_user_message(
voe_thought = VoeThought(
user_prediction_thought_revision=metamessage.content, actual=content
)
voe_thought_response = await voe_thought.call_async()
voe_thought_response = voe_thought.call()

# VoE derive facts
voe_derive_facts = VoeDeriveFacts(
Expand All @@ -254,7 +252,7 @@ async def process_user_message(
actual=content,
voe_thought=voe_thought_response.content,
)
voe_derive_facts_response = await voe_derive_facts.call_async()
voe_derive_facts_response = voe_derive_facts.call()

# debugging
rprint("[orange1]=================")
Expand Down Expand Up @@ -329,7 +327,7 @@ async def check_dups(

check_duplication.existing_facts = existing_facts
check_duplication.new_fact = fact
response = await check_duplication.call_async()
response = check_duplication.call()
rprint("[light_steel_blue]==================")
rprint(f"[light_steel_blue]Dedupe Responses: {response.content}")
rprint("[light_steel_blue]==================")
Expand Down
3 changes: 2 additions & 1 deletion src/deriver/queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ async def schedule_session(
async with semaphore, SessionLocal() as db:
try:
available_slots = semaphore._value
print(available_slots)
new_sessions = await get_available_sessions(db, available_slots)

if new_sessions:
Expand Down Expand Up @@ -152,6 +153,6 @@ async def main():
AsyncioIntegration(),
],
)
semaphore = asyncio.Semaphore(5) # Limit to 5 concurrent dequeuing operations
semaphore = asyncio.Semaphore(6) # Limit to 6 concurrent dequeuing operations
queue_empty_flag = asyncio.Event() # Event to signal when the queue is empty
await polling_loop(semaphore, queue_empty_flag)
2 changes: 2 additions & 0 deletions src/deriver/voe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@

from mirascope.base import BaseConfig
from mirascope.openai import OpenAICall, OpenAICallParams, azure_client_wrapper
from pydantic import ConfigDict


class HonchoCall(OpenAICall):
model_config = ConfigDict(arbitrary_types_allowed=True)
configuration = BaseConfig(
client_wrappers=[
azure_client_wrapper(
Expand Down
28 changes: 5 additions & 23 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@
ConsoleSpanExporter,
SimpleSpanProcessor,
)
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware
from slowapi.util import get_remote_address
from starlette.exceptions import HTTPException as StarletteHTTPException

from src.routers import (
Expand Down Expand Up @@ -230,32 +226,18 @@ async def lifespan(app: FastAPI):

router = APIRouter(prefix="/apps/{app_id}/users/{user_id}")

# Create a Limiter instance
limiter = Limiter(key_func=get_remote_address, default_limits=["100/minute"])

# Add SlowAPI middleware to the application
app.state.limiter = limiter
app.add_exception_handler(
exc_class_or_status_code=RateLimitExceeded,
handler=_rate_limit_exceeded_handler, # type: ignore
)
app.add_middleware(SlowAPIMiddleware)


add_pagination(app)


@app.exception_handler(StarletteHTTPException)
async def http_exception_handler(request, exc):
current_span = trace.get_current_span()
if (current_span is not None) and (current_span.is_recording()):
current_span.set_attributes(
{
"http.status_text": str(exc.detail),
"otel.status_description": f"{exc.status_code} / {str(exc.detail)}",
"otel.status_code": "ERROR",
}
)
current_span.set_attributes({
"http.status_text": str(exc.detail),
"otel.status_description": f"{exc.status_code} / {str(exc.detail)}",
"otel.status_code": "ERROR",
})
return PlainTextResponse(
json.dumps({"detail": str(exc.detail)}), status_code=exc.status_code
)
Expand Down

0 comments on commit 3acef51

Please sign in to comment.