Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Fix deployment docker compose and move temporal into separate service #471

Merged
merged 6 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 68 additions & 53 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,54 +1,69 @@
AGENTS_API_KEY=myauthkey
AGENTS_API_KEY_HEADER_NAME=Authorization
AGENTS_API_URL=http://agents-api:8080
COZO_AUTH_TOKEN=myauthkey
COZO_HOST=http://memory-store:9070
COZO_PORT=9070
COZO_ROCKSDB_DIR=cozo.db
DTYPE=float16
EMBEDDING_SERVICE_BASE=http://text-embeddings-inference
EMBEDDING_SERVICE_URL=${EMBEDDING_SERVICE_BASE}/embed
GATEWAY_PORT=80
GPU_MEMORY_UTILIZATION=0.90

HF_TOKEN=
HUGGING_FACE_HUB_TOKEN=
JWT_SHARED_KEY=

MAX_MODEL_LEN=8192
MAX_NUM_SEQS=1
MNT_DIR=/data

# Security
# --------
JWT_SHARED_KEY=<your_jwt_shared_key>
AGENTS_API_KEY=<your_agents_api_key>
COZO_AUTH_TOKEN=<your_cozo_auth_token>
TEMPORAL_POSTGRES_PASSWORD=<your_temporal_postgres_password>
LITELLM_POSTGRES_PASSWORD=<your_litellm_postgres_password>
LITELLM_MASTER_KEY=<your_litellm_master_key>
LITELLM_REDIS_PASSWORD=<your_litellm_redis_password>
SKIP_CHECK_DEVELOPER_HEADERS=true
SUMMARIZATION_TOKENS_THRESHOLD=2048
TEMPERATURE_SCALING_FACTOR=0.9
TEMPERATURE_SCALING_POWER=0.9
TEMPORAL_ENDPOINT=temporal:7233
TEMPORAL_NAMESPACE=default
TEMPORAL_WORKER_URL=temporal:7233
TP_SIZE=1
TRUNCATE_EMBED_TEXT=true
TRAEFIK_LOG_LEVEL=DEBUG
WORKER_URL=temporal:7233

AGENTS_API_DEBUG=false
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
GROQ_API_KEY=
CLOUDFLARE_API_KEY=
CLOUDFLARE_ACCOUNT_ID=
NVIDIA_NIM_API_KEY=
GITHUB_API_KEY=
VOYAGE_API_KEY=
GOOGLE_APPLICATION_CREDENTIALS=

LITELLM_URL=http://litellm:4000
POSTGRES_DB=litellm
POSTGRES_USER=llmproxy
POSTGRES_PASSWORD=
LITELLM_DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@litellm-db:5432/${POSTGRES_DB}
LITELLM_MASTER_KEY=
LITELLM_REDIS_HOST=litellm-redis
LITELLM_REDIS_PORT=6379
LITELLM_REDIS_PASSWORD=
REDIS_ARGS="--requirepass ${LITELLM_REDIS_PASSWORD}"
EMBEDDING_SERVICE_BASE=http://text-embeddings-inference-<gpu|cpu> # Use the 'gpu' profile to run on GPU

# Memory Store
# -----------

# COZO_HOST=http://memory-store:9070
# COZO_PORT=9070
# COZO_ROCKSDB_DIR=cozo.db
# COZO_BACKUP_DIR=backup
# COZO_MNT_DIR=/data

# Gateway
# ------

# GATEWAY_PORT=80
# TRAEFIK_LOG_LEVEL=INFO

# Agents API
# ---------

# AGENTS_API_KEY_HEADER_NAME=Authorization
# AGENTS_API_URL=http://agents-api:8080
# TRUNCATE_EMBED_TEXT=true
# WORKER_URL=temporal:7233
# AGENTS_API_DEBUG=false
# EMBEDDING_MODEL_ID=Alibaba-NLP/gte-large-en-v1.5
# NUM_GPUS=1

# Temporal
# --------

# TEMPORAL_ENDPOINT=temporal:7233
# TEMPORAL_NAMESPACE=default
# TEMPORAL_WORKER_URL=temporal:7233
# TEMPORAL_POSTGRES_DB=temporal
# TEMPORAL_POSTGRES_USER=temporal

# LiteLLM
# -------

# LITELLM_URL=http://litellm:4000
# LITELLM_POSTGRES_DB=litellm
# LITELLM_POSTGRES_USER=llmproxy
# LITELLM_REDIS_HOST=litellm-redis
# LITELLM_REDIS_PORT=6379

# LLM Providers
# --------------

# OPENAI_API_KEY=<your_openai_api_key>
# HUGGING_FACE_HUB_TOKEN=<your_hugging_face_hub_token>
# ANTHROPIC_API_KEY=<your_anthropic_api_key>
# GROQ_API_KEY=<your_groq_api_key>
# CLOUDFLARE_API_KEY=<your_cloudflare_api_key>
# CLOUDFLARE_ACCOUNT_ID=<your_cloudflare_account_id>
# NVIDIA_NIM_API_KEY=<your_nvidia_nim_api_key>
# GITHUB_API_KEY=<your_github_api_key>
# VOYAGE_API_KEY=<your_voyage_api_key>
# GOOGLE_APPLICATION_CREDENTIALS=.keys/julep-vertexai-svc.json
4 changes: 2 additions & 2 deletions agents-api/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ENV PYTHONUNBUFFERED True
ENV POETRY_CACHE_DIR=/tmp/poetry_cache
Expand All @@ -16,4 +16,4 @@ COPY . ./

RUN poetry install --no-dev

ENTRYPOINT ["python", "agents_api/web.py", "--host", "0.0.0.0", "--port", "8080"]
ENTRYPOINT ["python", "-m", "agents_api.web", "--host", "0.0.0.0", "--port", "8080"]
4 changes: 0 additions & 4 deletions agents-api/Dockerfile.temporal

This file was deleted.

2 changes: 1 addition & 1 deletion agents-api/Dockerfile.worker
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ENV PYTHONUNBUFFERED True
ENV POETRY_CACHE_DIR=/tmp/poetry_cache
Expand Down
96 changes: 56 additions & 40 deletions agents-api/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,49 @@
name: julep-agents-api
version: "3"

# Base for embedding service
x--text-embeddings-inference: &text-embeddings-inference
container_name: text-embeddings-inference
environment:
- MODEL_ID=${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5}

image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
ports:
- "8082:80"
volumes:
- ~/.cache/huggingface/hub:/data

# Shared environment variables
x-shared-environment: &shared-environment
AGENTS_API_KEY: ${AGENTS_API_KEY}
LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY}
COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN}
SKIP_CHECK_DEVELOPER_HEADERS: ${SKIP_CHECK_DEVELOPER_HEADERS:-True}
AGENTS_API_KEY_HEADER_NAME: ${AGENTS_API_KEY_HEADER_NAME:-Authorization}
AGENTS_API_URL: ${AGENTS_API_URL:-http://agents-api:8080}
TRUNCATE_EMBED_TEXT: ${TRUNCATE_EMBED_TEXT:-False}
WORKER_URL: ${WORKER_URL:-temporal:7233}
DEBUG: ${AGENTS_API_DEBUG:-False}
EMBEDDING_SERVICE_BASE: ${EMBEDDING_SERVICE_BASE:-http://text-embeddings-inference}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5}
LITELLM_URL: ${LITELLM_URL:-http://litellm:4000}
COZO_HOST: ${COZO_HOST:-http://memory-store:9070}
SUMMARIZATION_MODEL_NAME: ${SUMMARIZATION_MODEL_NAME:-gpt-4-turbo}
TEMPORAL_WORKER_URL: ${TEMPORAL_WORKER_URL:-temporal:7233}
TEMPORAL_NAMESPACE: ${TEMPORAL_NAMESPACE:-default}
TEMPORAL_ENDPOINT: ${TEMPORAL_ENDPOINT:-temporal:7233}
TEMPORAL_TASK_QUEUE: ${TEMPORAL_TASK_QUEUE:-julep-task-queue}

services:
agents-api:
image: julepai/agents-api:dev
env_file: "../.env"

image: julepai/agents-api:${TAG:-dev}
container_name: agents-api
depends_on:
memory-store:
condition: service_started
worker:
condition: service_started
environment:
<<: *shared-environment
build:
context: .
dockerfile: Dockerfile
Expand All @@ -31,15 +63,13 @@ services:
path: Dockerfile

worker:
image: julepai/worker:dev
env_file: "../.env"

image: julepai/worker:${TAG:-dev}
environment:
<<: *shared-environment
build:
context: .
dockerfile: Dockerfile.worker
depends_on:
text-embeddings-inference:
condition: service_started
temporal:
condition: service_started

Expand All @@ -55,49 +85,32 @@ services:
- action: rebuild
path: Dockerfile.worker

text-embeddings-inference:
container_name: text-embeddings-inference
text-embeddings-inference-cpu:
<<: *text-embeddings-inference
profiles:
- '' # Acts as a default profile. See: https://stackoverflow.com/questions/75758174/how-to-make-profile-default-for-docker-compose

text-embeddings-inference-gpu:
<<: *text-embeddings-inference
profiles:
- gpu
image: ghcr.io/huggingface/text-embeddings-inference:1.5
environment:
- DTYPE=float16
- MODEL_ID=Alibaba-NLP/gte-large-en-v1.5
- MODEL_ID=${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5}
- NVIDIA_VISIBLE_DEVICES=all

image: ghcr.io/huggingface/text-embeddings-inference:1.5
ports:
- "8082:80"
volumes:
- ~/.cache/huggingface/hub:/data
shm_size: "2gb"
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
count: ${NUM_GPUS:-1}
capabilities: [gpu]


temporal:
image: julepai/temporal:dev
container_name: temporal
env_file: "../.env"

build:
context: .
dockerfile: Dockerfile.temporal
ports:
- 7233:7233
volumes:
- temporal_data:/home/temporal

develop:
watch:
- action: rebuild
path: Dockerfile.temporal

cozo-migrate:
image: julepai/cozo-migrate:dev
env_file: "../.env"

image: julepai/cozo-migrate:${TAG:-dev}
container_name: cozo-migrate
depends_on:
memory-store:
Expand All @@ -106,6 +119,9 @@ services:
context: .
dockerfile: Dockerfile.migration
restart: "no" # Make sure to double quote this
environment:
- COZO_HOST=${COZO_HOST:-http://cozo:9070}
- COZO_AUTH_TOKEN=${COZO_AUTH_TOKEN:-myauthkey}

develop:
watch:
Expand Down
2 changes: 1 addition & 1 deletion agents-api/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion agents-api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ openai = "^1.41.0"
httpx = "^0.27.0"
sentry-sdk = {extras = ["fastapi"], version = "^2.13.0"}
temporalio = "^1.6.0"
pydantic = "^2.8.2"
pydantic = {extras = ["email"], version = "^2.8.2"}
arrow = "^1.3.0"
jinja2 = "^3.1.4"
jinja2schema = "^0.1.4"
Expand Down
13 changes: 0 additions & 13 deletions deploy/.env.example

This file was deleted.

Loading
Loading