Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(agents-api): Add litellm proxy to docker compose #448

Merged
merged 1 commit on Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions agents-api/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ services:
container_name: text-embeddings-inference
environment:
- DTYPE=float16
- MODEL_ID=BAAI/bge-m3
- MODEL_ID=Alibaba-NLP/gte-large-en-v1.5

image: ghcr.io/huggingface/text-embeddings-inference:1.3
image: ghcr.io/huggingface/text-embeddings-inference:1.5
ports:
- "8082:80"
volumes:
Expand Down
1,917 changes: 573 additions & 1,344 deletions agents-api/poetry.lock

Large diffs are not rendered by default.

8 changes: 2 additions & 6 deletions agents-api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,19 @@ pycozo = {extras = ["embedded"], version = "^0.7.6"}
uvicorn = "^0.23.2"
fire = "^0.5.0"
environs = "^10.3.0"
google-cloud-aiplatform = "^1.33.0"
pandas = "^2.1.0"
openai = "^1.12.0"
httpx = "^0.26.0"
async-lru = "^2.0.4"
sentry-sdk = {extras = ["fastapi"], version = "^1.38.0"}
temporalio = "^1.4.0"
pydantic = "^2.5.3"
arrow = "^1.3.0"
jinja2 = "^3.1.3"
jinja2schema = "^0.1.4"
jsonschema = "^4.21.1"
litellm = "^1.35.32"
litellm = "^1.43.3"
numpy = "^1.26.4"
transformers = "^4.40.1"
tiktoken = "^0.6.0"
xxhash = "^3.4.1"
tiktoken = "^0.7.0"
tenacity = "^8.3.0"
beartype = "^0.18.5"
pydantic-partial = "^0.5.5"
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ include:
- ./model-serving/docker-compose.yml
- ./gateway/docker-compose.yml
- ./agents-api/docker-compose.yml
- ./llm-proxy/docker-compose.yml

# TODO: Enable after testing
# - ./monitoring/docker-compose.yml
1 change: 1 addition & 0 deletions llm-proxy/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!.keys
1 change: 1 addition & 0 deletions llm-proxy/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.keys
54 changes: 54 additions & 0 deletions llm-proxy/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
---
# Compose stack for the LiteLLM proxy: the proxy service plus the
# Postgres database and Redis cache it is configured to use
# (see litellm-config.yaml: general_settings.database_url / cache_params).
services:
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    volumes:
      - ./litellm-config.yaml:/app/config.yaml
      # Relative bind mount of the local .keys directory (credential files).
      - .keys:/app/.keys
    ports:
      - "4000:4000"
    env_file:
      - ../.env
    command:
      [
        "--config",
        "/app/config.yaml",
        "--port",
        "4000",
        "--num_workers",
        "8",
        "--telemetry",
        "False"
      ]
    depends_on:
      - litellm-db
      - litellm-redis

  litellm-db:
    # NOTE(review): image is unpinned; consider pinning a major version
    # (e.g. postgres:16) so the data volume survives image upgrades.
    image: postgres
    restart: always
    volumes:
      - litellm-db-data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    env_file:
      - ../.env
    healthcheck:
      # Assumes POSTGRES_DB=litellm and POSTGRES_USER=llmproxy are set in
      # ../.env — confirm against that file.
      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
      interval: 1s
      timeout: 5s
      retries: 10

  litellm-redis:
    image: redis/redis-stack-server
    restart: always
    volumes:
      - litellm-redis-data:/data
    ports:
      - "6379:6379"
    env_file:
      - ../.env

volumes:
  litellm-db-data:
  litellm-redis-data:
127 changes: 127 additions & 0 deletions llm-proxy/litellm-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
---
# LiteLLM proxy configuration: model catalog, proxy/runtime settings,
# router strategy, and server-level settings.
environment_variables:
  NO_DOCS: "true"

model_list:
  # -*= Paid models =*-
  # -------------------

  # Gemini models
  - model_name: gemini-1.5-pro
    litellm_params:
      model: vertex_ai_beta/gemini-1.5-pro
      tags: ["paid"]
      vertex_credentials: os.environ/GOOGLE_APPLICATION_CREDENTIALS

  # NOTE: "claude-3.5-sonnet" appears twice (here via Vertex AI and below
  # via the Anthropic API). LiteLLM treats entries sharing a model_name as
  # one group and routes across them per router_settings.routing_strategy.
  - model_name: claude-3.5-sonnet
    litellm_params:
      model: vertex_ai/claude-3-5-sonnet@20240620
      tags: ["paid"]
      vertex_credentials: os.environ/GOOGLE_APPLICATION_CREDENTIALS

  # OpenAI models
  - model_name: "gpt-4-turbo"
    litellm_params:
      model: "openai/gpt-4-turbo"
      tags: ["paid"]
      api_key: os.environ/OPENAI_API_KEY

  - model_name: "gpt-4o"
    litellm_params:
      model: "openai/gpt-4o"
      tags: ["paid"]
      api_key: os.environ/OPENAI_API_KEY

  # Anthropic models
  - model_name: "claude-3.5-sonnet"
    litellm_params:
      model: "claude-3-5-sonnet-20240620"
      tags: ["paid"]
      api_key: os.environ/ANTHROPIC_API_KEY

  # Groq models
  - model_name: "llama-3.1-70b"
    litellm_params:
      model: "groq/llama-3.1-70b-versatile"
      tags: ["paid"]
      api_key: os.environ/GROQ_API_KEY

  - model_name: "llama-3.1-8b"
    litellm_params:
      model: "groq/llama-3.1-8b-instant"
      tags: ["paid"]
      api_key: os.environ/GROQ_API_KEY

  # -*= Embedding models =*-
  # ------------------------

  - model_name: text-embedding-3-large
    litellm_params:
      model: "openai/text-embedding-3-large"
      api_key: os.environ/OPENAI_API_KEY
      tags: ["paid"]

  - model_name: voyage-multilingual-2
    litellm_params:
      model: "voyage/voyage-multilingual-2"
      api_key: os.environ/VOYAGE_API_KEY
      tags: ["paid"]

  - model_name: voyage-large-2
    litellm_params:
      model: "voyage/voyage-large-2"
      api_key: os.environ/VOYAGE_API_KEY
      tags: ["paid"]

  # Self-hosted embeddings reached through an OpenAI-compatible endpoint
  # at EMBEDDING_SERVICE_BASE (the openai/ prefix selects that client).
  - model_name: gte-large-en-v1.5
    litellm_params:
      model: openai/Alibaba-NLP/gte-large-en-v1.5
      api_base: os.environ/EMBEDDING_SERVICE_BASE
      tags: ["free"]

  - model_name: bge-m3
    litellm_params:
      model: openai/BAAI/bge-m3
      api_base: os.environ/EMBEDDING_SERVICE_BASE
      tags: ["free"]

  # -*= Free models =*-
  # -------------------

  - model_name: gpt-4o-mini
    litellm_params:
      model: openai/gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY
      tags: ["free"]

# https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
litellm_settings:
  num_retries: 3
  request_timeout: 180
  allowed_fails: 3
  cooldown_time: 30
  drop_params: true
  modify_params: true
  telemetry: false
  retry: true
  add_function_to_prompt: true

  set_verbose: false
  cache: true
  cache_params:  # set cache params for redis
    type: redis
    namespace: "litellm_caching"
    host: os.environ/LITELLM_REDIS_HOST
    port: os.environ/LITELLM_REDIS_PORT
    password: os.environ/LITELLM_REDIS_PASSWORD

router_settings:
  routing_strategy: simple-shuffle
  num_retries: 3

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
  database_url: os.environ/LITELLM_DATABASE_URL
  enforce_user_param: true
Loading