Skip to content

Commit

Permalink
fix: Fix embedding service issues
Browse files: browse the repository at this point in the history
Signed-off-by: Diwank Singh Tomer <[email protected]>
  • Loading branch information
creatorrr committed Sep 20, 2024
1 parent 44e66d0 commit d0f57a6
Show file tree
Hide file tree
Showing 6 changed files with 9 additions and 40 deletions.
1 change: 0 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ LITELLM_POSTGRES_PASSWORD=<your_litellm_postgres_password>
LITELLM_MASTER_KEY=<your_litellm_master_key>
LITELLM_SALT_KEY=<your_litellm_salt_key>
LITELLM_REDIS_PASSWORD=<your_litellm_redis_password>
EMBEDDING_SERVICE_BASE=http://text-embeddings-inference-<gpu|cpu> # Use the 'gpu' profile to run on GPU

# Memory Store
# -----------
Expand Down
28 changes: 0 additions & 28 deletions agents-api/agents_api/clients/old_embed.py

This file was deleted.

1 change: 0 additions & 1 deletion agents-api/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ x--shared-environment: &shared-environment
COZO_HOST: ${COZO_HOST:-http://memory-store:9070}
DEBUG: ${AGENTS_API_DEBUG:-False}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5}
EMBEDDING_SERVICE_BASE: ${EMBEDDING_SERVICE_BASE:-http://text-embeddings-inference}
LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY}
LITELLM_URL: ${LITELLM_URL:-http://litellm:4000}
SUMMARIZATION_MODEL_NAME: ${SUMMARIZATION_MODEL_NAME:-gpt-4-turbo}
Expand Down
10 changes: 5 additions & 5 deletions embedding-service/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ name: julep-embedding-service

# Base for embedding service
x--text-embeddings-inference: &text-embeddings-inference
container_name: text-embeddings-inference-cpu
hostname: text-embeddings-inference
container_name: text-embeddings-inference
environment:
- MODEL_ID=${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5}

Expand All @@ -20,7 +21,6 @@ x--shared-environment: &shared-environment
COZO_HOST: ${COZO_HOST:-http://memory-store:9070}
DEBUG: ${AGENTS_API_DEBUG:-False}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5}
EMBEDDING_SERVICE_BASE: ${EMBEDDING_SERVICE_BASE:-http://text-embeddings-inference}
LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY}
LITELLM_URL: ${LITELLM_URL:-http://litellm:4000}
SUMMARIZATION_MODEL_NAME: ${SUMMARIZATION_MODEL_NAME:-gpt-4-turbo}
Expand All @@ -37,16 +37,16 @@ x--shared-environment: &shared-environment
services:
text-embeddings-inference-cpu:
<<: *text-embeddings-inference
container_name: text-embeddings-inference-cpu
profiles:
- '' # Acts as a default profile. See: https://stackoverflow.com/questions/75758174/how-to-make-profile-default-for-docker-compose
- cpu
- embedding-cpu
platform: linux/amd64 # Temp fix for Mac M-series chips

text-embeddings-inference-gpu:
<<: *text-embeddings-inference
container_name: text-embeddings-inference-gpu
profiles:
- gpu
- embedding-gpu
image: ghcr.io/huggingface/text-embeddings-inference:1.5
environment:
- DTYPE=float16
Expand Down
1 change: 0 additions & 1 deletion llm-proxy/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ services:
- GITHUB_API_KEY=${GITHUB_API_KEY}
- VOYAGE_API_KEY=${VOYAGE_API_KEY}
- GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS}
- EMBEDDING_SERVICE_BASE=${EMBEDDING_SERVICE_BASE:-http://text-embeddings-inference-gpu}
command:
[
"--config",
Expand Down
8 changes: 4 additions & 4 deletions llm-proxy/litellm-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,20 +94,20 @@ model_list:
- model_name: Alibaba-NLP/gte-large-en-v1.5
litellm_params:
model: openai/Alibaba-NLP/gte-large-en-v1.5
api_base: os.environ/EMBEDDING_SERVICE_BASE
api_base: http://text-embeddings-inference
tags: ["free"]

- model_name: BAAI/bge-m3
litellm_params:
model: openai/BAAI/bge-m3
api_base: os.environ/EMBEDDING_SERVICE_BASE
api_base: http://text-embeddings-inference
tags: ["free"]

- model_name: vertex_ai/text-embedding-004
litellm_params:
model: vertex_ai/text-embedding-004
vertex_project: os.environ/GOOGLE_PROJECT_ID
vertex_location: os.environ/VERTEX_LOCATION
# vertex_project: os.environ/GOOGLE_PROJECT_ID
# vertex_location: os.environ/VERTEX_LOCATION


# -*= Free models =*-
Expand Down

0 comments on commit d0f57a6

Please sign in to comment.