From d0f57a6db2e7c3943e0609f3155af2cebdb37592 Mon Sep 17 00:00:00 2001 From: Diwank Singh Tomer Date: Fri, 20 Sep 2024 00:58:13 -0400 Subject: [PATCH] fix: Fix embedding service issues Signed-off-by: Diwank Singh Tomer --- .env.example | 1 - agents-api/agents_api/clients/old_embed.py | 28 ---------------------- agents-api/docker-compose.yml | 1 - embedding-service/docker-compose.yml | 10 ++++---- llm-proxy/docker-compose.yml | 1 - llm-proxy/litellm-config.yaml | 8 +++---- 6 files changed, 9 insertions(+), 40 deletions(-) delete mode 100644 agents-api/agents_api/clients/old_embed.py diff --git a/.env.example b/.env.example index e3cda1a97..ed8089ed7 100644 --- a/.env.example +++ b/.env.example @@ -8,7 +8,6 @@ LITELLM_POSTGRES_PASSWORD= LITELLM_MASTER_KEY= LITELLM_SALT_KEY= LITELLM_REDIS_PASSWORD= -EMBEDDING_SERVICE_BASE=http://text-embeddings-inference- # Use the 'gpu' profile to run on GPU # Memory Store # ----------- diff --git a/agents-api/agents_api/clients/old_embed.py b/agents-api/agents_api/clients/old_embed.py deleted file mode 100644 index b9412f485..000000000 --- a/agents-api/agents_api/clients/old_embed.py +++ /dev/null @@ -1,28 +0,0 @@ -import httpx - -from ..env import embedding_model_id, embedding_service_base, truncate_embed_text - - -async def embed( - inputs: list[str], - join_inputs=False, - embedding_service_url: str = embedding_service_base + "/embed", - embedding_model_name: str = embedding_model_id, -) -> list[list[float]]: - async with httpx.AsyncClient(timeout=30) as client: - resp = await client.post( - embedding_service_url, - headers={ - "Content-Type": "application/json", - }, - json={ - "inputs": "\n\n".join(inputs) if join_inputs else inputs, - # - # FIXME: We should control the truncation ourselves and truncate before sending - "truncate": truncate_embed_text, - "model_id": embedding_model_name, - }, - ) - resp.raise_for_status() - - return resp.json() diff --git a/agents-api/docker-compose.yml b/agents-api/docker-compose.yml index 2d6701b2f..0ffde732a 100644 --- a/agents-api/docker-compose.yml +++ b/agents-api/docker-compose.yml @@ -12,7 +12,6 @@ x--shared-environment: &shared-environment COZO_HOST: ${COZO_HOST:-http://memory-store:9070} DEBUG: ${AGENTS_API_DEBUG:-False} EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5} - EMBEDDING_SERVICE_BASE: ${EMBEDDING_SERVICE_BASE:-http://text-embeddings-inference} LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} LITELLM_URL: ${LITELLM_URL:-http://litellm:4000} SUMMARIZATION_MODEL_NAME: ${SUMMARIZATION_MODEL_NAME:-gpt-4-turbo} diff --git a/embedding-service/docker-compose.yml b/embedding-service/docker-compose.yml index 2ee619f8f..73df579be 100644 --- a/embedding-service/docker-compose.yml +++ b/embedding-service/docker-compose.yml @@ -2,7 +2,8 @@ name: julep-embedding-service # Base for embedding service x--text-embeddings-inference: &text-embeddings-inference - container_name: text-embeddings-inference-cpu + hostname: text-embeddings-inference + container_name: text-embeddings-inference environment: - MODEL_ID=${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5} @@ -20,7 +21,6 @@ x--shared-environment: &shared-environment COZO_HOST: ${COZO_HOST:-http://memory-store:9070} DEBUG: ${AGENTS_API_DEBUG:-False} EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5} - EMBEDDING_SERVICE_BASE: ${EMBEDDING_SERVICE_BASE:-http://text-embeddings-inference} LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} LITELLM_URL: ${LITELLM_URL:-http://litellm:4000} SUMMARIZATION_MODEL_NAME: ${SUMMARIZATION_MODEL_NAME:-gpt-4-turbo} @@ -37,16 +37,16 @@ x--shared-environment: &shared-environment services: text-embeddings-inference-cpu: <<: *text-embeddings-inference + container_name: text-embeddings-inference-cpu profiles: - - '' # Acts as a default profile. See: https://stackoverflow.com/questions/75758174/how-to-make-profile-default-for-docker-compose - - cpu + - embedding-cpu platform: linux/amd64 # Temp fix for Mac M-series chips text-embeddings-inference-gpu: <<: *text-embeddings-inference container_name: text-embeddings-inference-gpu profiles: - - gpu + - embedding-gpu image: ghcr.io/huggingface/text-embeddings-inference:1.5 environment: - DTYPE=float16 diff --git a/llm-proxy/docker-compose.yml b/llm-proxy/docker-compose.yml index ca75164c9..5f02a6a28 100644 --- a/llm-proxy/docker-compose.yml +++ b/llm-proxy/docker-compose.yml @@ -25,7 +25,6 @@ services: - GITHUB_API_KEY=${GITHUB_API_KEY} - VOYAGE_API_KEY=${VOYAGE_API_KEY} - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS} - - EMBEDDING_SERVICE_BASE=${EMBEDDING_SERVICE_BASE:-http://text-embeddings-inference-gpu} command: [ "--config", diff --git a/llm-proxy/litellm-config.yaml b/llm-proxy/litellm-config.yaml index 226a814ab..91d2a2c09 100644 --- a/llm-proxy/litellm-config.yaml +++ b/llm-proxy/litellm-config.yaml @@ -94,20 +94,20 @@ model_list: - model_name: Alibaba-NLP/gte-large-en-v1.5 litellm_params: model: openai/Alibaba-NLP/gte-large-en-v1.5 - api_base: os.environ/EMBEDDING_SERVICE_BASE + api_base: http://text-embeddings-inference tags: ["free"] - model_name: BAAI/bge-m3 litellm_params: model: openai/BAAI/bge-m3 - api_base: os.environ/EMBEDDING_SERVICE_BASE + api_base: http://text-embeddings-inference tags: ["free"] - model_name: vertex_ai/text-embedding-004 litellm_params: model: vertex_ai/text-embedding-004 - vertex_project: os.environ/GOOGLE_PROJECT_ID - vertex_location: os.environ/VERTEX_LOCATION + # vertex_project: os.environ/GOOGLE_PROJECT_ID + # vertex_location: os.environ/VERTEX_LOCATION # -*= Free models =*-