Commit 564831e

Merge branch 'main' into remove_gateway

lkk12014402 authored Dec 13, 2024
2 parents 3bac11d + c760cac commit 564831e
Showing 31 changed files with 700 additions and 403 deletions.
72 changes: 0 additions & 72 deletions ChatQnA/chatqna.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml
@@ -97,7 +97,7 @@ services:
    cap_add:
      - SYS_NICE
    ipc: host
-   command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
+   command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
  chatqna-gaudi-backend-server:
    image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
    container_name: chatqna-gaudi-backend-server

@@ -1286,7 +1286,6 @@ spec:
            type: RuntimeDefault
        image: "opea/vllm-gaudi:latest"
        args:
-         - "--enforce-eager"
          - "--model"
          - "$(MODEL_ID)"
          - "--tensor-parallel-size"
Expand Down
2 changes: 1 addition & 1 deletion ChatQnA/tests/test_compose_vllm_on_gaudi.sh
@@ -39,7 +39,7 @@ function start_services() {
    # Start Docker Containers
    docker compose -f compose_vllm.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
-   until [[ "$n" -ge 100 ]]; do
+   until [[ "$n" -ge 160 ]]; do
        echo "n=$n"
        docker logs vllm-gaudi-server > vllm_service_start.log
        if grep -q "Warmup finished" vllm_service_start.log; then
48 changes: 0 additions & 48 deletions CodeGen/codegen.yaml

This file was deleted.

48 changes: 0 additions & 48 deletions CodeTrans/codetrans.yaml

This file was deleted.

11 changes: 11 additions & 0 deletions DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md
@@ -62,6 +62,17 @@ cd GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/
docker compose up -d
```

Two DocRetriever pipelines are now supported: `DocRetriever with Rerank` and `DocRetriever without Rerank`. The `DocRetriever without Rerank` pipeline (Embedding and Retrieval only) is intended for users who want the LLM to handle all retrieved documents itself and who need the highest DocRetriever throughput. In that case, start the Docker containers with `compose_without_rerank.yaml`:

```bash
export host_ip="YOUR IP ADDR"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
cd GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/
docker compose -f compose_without_rerank.yaml up -d
```

## 4. Validation

Add Knowledge Base via HTTP Links:
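
For example, a knowledge-base link can be ingested through the dataprep service. A minimal sketch, assuming the dataprep microservice's default `/v1/dataprep` route on the published port 6007 (adjust the endpoint and payload to your deployment):

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
    -H "Content-Type: multipart/form-data" \
    -F 'link_list=["https://opea.dev"]'
```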
102 changes: 102 additions & 0 deletions DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml

@@ -0,0 +1,102 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  dataprep-redis-service:
    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: redis://redis-vector-db:6379
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME:-rag-redis}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
    ports:
      - "6006:80"
    volumes:
      - "./data:/data" # host-side model cache; point this at a local models directory
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
  embedding:
    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
    container_name: embedding-tei-server
    ports:
      - "6000:6000"
    ipc: host
    depends_on:
      - tei-embedding-service
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
    restart: unless-stopped
  retriever:
    image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: redis://redis-vector-db:6379
      INDEX_NAME: ${INDEX_NAME:-rag-redis}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
    restart: unless-stopped
  doc-index-retriever-server:
    image: ${REGISTRY:-opea}/doc-index-retriever:${TAG:-latest}
    container_name: doc-index-retriever-server
    depends_on:
      - redis-vector-db
      - tei-embedding-service
      - embedding
      - retriever
    ports:
      - "8889:8889"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-0.0.0.0}
      EMBEDDING_SERVICE_HOST_IP: embedding
      EMBEDDING_SERVICE_PORT: ${EMBEDDING_SERVER_PORT:-6000}
      RETRIEVER_SERVICE_HOST_IP: retriever
      LOGFLAG: ${LOGFLAG}
    ipc: host
    restart: always
    command: --without-rerank

networks:
  default:
    driver: bridge
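
With the stack up, the end-to-end pipeline can be sanity-checked against the megaservice. A minimal sketch, assuming the service's usual `/v1/retrievaltool` route on the port 8889 published above (the route and payload shape may differ in your build):

```bash
curl -X POST "http://${host_ip}:8889/v1/retrievaltool" \
    -H "Content-Type: application/json" \
    -d '{"text": "Explain the OPEA project"}'
```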
32 changes: 31 additions & 1 deletion DocIndexRetriever/retrieval_tool.py
@@ -1,6 +1,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

+import argparse
import asyncio
import os
from typing import Union
@@ -124,8 +125,37 @@ def start(self):
            output_datatype=Union[RerankedDoc, LLMParamsDoc],
        )

    def add_remote_service_without_rerank(self):
        embedding = MicroService(
            name="embedding",
            host=EMBEDDING_SERVICE_HOST_IP,
            port=EMBEDDING_SERVICE_PORT,
            endpoint="/v1/embeddings",
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        retriever = MicroService(
            name="retriever",
            host=RETRIEVER_SERVICE_HOST_IP,
            port=RETRIEVER_SERVICE_PORT,
            endpoint="/v1/retrieval",
            use_remote_service=True,
            service_type=ServiceType.RETRIEVER,
        )

        self.megaservice.add(embedding).add(retriever)
        self.megaservice.flow_to(embedding, retriever)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--without-rerank", action="store_true")

    args = parser.parse_args()

    chatqna = RetrievalToolService(port=MEGA_SERVICE_PORT)
-   chatqna.add_remote_service()
+   if args.without_rerank:
+       chatqna.add_remote_service_without_rerank()
+   else:
+       chatqna.add_remote_service()
    chatqna.start()
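
This new flag is what the compose file's `command: --without-rerank` passes through: with it, the megaservice wires only embedding and retrieval; without it, the original pipeline (including the rerank stage) is assembled. For example:

```bash
# Original pipeline: embedding -> retriever -> rerank
python retrieval_tool.py

# Lighter pipeline: embedding -> retriever only
python retrieval_tool.py --without-rerank
```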