Commit 564831e

Merge branch 'main' into remove_gateway

lkk12014402 authored Dec 13, 2024
2 parents 3bac11d + c760cac commit 564831e
Showing 31 changed files with 700 additions and 403 deletions.
72 changes: 0 additions & 72 deletions ChatQnA/chatqna.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml
@@ -97,7 +97,7 @@ services:
    cap_add:
      - SYS_NICE
    ipc: host
-   command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
+   command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
  chatqna-gaudi-backend-server:
    image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
    container_name: chatqna-gaudi-backend-server

@@ -1286,7 +1286,6 @@ spec:
            type: RuntimeDefault
        image: "opea/vllm-gaudi:latest"
        args:
-         - "--enforce-eager"
          - "--model"
          - "$(MODEL_ID)"
          - "--tensor-parallel-size"
Expand Down
2 changes: 1 addition & 1 deletion ChatQnA/tests/test_compose_vllm_on_gaudi.sh
@@ -39,7 +39,7 @@ function start_services() {
    # Start Docker Containers
    docker compose -f compose_vllm.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
-   until [[ "$n" -ge 100 ]]; do
+   until [[ "$n" -ge 160 ]]; do
        echo "n=$n"
        docker logs vllm-gaudi-server > vllm_service_start.log
        if grep -q "Warmup finished" vllm_service_start.log; then
48 changes: 0 additions & 48 deletions CodeGen/codegen.yaml

This file was deleted.

48 changes: 0 additions & 48 deletions CodeTrans/codetrans.yaml

This file was deleted.

11 changes: 11 additions & 0 deletions DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md
@@ -62,6 +62,17 @@ cd GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/
docker compose up -d
```

Two DocRetriever pipelines are now supported: `DocRetriever with Rerank` and `DocRetriever without Rerank`. The `DocRetriever without Rerank` pipeline (Embedding and Retrieval only) is intended for users who want the LLM to handle all retrieved documents itself and who need the highest DocRetriever throughput. In that case, start the Docker containers with `compose_without_rerank.yaml`:

```bash
export host_ip="YOUR IP ADDR"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
cd GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/
docker compose -f compose_without_rerank.yaml up -d
```

## 4. Validation

Add Knowledge Base via HTTP Links:
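
For example, a knowledge-base link can be ingested through the dataprep service. A minimal sketch, assuming the dataprep microservice's default `/v1/dataprep` route on the published port 6007 (adjust the endpoint and payload to your deployment):

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
    -H "Content-Type: multipart/form-data" \
    -F 'link_list=["https://opea.dev"]'
```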
102 changes: 102 additions & 0 deletions DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml

@@ -0,0 +1,102 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  dataprep-redis-service:
    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: redis://redis-vector-db:6379
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME:-rag-redis}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
    ports:
      - "6006:80"
    volumes:
      - "./data:/data" # host-side model cache; point this at a local models directory
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
  embedding:
    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
    container_name: embedding-tei-server
    ports:
      - "6000:6000"
    ipc: host
    depends_on:
      - tei-embedding-service
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
    restart: unless-stopped
  retriever:
    image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: redis://redis-vector-db:6379
      INDEX_NAME: ${INDEX_NAME:-rag-redis}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
    restart: unless-stopped
  doc-index-retriever-server:
    image: ${REGISTRY:-opea}/doc-index-retriever:${TAG:-latest}
    container_name: doc-index-retriever-server
    depends_on:
      - redis-vector-db
      - tei-embedding-service
      - embedding
      - retriever
    ports:
      - "8889:8889"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-0.0.0.0}
      EMBEDDING_SERVICE_HOST_IP: embedding
      EMBEDDING_SERVICE_PORT: ${EMBEDDING_SERVER_PORT:-6000}
      RETRIEVER_SERVICE_HOST_IP: retriever
      LOGFLAG: ${LOGFLAG}
    ipc: host
    restart: always
    command: --without-rerank

networks:
  default:
    driver: bridge
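
With the stack up, the end-to-end pipeline can be sanity-checked against the megaservice. A minimal sketch, assuming the service's usual `/v1/retrievaltool` route on the port 8889 published above (the route and payload shape may differ in your build):

```bash
curl -X POST "http://${host_ip}:8889/v1/retrievaltool" \
    -H "Content-Type: application/json" \
    -d '{"text": "Explain the OPEA project"}'
```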
32 changes: 31 additions & 1 deletion DocIndexRetriever/retrieval_tool.py
@@ -1,6 +1,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

+import argparse
import asyncio
import os
from typing import Union
@@ -124,8 +125,37 @@ def start(self):
            output_datatype=Union[RerankedDoc, LLMParamsDoc],
        )

    def add_remote_service_without_rerank(self):
        embedding = MicroService(
            name="embedding",
            host=EMBEDDING_SERVICE_HOST_IP,
            port=EMBEDDING_SERVICE_PORT,
            endpoint="/v1/embeddings",
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        retriever = MicroService(
            name="retriever",
            host=RETRIEVER_SERVICE_HOST_IP,
            port=RETRIEVER_SERVICE_PORT,
            endpoint="/v1/retrieval",
            use_remote_service=True,
            service_type=ServiceType.RETRIEVER,
        )

        self.megaservice.add(embedding).add(retriever)
        self.megaservice.flow_to(embedding, retriever)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--without-rerank", action="store_true")

    args = parser.parse_args()

    chatqna = RetrievalToolService(port=MEGA_SERVICE_PORT)
-   chatqna.add_remote_service()
+   if args.without_rerank:
+       chatqna.add_remote_service_without_rerank()
+   else:
+       chatqna.add_remote_service()
    chatqna.start()
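
This new flag is what the compose file's `command: --without-rerank` passes through: with it, the megaservice wires only embedding and retrieval; without it, the original pipeline (including the rerank stage) is assembled. For example:

```bash
# Original pipeline: embedding -> retriever -> rerank
python retrieval_tool.py

# Lighter pipeline: embedding -> retriever only
python retrieval_tool.py --without-rerank
```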