-
Notifications
You must be signed in to change notification settings - Fork 557
/
docker-compose.yaml
86 lines (83 loc) · 2.87 KB
/
docker-compose.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Compose files pulled in from the shared local_deploy directory.
# Both files are listed under a single `path` entry, so they are loaded
# together as one included model.
# NOTE(review): presumably these define the vector DB and the NIM
# microservices referenced below - confirm against the included files.
include:
  - path:
      - ../../local_deploy/docker-compose-vectordb.yaml
      - ../../local_deploy/docker-compose-nim-ms.yaml
services:
  # Chain server for the multi-turn RAG example. The image is built from
  # the repository root; EXAMPLE_PATH selects the example at build time
  # and is also passed to the container at run time.
  chain-server:
    container_name: chain-server
    image: chain-server:${TAG:-latest}
    build:
      context: ../../../../
      dockerfile: RAG/src/chain_server/Dockerfile
      args:
        EXAMPLE_PATH: 'advanced_rag/multi_turn_rag'
    volumes:
      # Mount the example-local prompt file at the container root.
      - ./prompt.yaml:/prompt.yaml
    command: --port 8081 --host 0.0.0.0
    # Environment values are strings; number- and boolean-looking values
    # are quoted so the YAML parser does not retype them.
    # NOTE(review): the double quotes inside the ${VAR:-"..."} defaults
    # are kept exactly as written - confirm whether the consumer relies
    # on them before changing.
    environment:
      EXAMPLE_PATH: 'advanced_rag/multi_turn_rag'
      APP_VECTORSTORE_URL: "http://milvus:19530"
      APP_VECTORSTORE_NAME: "milvus"
      APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"}
      APP_LLM_MODELENGINE: nvidia-ai-endpoints
      APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
      APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5}
      APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints}
      APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""}
      APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l
      APP_TEXTSPLITTER_CHUNKSIZE: "506"
      APP_TEXTSPLITTER_CHUNKOVERLAP: "200"
      # Leave it blank to avoid using ranking
      APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"}
      APP_RANKING_MODELENGINE: ${APP_RANKING_MODELENGINE:-nvidia-ai-endpoints}
      APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL:-""}
      # Taken from the host environment; no default is provided here.
      NVIDIA_API_KEY: ${NVIDIA_API_KEY}
      APP_RETRIEVER_TOPK: "4"
      APP_RETRIEVER_SCORETHRESHOLD: "0.25"
      # NOTE(review): the fallback password is a placeholder default;
      # override POSTGRES_PASSWORD outside local experiments.
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password}
      POSTGRES_USER: ${POSTGRES_USER:-postgres}
      POSTGRES_DB: ${POSTGRES_DB:-api}
      COLLECTION_NAME: ${COLLECTION_NAME:-multi_turn_rag}
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_EXPORTER_OTLP_PROTOCOL: grpc
      ENABLE_TRACING: "false"
      LOGLEVEL: ${LOGLEVEL:-INFO}
    ports:
      - "8081:8081"
    expose:
      - "8081"
    shm_size: 5gb
    # Wait for each dependency to report healthy; `required: false`
    # marks the dependency as optional.
    # NOTE(review): these services are not defined in this file -
    # presumably they come from the included compose files; confirm.
    depends_on:
      nemollm-embedding:
        condition: service_healthy
        required: false
      nemollm-inference:
        condition: service_healthy
        required: false
      ranking-ms:
        condition: service_healthy
        required: false

  # Playground front end; configured to reach the chain server at
  # http://chain-server on port 8081.
  rag-playground:
    container_name: rag-playground
    image: rag-playground:${TAG:-latest}
    build:
      context: ../../../../RAG/src/rag_playground/
      dockerfile: Dockerfile
      args:
        PLAYGROUND_MODE: ${PLAYGROUND_MODE:-default}
    command: --port 8090
    environment:
      APP_SERVERURL: http://chain-server
      APP_SERVERPORT: "8081"
      # Reuses APP_LLM_MODELNAME, with the same default as chain-server.
      APP_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"}
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_EXPORTER_OTLP_PROTOCOL: grpc
      ENABLE_TRACING: "false"
    ports:
      - "8090:8090"
    expose:
      - "8090"
    depends_on:
      - chain-server
# Give the project's default network the fixed name `nvidia-rag`.
networks:
  default:
    name: nvidia-rag