Adds audio querying to MultimodalQ&A Example #1225

Merged: 20 commits, Dec 12, 2024

Changes from 7 commits

Commits (20):
- 3d22cf5 Build, compose, and doc updates for audio query (#12) (okhleif-IL, Dec 2, 2024)
- 22a5b15 MMQnA doc update correcting ASR and whisper image names (#24) (dmsuehir, Dec 3, 2024)
- 84ec278 Integrate audio query into UI (#22) (mhbuehler, Dec 4, 2024)
- c9fe70e Temporarily redirect clones for tests (mhbuehler, Dec 4, 2024)
- 7f7236d Merge pull request #25 from mhbuehler/melanie/redirect_clones_for_tests (mhbuehler, Dec 4, 2024)
- 56db11a Merge branch 'main' into mmqna-audio-query (mhbuehler, Dec 4, 2024)
- f67146f [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 4, 2024)
- fdf5a08 Merge branch 'main' into mmqna-audio-query (mhbuehler, Dec 5, 2024)
- aa5a5ac Add services to tests and correct small text error (#26) (mhbuehler, Dec 5, 2024)
- 30e33a6 Fixed build.yaml inconsistency (mhbuehler, Dec 6, 2024)
- 9ba341b Merge branch 'main' into mmqna-audio-query (ashahba, Dec 6, 2024)
- 54c82ac Merge pull request #27 from mhbuehler/melanie/whisper_image_name (mhbuehler, Dec 7, 2024)
- bcabb36 Merge branch 'main' into mmqna-audio-query (mhbuehler, Dec 7, 2024)
- 02b87b0 Update repo clones for E2E tests (mhbuehler, Dec 9, 2024)
- c421e68 Merge pull request #30 from mhbuehler/melanie/revert_clones (mhbuehler, Dec 9, 2024)
- f839c43 Moved Audio Query Gateway changes to multimodalqna.py (#29) (okhleif-IL, Dec 10, 2024)
- 674c975 Merge branch 'main' into mmqna-audio-query (mhbuehler, Dec 10, 2024)
- 31da576 Removed proxies arg from translation request (#31) (okhleif-IL, Dec 10, 2024)
- ba1fd52 Merge branch 'main' into mmqna-audio-query (mhbuehler, Dec 10, 2024)
- 55585ab Merge branch 'main' into mmqna-audio-query (ashahba, Dec 12, 2024)
57 changes: 46 additions & 11 deletions MultimodalQnA/docker_compose/intel/cpu/xeon/README.md
@@ -78,6 +78,9 @@ export https_proxy=${your_http_proxy}
export EMBEDDER_PORT=6006
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMBEDDER_PORT/v1/encode"
export MM_EMBEDDING_PORT_MICROSERVICE=6000
export ASR_ENDPOINT=http://$host_ip:7066
export ASR_SERVICE_PORT=3001
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
export REDIS_URL="redis://${host_ip}:6379"
export REDIS_HOST=${host_ip}
export INDEX_NAME="mm-rag-redis"
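For orientation: `ASR_ENDPOINT` points at the internal Whisper dependency server (port 7066), while `ASR_SERVICE_ENDPOINT` is the ASR microservice that the MegaService calls for audio transcription. A minimal sketch to confirm both are set after exporting the variables above:

```bash
# Sketch: print the two ASR-related endpoints (Whisper server vs. ASR microservice)
echo "Whisper server (dependency):   ${ASR_ENDPOINT}"
echo "ASR microservice (public API): ${ASR_SERVICE_ENDPOINT}"
```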
@@ -144,7 +147,21 @@ docker build --no-cache -t opea/lvm-llava-svc:latest --build-arg https_proxy=$ht
docker build --no-cache -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile .
```

### 5. Build MegaService Docker Image
### 5. Build ASR Images

Build the Whisper server image:

```bash
docker build --no-cache -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
```

Build the ASR microservice image:

```bash
docker build --no-cache -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
```
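As an optional check (not part of the original steps), confirm that both audio images were tagged:

```bash
# Sketch: list the two freshly built audio images
docker images | grep -E 'opea/(whisper|asr)'
```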

### 6. Build MegaService Docker Image

To construct the MegaService, we use the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the [multimodalqna.py](../../../../multimodalqna.py) Python script. Build the MegaService Docker image with the command below:

@@ -155,7 +172,7 @@ docker build --no-cache -t opea/multimodalqna:latest --build-arg https_proxy=$ht
cd ../..
```

### 6. Build UI Docker Image
### 7. Build UI Docker Image

Build the frontend Docker image with the command below:

@@ -165,16 +182,19 @@ docker build --no-cache -t opea/multimodalqna-ui:latest --build-arg https_proxy=
cd ../../../
```

Then run the command `docker images`, you will have the following 8 Docker Images:
Then run the command `docker images`; you should see the following 11 Docker images:

1. `opea/dataprep-multimodal-redis:latest`
2. `opea/lvm-llava-svc:latest`
3. `opea/lvm-llava:latest`
4. `opea/retriever-multimodal-redis:latest`
5. `opea/embedding-multimodal:latest`
6. `opea/embedding-multimodal-bridgetower:latest`
7. `opea/multimodalqna:latest`
8. `opea/multimodalqna-ui:latest`
5. `opea/whisper:latest`
6. `opea/asr:latest`
7. `opea/redis-vector-db`
8. `opea/embedding-multimodal:latest`
9. `opea/embedding-multimodal-bridgetower:latest`
10. `opea/multimodalqna:latest`
11. `opea/multimodalqna-ui:latest`

## 🚀 Start Microservices

@@ -240,7 +260,16 @@ curl http://${host_ip}:7000/v1/multimodal_retrieval \
-d "{\"text\":\"test\",\"embedding\":${your_embedding}}"
```

4. lvm-llava
4. asr

```bash
curl ${ASR_SERVICE_ENDPOINT} \
-X POST \
-H "Content-Type: application/json" \
-d '{"byte_str" : "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
```
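The `byte_str` value is a base64-encoded WAV file. To transcribe your own recording instead of the bundled sample, one option is to encode it inline; a sketch, where `sample.wav` is a placeholder filename and `base64 -w 0` assumes GNU coreutils:

```bash
# Sketch: base64-encode a local WAV file and send it to the ASR microservice
curl ${ASR_SERVICE_ENDPOINT} \
  -X POST \
  -H "Content-Type: application/json" \
  -d "{\"byte_str\": \"$(base64 -w 0 sample.wav)\"}"
```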

5. lvm-llava

```bash
curl http://${host_ip}:${LLAVA_SERVER_PORT}/generate \
@@ -249,7 +278,7 @@ curl http://${host_ip}:${LLAVA_SERVER_PORT}/generate \
-d '{"prompt":"Describe the image please.", "img_b64_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC"}'
```

5. lvm-llava-svc
6. lvm-llava-svc

```bash
curl http://${host_ip}:9399/v1/lvm \
@@ -274,7 +303,7 @@ curl http://${host_ip}:9399/v1/lvm \
-d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [], "chat_template":"The caption of the image is: '\''{context}'\''. {question}"}'
```

6. dataprep-multimodal-redis
7. dataprep-multimodal-redis

Download a sample video, image, and audio file and create a caption

@@ -348,7 +377,7 @@ curl -X POST \
${DATAPREP_DELETE_FILE_ENDPOINT}
```

7. MegaService
8. MegaService

```bash
curl http://${host_ip}:8888/v1/multimodalqna \
@@ -357,6 +386,12 @@ curl http://${host_ip}:8888/v1/multimodalqna \
-d '{"messages": "What is the revenue of Nike in 2023?"}'
```

```bash
curl http://${host_ip}:8888/v1/multimodalqna \
-H "Content-Type: application/json" \
-d '{"messages": [{"role": "user", "content": [{"type": "audio", "audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}]}]}'
```
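To send your own audio through the MegaService, the same base64 approach works. Below is a sketch that builds the `messages` payload with `jq` (assumes `jq` is installed; `sample.wav` is a placeholder filename):

```bash
# Sketch: wrap a local WAV file in the multimodal chat message format and query the MegaService
payload=$(jq -n --arg audio "$(base64 -w 0 sample.wav)" \
  '{messages: [{role: "user", content: [{type: "audio", audio: $audio}]}]}')
curl http://${host_ip}:8888/v1/multimodalqna \
  -H "Content-Type: application/json" \
  -d "$payload"
```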

```bash
curl http://${host_ip}:8888/v1/multimodalqna \
-H "Content-Type: application/json" \
24 changes: 24 additions & 0 deletions MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -2,6 +2,27 @@
# SPDX-License-Identifier: Apache-2.0

services:
whisper-service:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "${ASR_SERVICE_PORT}:9099"
ipc: host
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
ASR_SERVICE_ENDPOINT: ${ASR_SERVICE_ENDPOINT}
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
container_name: redis-vector-db
@@ -102,6 +123,7 @@ services:
- embedding-multimodal
- retriever-multimodal-redis
- lvm-llava-svc
- asr
ports:
- "8888:8888"
environment:
@@ -113,6 +135,8 @@
MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP}
LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
ASR_SERVICE_ENDPOINT: ${ASR_SERVICE_ENDPOINT}
ipc: host
restart: always
multimodalqna-ui:
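Once the updated compose file is running, a quick sanity check that both new audio containers are up (a sketch; the names come from the `container_name` fields above):

```bash
# Sketch: verify the Whisper server and ASR microservice containers are running
docker ps --filter "name=whisper-service" --filter "name=asr-service" \
  --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
```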
3 changes: 3 additions & 0 deletions MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -12,6 +12,9 @@ export https_proxy=${your_http_proxy}
export EMBEDDER_PORT=6006
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMBEDDER_PORT/v1/encode"
export MM_EMBEDDING_PORT_MICROSERVICE=6000
export ASR_ENDPOINT=http://$host_ip:7066
export ASR_SERVICE_PORT=3001
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
export REDIS_URL="redis://${host_ip}:6379"
export REDIS_HOST=${host_ip}
export INDEX_NAME="mm-rag-redis"
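After filling in the proxy and `host_ip` values, the script is meant to be sourced so the variables land in the current shell; a sketch, run from the GenAIExamples repository root:

```bash
# Sketch: load the environment and confirm the three new ASR variables are set
source MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh
env | grep '^ASR_'
```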
49 changes: 39 additions & 10 deletions MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -37,6 +37,9 @@ export LVM_MODEL_ID="llava-hf/llava-v1.6-vicuna-13b-hf"
export WHISPER_MODEL="base"
export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip}
export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip}
export ASR_ENDPOINT=http://$host_ip:7066
export ASR_SERVICE_PORT=3001
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
export LVM_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna"
@@ -95,7 +98,21 @@ docker build --no-cache -t opea/lvm-tgi:latest --build-arg https_proxy=$https_pr
docker build --no-cache -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile .
```

### 5. Build MegaService Docker Image
### 5. Build ASR Images

Build the Whisper server image:

```bash
docker build --no-cache -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
```

Build the ASR microservice image:

```bash
docker build --no-cache -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
```
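Alternatively, the same two images can be built through `docker_image_build/build.yaml` (updated later in this PR); a sketch, assuming GenAIComps has already been cloned into that directory to serve as the build context:

```bash
# Sketch: build only the audio-related images via the compose build file
cd GenAIExamples/MultimodalQnA/docker_image_build
docker compose -f build.yaml build --no-cache whisper-service asr
```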

### 6. Build MegaService Docker Image

To construct the MegaService, we use the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the [multimodalqna.py](../../../../multimodalqna.py) Python script. Build the MegaService Docker image with the command below:

@@ -114,16 +131,19 @@ cd GenAIExamples/MultimodalQnA/ui/
docker build --no-cache -t opea/multimodalqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
```

Then run the command `docker images`, you will have the following 8 Docker Images:
Then run the command `docker images`; you should see the following 11 Docker images:

1. `opea/dataprep-multimodal-redis:latest`
2. `opea/lvm-tgi:latest`
3. `ghcr.io/huggingface/tgi-gaudi:2.0.6`
4. `opea/retriever-multimodal-redis:latest`
5. `opea/embedding-multimodal:latest`
6. `opea/embedding-multimodal-bridgetower:latest`
7. `opea/multimodalqna:latest`
8. `opea/multimodalqna-ui:latest`
5. `opea/whisper:latest`
6. `opea/asr:latest`
7. `opea/redis-vector-db`
8. `opea/embedding-multimodal:latest`
9. `opea/embedding-multimodal-bridgetower:latest`
10. `opea/multimodalqna:latest`
11. `opea/multimodalqna-ui:latest`

## 🚀 Start Microservices

@@ -189,7 +209,16 @@ curl http://${host_ip}:7000/v1/multimodal_retrieval \
-d "{\"text\":\"test\",\"embedding\":${your_embedding}}"
```

4. TGI LLaVA Gaudi Server
4. asr

```bash
curl ${ASR_SERVICE_ENDPOINT} \
-X POST \
-H "Content-Type: application/json" \
-d '{"byte_str" : "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
```
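If your source audio is not already a WAV file, convert it before encoding; Whisper models are typically fed 16 kHz mono audio. A sketch (assumes `ffmpeg` is installed; `input.mp3` and `sample.wav` are placeholder filenames):

```bash
# Sketch: convert arbitrary audio to 16 kHz mono WAV, then base64-encode it for the request body
ffmpeg -i input.mp3 -ar 16000 -ac 1 sample.wav
base64 -w 0 sample.wav > sample_b64.txt
```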

5. TGI LLaVA Gaudi Server

```bash
curl http://${host_ip}:${LLAVA_SERVER_PORT}/generate \
@@ -198,7 +227,7 @@ curl http://${host_ip}:${LLAVA_SERVER_PORT}/generate \
-H 'Content-Type: application/json'
```

5. lvm-tgi
6. lvm-tgi

```bash
curl http://${host_ip}:9399/v1/lvm \
@@ -223,7 +252,7 @@ curl http://${host_ip}:9399/v1/lvm \
-d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [], "chat_template":"The caption of the image is: '\''{context}'\''. {question}"}'
```

6. Multimodal Dataprep Microservice
7. Multimodal Dataprep Microservice

Download a sample video, image, and audio file and create a caption

@@ -297,7 +326,7 @@ curl -X POST \
${DATAPREP_DELETE_FILE_ENDPOINT}
```

7. MegaService
8. MegaService

```bash
curl http://${host_ip}:8888/v1/multimodalqna \
24 changes: 24 additions & 0 deletions MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -8,6 +8,27 @@ services:
ports:
- "6379:6379"
- "8001:8001"
whisper-service:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "${ASR_SERVICE_PORT}:9099"
ipc: host
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
ASR_SERVICE_ENDPOINT: ${ASR_SERVICE_ENDPOINT}
dataprep-multimodal-redis:
image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest}
container_name: dataprep-multimodal-redis
@@ -119,6 +140,7 @@ services:
- embedding-multimodal
- retriever-multimodal-redis
- lvm-tgi
- asr
ports:
- "8888:8888"
environment:
@@ -130,6 +152,8 @@
MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP}
LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
ASR_SERVICE_ENDPOINT: ${ASR_SERVICE_ENDPOINT}
ipc: host
restart: always
multimodalqna-ui:
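After `docker compose up`, two quick checks for the audio path (a sketch; container names come from the `container_name` fields above):

```bash
# Sketch: confirm the published ASR port mapping and peek at the Whisper server logs
docker port asr-service
docker logs --tail 20 whisper-service
```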
3 changes: 3 additions & 0 deletions MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -12,6 +12,9 @@ export https_proxy=${your_http_proxy}
export EMBEDDER_PORT=6006
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMBEDDER_PORT/v1/encode"
export MM_EMBEDDING_PORT_MICROSERVICE=6000
export ASR_ENDPOINT=http://$host_ip:7066
export ASR_SERVICE_PORT=3001
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
export REDIS_URL="redis://${host_ip}:6379"
export REDIS_HOST=${host_ip}
export INDEX_NAME="mm-rag-redis"
12 changes: 12 additions & 0 deletions MultimodalQnA/docker_image_build/build.yaml
@@ -59,3 +59,15 @@ services:
dockerfile: comps/dataprep/multimodal/redis/langchain/Dockerfile
extends: multimodalqna
image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest}
whisper-service:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/dependency/Dockerfile
extends: multimodalqna
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
asr:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/Dockerfile
extends: multimodalqna
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
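A quick way to confirm the two new services are picked up by the build file (a sketch, run from `MultimodalQnA/docker_image_build/`):

```bash
# Sketch: list the services defined in build.yaml and filter for the audio-related ones
docker compose -f build.yaml config --services | grep -E 'whisper-service|asr'
```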