From d6dbde89f1bb571d67a51ba248cdaca2fcb729ce Mon Sep 17 00:00:00 2001
From: Lilac09 <74996885+Zhengjin-Wang@users.noreply.github.com>
Date: Tue, 23 Jan 2024 14:17:05 +0800
Subject: [PATCH] Using original fastchat and add bigdl worker in docker image
 (#9967)

* add vllm worker

* add options in entrypoint
---
 docker/llm/serving/cpu/docker/Dockerfile    |   6 +-
 docker/llm/serving/cpu/docker/entrypoint.sh |   8 +-
 docker/llm/serving/xpu/docker/Dockerfile    |   6 +-
 docker/llm/serving/xpu/docker/entrypoint.sh | 200 ++++++++++++--------
 4 files changed, 126 insertions(+), 94 deletions(-)

diff --git a/docker/llm/serving/cpu/docker/Dockerfile b/docker/llm/serving/cpu/docker/Dockerfile
index 7edcc09997d..9047c03fd12 100644
--- a/docker/llm/serving/cpu/docker/Dockerfile
+++ b/docker/llm/serving/cpu/docker/Dockerfile
@@ -11,11 +11,7 @@ COPY ./entrypoint.sh /opt/entrypoint.sh
 ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /sbin/tini
 # Install Serving Dependencies
 RUN cd /llm && \
-    git clone https://github.com/analytics-zoo/FastChat.git && \
-    cd FastChat && \
-    git checkout dev-2023-09-22 && \
-    pip3 install -e ".[model_worker,webui]" && \
-    cd /llm && \
+    pip install --pre --upgrade bigdl-llm[serving] && \
     chmod +x /opt/entrypoint.sh && \
     chmod +x /sbin/tini && \
     cp /sbin/tini /usr/bin/tini
diff --git a/docker/llm/serving/cpu/docker/entrypoint.sh b/docker/llm/serving/cpu/docker/entrypoint.sh
index 48c0ce055ea..7fd1e5ab322 100644
--- a/docker/llm/serving/cpu/docker/entrypoint.sh
+++ b/docker/llm/serving/cpu/docker/entrypoint.sh
@@ -134,9 +134,9 @@ else
   done

   if [ "$worker_type" == "model_worker" ]; then
-    worker_type="fastchat.serve.model_worker"
+    worker_type="bigdl.llm.serving.model_worker"
   elif [ "$worker_type" == "vllm_worker" ]; then
-    worker_type="fastchat.serve.vllm_worker"
+    worker_type="bigdl.llm.serving.vllm_worker"
   fi

   if [[ -n $CONTROLLER_HOST ]]; then
@@ -213,9 +213,9 @@ else
   echo "Worker type: $worker_type"
   echo "Worker address: $worker_address"
   echo "Controller address: $controller_address"
-  if [ "$worker_type" == "fastchat.serve.model_worker" ]; then
+  if [ "$worker_type" == "bigdl.llm.serving.model_worker" ]; then
     python3 -m "$worker_type" --model-path $model_path --device cpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address --stream-interval $stream_interval
-  elif [ "$worker_type" == "fastchat.serve.vllm_worker" ]; then
+  elif [ "$worker_type" == "bigdl.llm.serving.vllm_worker" ]; then
     python3 -m "$worker_type" --model-path $model_path --device cpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address
   fi
 fi
diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile
index a8ad97d75da..31acc27bd12 100644
--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@@ -10,11 +10,7 @@ COPY ./entrypoint.sh /opt/entrypoint.sh

 # Install Serving Dependencies
 RUN cd /llm && \
-    git clone https://github.com/analytics-zoo/FastChat.git && \
-    cd FastChat && \
-    git checkout dev-2023-09-22 && \
-    pip3 install -e ".[model_worker,webui]" && \
-    cd /llm && \
+    pip install --pre --upgrade bigdl-llm[serving] && \
     chmod +x /opt/entrypoint.sh

diff --git a/docker/llm/serving/xpu/docker/entrypoint.sh b/docker/llm/serving/xpu/docker/entrypoint.sh
index 92ea43c5437..705797f3230 100644
--- a/docker/llm/serving/xpu/docker/entrypoint.sh
+++ b/docker/llm/serving/xpu/docker/entrypoint.sh
@@ -1,17 +1,17 @@
 #!/bin/bash

 usage() {
-  echo "Usage: $0 [--service-model-path <your_model_path>] [--help]"
-  echo "--help: Print help message."
-  echo "--service-model-path: set model path for model worker"
-  echo "The following environment variables can be set."
-  echo "CONTROLLER_HOST (default: localhost)."
-  echo "CONTROLLER_PORT (default: 21001)."
-  echo "WORKER_HOST (default: localhost)."
-  echo "WORKER_PORT (default: 21002)."
-  echo "API_HOST (default: localhost)."
-  echo "API_PORT (default: 8000)."
-  exit 1
+  echo "Usage: $0 [-m --mode <controller|worker>] [-w --worker <model_worker|vllm_worker>] [--help]"
+  echo "--help: Print help message."
+  echo "The following environment variables can be set."
+  echo "MODEL_PATH (default: empty)."
+  echo "CONTROLLER_HOST (default: localhost)."
+  echo "CONTROLLER_PORT (default: 21001)."
+  echo "WORKER_HOST (default: localhost)."
+  echo "WORKER_PORT (default: 21002)."
+  echo "API_HOST (default: localhost)."
+  echo "API_PORT (default: 8000)."
+  exit 1
 }

 # Default values
@@ -21,79 +21,119 @@ worker_host="localhost"
 worker_port="21002"
 api_host="localhost"
 api_port="8000"
-service_model_path=""
+model_path=""
+mode=""
+dispatch_method="shortest_queue" # shortest_queue or lottery
+stream_interval=1
+worker_type="model_worker"

 # We do not have any arguments, just run bash
 if [ "$#" == 0 ]; then
-    echo "[INFO] no command is passed in"
-    echo "[INFO] enter pass-through mode"
-    exec /usr/bin/bash -s -- "bash"
+  echo "[INFO] no command is passed in"
+  echo "[INFO] enter pass-through mode"
+  exec /usr/bin/bash -s -- "bash"
 else
-    # Parse command-line options
-    options=$(getopt -o "" --long "service-model-path:,help" -n "$0" -- "$@")
-    if [ $? != 0 ]; then
-        usage
-    fi
-    eval set -- "$options"
-
-    while true; do
-        case "$1" in
-            --service-model-path)
-                service_model_path="$2"
-                shift 2
-                ;;
-            --help)
-                usage
-                ;;
-            --)
-                shift
-                break
-                ;;
-            *)
-                usage
-                ;;
-        esac
-    done
-
-    if [[ -n $CONTROLLER_HOST ]]; then
-        controller_host=$CONTROLLER_HOST
-    fi
-
-    if [[ -n $CONTROLLER_PORT ]]; then
-        controller_port=$CONTROLLER_PORT
-    fi
-
-    if [[ -n $WORKER_HOST ]]; then
-        worker_host=$WORKER_HOST
-    fi
-
-    if [[ -n $WORKER_PORT ]]; then
-        worker_port=$WORKER_PORT
-    fi
-
-    if [[ -n $API_HOST ]]; then
-        api_host=$API_HOST
-    fi
-
-    if [[ -n $API_PORT ]]; then
-        api_port=$API_PORT
-    fi
-
-    controller_address="http://$controller_host:$controller_port"
-    worker_address="http://$worker_host:$worker_port"
-    api_address="http://$api_host:$api_port"
-
-    unset http_proxy
-    unset https_proxy
-
-    python3 -m fastchat.serve.controller --host $controller_host --port $controller_port &
-    python3 -m bigdl.llm.serving.model_worker --model-path $service_model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address &
-    python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address &
-
-    echo "Controller address: $controller_address"
-    echo "Worker address: $worker_address"
-    echo "OpenAI API address: $api_address"
-
+  # Parse command-line options
+  options=$(getopt -o "m:hw:" --long "mode:,help,worker:" -n "$0" -- "$@")
+  if [ $? != 0 ]; then
+    usage
+  fi
+  eval set -- "$options"
+
+  while true; do
+    case "$1" in
+      -m|--mode)
+        mode="$2"
+        [[ $mode == "controller" || $mode == "worker" ]] || usage
+        shift 2
+        ;;
+      -w|--worker)
+        worker_type="$2"
+        [[ $worker_type == "model_worker" || $worker_type == "vllm_worker" ]] || usage
+        shift 2
+        ;;
+      -h|--help)
+        usage
+        ;;
+      --)
+        shift
+        break
+        ;;
+      *)
+        usage
+        ;;
+    esac
+  done
+
+  if [ "$worker_type" == "model_worker" ]; then
+    worker_type="bigdl.llm.serving.model_worker"
+  elif [ "$worker_type" == "vllm_worker" ]; then
+    worker_type="bigdl.llm.serving.vllm_worker"
+  fi
+
+  if [[ -n $CONTROLLER_HOST ]]; then
+    controller_host=$CONTROLLER_HOST
+  fi
+
+  if [[ -n $CONTROLLER_PORT ]]; then
+    controller_port=$CONTROLLER_PORT
+  fi
+
+  if [[ -n $WORKER_HOST ]]; then
+    worker_host=$WORKER_HOST
+  fi
+
+  if [[ -n $WORKER_PORT ]]; then
+    worker_port=$WORKER_PORT
+  fi
+
+  if [[ -n $MODEL_PATH ]]; then
+    model_path=$MODEL_PATH
+  fi
+
+  if [[ -n $API_HOST ]]; then
+    api_host=$API_HOST
+  fi
+
+  if [[ -n $API_PORT ]]; then
+    api_port=$API_PORT
+  fi
+
+  if [[ -n $DISPATCH_METHOD ]]; then
+    dispatch_method=$DISPATCH_METHOD
+  fi
+
+  if [[ -n $STREAM_INTERVAL ]]; then
+    stream_interval=$STREAM_INTERVAL
+  fi
+
+  controller_address="http://$controller_host:$controller_port"
+
+  unset http_proxy
+  unset https_proxy
+
+  if [[ $mode == "controller" ]]; then
+
+    api_address="http://$api_host:$api_port"
+    echo "Controller address: $controller_address"
+    echo "OpenAI API address: $api_address"
+    python3 -m fastchat.serve.controller --host $controller_host --port $controller_port --dispatch-method $dispatch_method &
+    python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address
+
+  else
+
+    worker_address="http://$worker_host:$worker_port"
+    echo "Worker type: $worker_type"
+    echo "Worker address: $worker_address"
+    echo "Controller address: $controller_address"
+
+    if [ "$worker_type" == "bigdl.llm.serving.model_worker" ]; then
+      python3 -m "$worker_type" --model-path $model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address --stream-interval $stream_interval
+    elif [ "$worker_type" == "bigdl.llm.serving.vllm_worker" ]; then
+      python3 -m "$worker_type" --model-path $model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address
+    fi
+  fi
 fi

 exec /usr/bin/bash -s -- "bash"
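
A minimal usage sketch for the reworked xpu entrypoint (illustrative, not part of the patch; it assumes the script is invoked inside a container built from this image, and the concrete host values are placeholders). In controller mode the script starts fastchat.serve.controller together with the OpenAI-compatible API server, and the scheduling policy can be overridden through DISPATCH_METHOD:

    # Start the controller and the OpenAI API server on this host.
    # CONTROLLER_HOST:CONTROLLER_PORT defaults to localhost:21001 and
    # API_HOST:API_PORT to localhost:8000, as printed by usage().
    CONTROLLER_HOST=0.0.0.0 API_HOST=0.0.0.0 DISPATCH_METHOD=shortest_queue \
      bash /opt/entrypoint.sh --mode controller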
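
In worker mode the script registers a BigDL-backed worker with that controller; MODEL_PATH selects the served model and -w/--worker picks the worker implementation (the model path below is a placeholder):

    # Start bigdl.llm.serving.model_worker for the model at MODEL_PATH;
    # STREAM_INTERVAL controls how often partial generations are flushed.
    MODEL_PATH=/llm/models/vicuna-7b-v1.5 CONTROLLER_HOST=localhost STREAM_INTERVAL=1 \
      bash /opt/entrypoint.sh --mode worker --worker model_worker

    # Or the vLLM-based worker (the entrypoint omits --stream-interval here):
    MODEL_PATH=/llm/models/vicuna-7b-v1.5 \
      bash /opt/entrypoint.sh --mode worker --worker vllm_worker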