From d6dbde89f1bb571d67a51ba248cdaca2fcb729ce Mon Sep 17 00:00:00 2001
From: Lilac09 <74996885+Zhengjin-Wang@users.noreply.github.com>
Date: Tue, 23 Jan 2024 14:17:05 +0800
Subject: [PATCH] Using original fastchat and add bigdl worker in docker image
 (#9967)

* add vllm worker

* add options in entrypoint
---
 docker/llm/serving/cpu/docker/Dockerfile    |   6 +-
 docker/llm/serving/cpu/docker/entrypoint.sh |   8 +-
 docker/llm/serving/xpu/docker/Dockerfile    |   6 +-
 docker/llm/serving/xpu/docker/entrypoint.sh | 200 ++++++++++++--------
 4 files changed, 126 insertions(+), 94 deletions(-)

diff --git a/docker/llm/serving/cpu/docker/Dockerfile b/docker/llm/serving/cpu/docker/Dockerfile
index 7edcc09997d..9047c03fd12 100644
--- a/docker/llm/serving/cpu/docker/Dockerfile
+++ b/docker/llm/serving/cpu/docker/Dockerfile
@@ -11,11 +11,7 @@ COPY ./entrypoint.sh /opt/entrypoint.sh
 ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /sbin/tini
 # Install Serving Dependencies
 RUN cd /llm && \
-    git clone https://github.com/analytics-zoo/FastChat.git && \
-    cd FastChat && \
-    git checkout dev-2023-09-22 && \
-    pip3 install -e ".[model_worker,webui]" && \
-    cd /llm && \
+    pip install --pre --upgrade bigdl-llm[serving] && \
     chmod +x /opt/entrypoint.sh && \
     chmod +x /sbin/tini && \
     cp /sbin/tini /usr/bin/tini
diff --git a/docker/llm/serving/cpu/docker/entrypoint.sh b/docker/llm/serving/cpu/docker/entrypoint.sh
index 48c0ce055ea..7fd1e5ab322 100644
--- a/docker/llm/serving/cpu/docker/entrypoint.sh
+++ b/docker/llm/serving/cpu/docker/entrypoint.sh
@@ -134,9 +134,9 @@ else
   done

   if [ "$worker_type" == "model_worker" ]; then
-    worker_type="fastchat.serve.model_worker"
+    worker_type="bigdl.llm.serving.model_worker"
   elif [ "$worker_type" == "vllm_worker" ]; then
-    worker_type="fastchat.serve.vllm_worker"
+    worker_type="bigdl.llm.serving.vllm_worker"
   fi

   if [[ -n $CONTROLLER_HOST ]]; then
@@ -213,9 +213,9 @@ else
   echo "Worker type: $worker_type"
   echo "Worker address: $worker_address"
   echo "Controller address: $controller_address"
-  if [ "$worker_type" == "fastchat.serve.model_worker" ]; then
+  if [ "$worker_type" == "bigdl.llm.serving.model_worker" ]; then
     python3 -m "$worker_type" --model-path $model_path --device cpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address --stream-interval $stream_interval
-  elif [ "$worker_type" == "fastchat.serve.vllm_worker" ]; then
+  elif [ "$worker_type" == "bigdl.llm.serving.vllm_worker" ]; then
     python3 -m "$worker_type" --model-path $model_path --device cpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address
   fi
 fi
diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile
index a8ad97d75da..31acc27bd12 100644
--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@@ -10,11 +10,7 @@ COPY ./entrypoint.sh /opt/entrypoint.sh

 # Install Serving Dependencies
 RUN cd /llm && \
-    git clone https://github.com/analytics-zoo/FastChat.git && \
-    cd FastChat && \
-    git checkout dev-2023-09-22 && \
-    pip3 install -e ".[model_worker,webui]" && \
-    cd /llm && \
+    pip install --pre --upgrade bigdl-llm[serving] && \
     chmod +x /opt/entrypoint.sh

diff --git a/docker/llm/serving/xpu/docker/entrypoint.sh b/docker/llm/serving/xpu/docker/entrypoint.sh
index 92ea43c5437..705797f3230 100644
--- a/docker/llm/serving/xpu/docker/entrypoint.sh
+++ b/docker/llm/serving/xpu/docker/entrypoint.sh
@@ -1,17 +1,17 @@
 #!/bin/bash

 usage() {
-  echo "Usage: $0 [--service-model-path <your_model_path>] [--help]"
-  echo "--help: Print help message."
-  echo "--service-model-path: set model path for model worker"
-  echo "The following environment variables can be set."
-  echo "CONTROLLER_HOST (default: localhost)."
-  echo "CONTROLLER_PORT (default: 21001)."
-  echo "WORKER_HOST (default: localhost)."
-  echo "WORKER_PORT (default: 21002)."
-  echo "API_HOST (default: localhost)."
-  echo "API_PORT (default: 8000)."
-  exit 1
+  echo "Usage: $0 [-m --mode <controller|worker>] [-w --worker <model_worker|vllm_worker>] [--help]"
+  echo "--help: Print help message."
+  echo "The following environment variables can be set."
+  echo "MODEL_PATH (default: empty)."
+  echo "CONTROLLER_HOST (default: localhost)."
+  echo "CONTROLLER_PORT (default: 21001)."
+  echo "WORKER_HOST (default: localhost)."
+  echo "WORKER_PORT (default: 21002)."
+  echo "API_HOST (default: localhost)."
+  echo "API_PORT (default: 8000)."
+  exit 1
 }

 # Default values
@@ -21,79 +21,119 @@ worker_host="localhost"
 worker_port="21002"
 api_host="localhost"
 api_port="8000"
-service_model_path=""
+model_path=""
+mode=""
+dispatch_method="shortest_queue" # shortest_queue or lottery
+stream_interval=1
+worker_type="model_worker"

 # We do not have any arguments, just run bash
 if [ "$#" == 0 ]; then
-    echo "[INFO] no command is passed in"
-    echo "[INFO] enter pass-through mode"
-    exec /usr/bin/bash -s -- "bash"
+  echo "[INFO] no command is passed in"
+  echo "[INFO] enter pass-through mode"
+  exec /usr/bin/bash -s -- "bash"
 else
-    # Parse command-line options
-    options=$(getopt -o "" --long "service-model-path:,help" -n "$0" -- "$@")
-    if [ $? != 0 ]; then
-        usage
-    fi
-    eval set -- "$options"
-
-    while true; do
-        case "$1" in
-            --service-model-path)
-                service_model_path="$2"
-                shift 2
-                ;;
-            --help)
-                usage
-                ;;
-            --)
-                shift
-                break
-                ;;
-            *)
-                usage
-                ;;
-        esac
-    done
-
-    if [[ -n $CONTROLLER_HOST ]]; then
-        controller_host=$CONTROLLER_HOST
-    fi
-
-    if [[ -n $CONTROLLER_PORT ]]; then
-        controller_port=$CONTROLLER_PORT
-    fi
-
-    if [[ -n $WORKER_HOST ]]; then
-        worker_host=$WORKER_HOST
-    fi
-
-    if [[ -n $WORKER_PORT ]]; then
-        worker_port=$WORKER_PORT
-    fi
-
-    if [[ -n $API_HOST ]]; then
-        api_host=$API_HOST
-    fi
-
-    if [[ -n $API_PORT ]]; then
-        api_port=$API_PORT
-    fi
-
-    controller_address="http://$controller_host:$controller_port"
-    worker_address="http://$worker_host:$worker_port"
-    api_address="http://$api_host:$api_port"
-
-    unset http_proxy
-    unset https_proxy
-
-    python3 -m fastchat.serve.controller --host $controller_host --port $controller_port &
-    python3 -m bigdl.llm.serving.model_worker --model-path $service_model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address &
-    python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address &
-
-    echo "Controller address: $controller_address"
-    echo "Worker address: $worker_address"
-    echo "OpenAI API address: $api_address"
-
+  # Parse command-line options
+  options=$(getopt -o "m:hw:" --long "mode:,help,worker:" -n "$0" -- "$@")
+  if [ $? != 0 ]; then
+    usage
+  fi
+  eval set -- "$options"
+
+  while true; do
+    case "$1" in
+      -m|--mode)
+        mode="$2"
+        [[ $mode == "controller" || $mode == "worker" ]] || usage
+        shift 2
+        ;;
+      -w|--worker)
+        worker_type="$2"
+        [[ $worker_type == "model_worker" || $worker_type == "vllm_worker" ]] || usage
+        shift 2
+        ;;
+      -h|--help)
+        usage
+        ;;
+      --)
+        shift
+        break
+        ;;
+      *)
+        usage
+        ;;
+    esac
+  done
+
+  if [ "$worker_type" == "model_worker" ]; then
+    worker_type="bigdl.llm.serving.model_worker"
+  elif [ "$worker_type" == "vllm_worker" ]; then
+    worker_type="bigdl.llm.serving.vllm_worker"
+  fi
+
+  if [[ -n $CONTROLLER_HOST ]]; then
+    controller_host=$CONTROLLER_HOST
+  fi
+
+  if [[ -n $CONTROLLER_PORT ]]; then
+    controller_port=$CONTROLLER_PORT
+  fi
+
+  if [[ -n $WORKER_HOST ]]; then
+    worker_host=$WORKER_HOST
+  fi
+
+  if [[ -n $WORKER_PORT ]]; then
+    worker_port=$WORKER_PORT
+  fi
+
+  if [[ -n $MODEL_PATH ]]; then
+    model_path=$MODEL_PATH
+  fi
+
+  if [[ -n $API_HOST ]]; then
+    api_host=$API_HOST
+  fi
+
+  if [[ -n $API_PORT ]]; then
+    api_port=$API_PORT
+  fi
+
+  if [[ -n $DISPATCH_METHOD ]]; then
+    dispatch_method=$DISPATCH_METHOD
+  fi
+
+  if [[ -n $STREAM_INTERVAL ]]; then
+    stream_interval=$STREAM_INTERVAL
+  fi
+
+  controller_address="http://$controller_host:$controller_port"
+
+  unset http_proxy
+  unset https_proxy
+
+  if [[ $mode == "controller" ]]; then
+
+    api_address="http://$api_host:$api_port"
+    echo "Controller address: $controller_address"
+    echo "OpenAI API address: $api_address"
+    python3 -m fastchat.serve.controller --host $controller_host --port $controller_port --dispatch-method $dispatch_method &
+    python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address
+
+  else
+
+    worker_address="http://$worker_host:$worker_port"
+    echo "Worker type: $worker_type"
+    echo "Worker address: $worker_address"
+    echo "Controller address: $controller_address"
+
+    if [ "$worker_type" == "bigdl.llm.serving.model_worker" ]; then
+      python3 -m "$worker_type" --model-path $model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address --stream-interval $stream_interval
+    elif [ "$worker_type" == "bigdl.llm.serving.vllm_worker" ]; then
+      python3 -m "$worker_type" --model-path $model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address
+    fi
+  fi
 fi

 exec /usr/bin/bash -s -- "bash"
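
A minimal usage sketch for the reworked xpu entrypoint (illustrative, not part of the patch; it assumes the script is invoked inside a container built from this image, and the concrete host values are placeholders). In controller mode the script starts fastchat.serve.controller together with the OpenAI-compatible API server, and the scheduling policy can be overridden through DISPATCH_METHOD:

    # Start the controller and the OpenAI API server on this host.
    # CONTROLLER_HOST:CONTROLLER_PORT defaults to localhost:21001 and
    # API_HOST:API_PORT to localhost:8000, as printed by usage().
    CONTROLLER_HOST=0.0.0.0 API_HOST=0.0.0.0 DISPATCH_METHOD=shortest_queue \
      bash /opt/entrypoint.sh --mode controller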
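
In worker mode the script registers a BigDL-backed worker with that controller; MODEL_PATH selects the served model and -w/--worker picks the worker implementation (the model path below is a placeholder):

    # Start bigdl.llm.serving.model_worker for the model at MODEL_PATH;
    # STREAM_INTERVAL controls how often partial generations are flushed.
    MODEL_PATH=/llm/models/vicuna-7b-v1.5 CONTROLLER_HOST=localhost STREAM_INTERVAL=1 \
      bash /opt/entrypoint.sh --mode worker --worker model_worker

    # Or the vLLM-based worker (the entrypoint omits --stream-interval here):
    MODEL_PATH=/llm/models/vicuna-7b-v1.5 \
      bash /opt/entrypoint.sh --mode worker --worker vllm_worker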