Skip to content

Commit

Permalink
Fix ServingRuntime args (#291)
Browse files Browse the repository at this point in the history
* fix runtime args

Signed-off-by: jooho lee <[email protected]>

* add disable_custom_all_reduce to solve the vllm-8735 issue

Signed-off-by: jooho lee <[email protected]>

---------

Signed-off-by: jooho lee <[email protected]>
  • Loading branch information
Jooho authored Nov 12, 2024
1 parent 45fe5c3 commit f7b9333
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions config/runtimes/vllm-multinode-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ objects:
- |
ray start --head --disable-usage-stats --include-dashboard false
# wait for the other nodes to join
until [[ $(ray status | grep -c node_) -eq ${PIPELINE_PARALLEL_SIZE} ]]; do
until [[ $(ray status --address $RAY_ADDRESS | grep -c node_) -eq ${PIPELINE_PARALLEL_SIZE} ]]; do
echo "Waiting..."
sleep 1
done
Expand All @@ -50,7 +50,7 @@ objects:
export SERVED_MODEL_NAME=${MODEL_NAME}
export MODEL_NAME=${MODEL_DIR}
exec python3 -m vllm.entrypoints.openai.api_server --port=8080 --distributed-executor-backend ray --model=${MODEL_NAME} --served-model-name=${SERVED_MODEL_NAME}
exec python3 -m vllm.entrypoints.openai.api_server --port=8080 --distributed-executor-backend ray --model=${MODEL_NAME} --served-model-name=${SERVED_MODEL_NAME} --tensor-parallel-size=${TENSOR_PARALLEL_SIZE} --pipeline-parallel-size=${PIPELINE_PARALLEL_SIZE} --disable_custom_all_reduce
env:
- name: RAY_PORT
value: "6379"
Expand All @@ -62,8 +62,10 @@ objects:
fieldPath: metadata.namespace
- name: VLLM_NO_USAGE_STATS
value: "1"
- name: HF_HUB_CACHE
- name: HOME
value: /tmp
- name: HF_HOME
value: /tmp/hf_home
resources:
limits:
cpu: "16"
Expand Down Expand Up @@ -116,7 +118,7 @@ objects:
fi
# Check if the registered ray nodes count is the same as PIPELINE_PARALLEL_SIZE
gpu_status=$(ray status | grep GPU)
gpu_status=$(ray status --address $RAY_ADDRESS | grep GPU)
if [[ -z $gpu_status ]]; then
echo "$1: GPU does not exist"
exit 1
Expand Down

0 comments on commit f7b9333

Please sign in to comment.