Skip to content

Commit

Permalink
update bash
Browse files Browse the repository at this point in the history
  • Loading branch information
hzjane committed Jul 16, 2024
1 parent 0beb240 commit 9584742
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import torch.distributed as dist
from ipex_llm.transformers import init_pipeline_parallel, ModelRunner
from ipex_llm.serving.api import FastApp
from transformers.utils import logging
from transformers import AutoTokenizer
import uvicorn
Expand Down Expand Up @@ -63,7 +64,6 @@ async def main():
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token

from source import FastApp
myapp = FastApp(local_model, tokenizer)
if local_rank == 0:
config = uvicorn.Config(app=myapp.app, host="0.0.0.0", port=args.port)
Expand Down
4 changes: 4 additions & 0 deletions python/llm/example/GPU/Pipeline-Parallel-FastAPI/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,8 @@ export LOW_BIT="fp8"
export MAX_NUM_SEQS="4"
export MAX_PREFILLED_SEQS=0

# Dispatch the server launch based on the number of GPUs requested.
if [[ $NUM_GPUS -eq 1 ]]; then
# Single GPU: run the plain FastAPI serving script directly — no
# pipeline parallelism, so the batching/prefill knobs are not passed.
python serving.py --repo-id-or-model-path $MODEL_PATH --low-bit $LOW_BIT
else
# Multiple GPUs: launch one process per GPU with torchrun on a single
# node, running the pipeline-parallel serving script instead.
# CCL_ZE_IPC_EXCHANGE=sockets selects socket-based IPC exchange for
# oneCCL on Intel GPUs (Level Zero backend).
CCL_ZE_IPC_EXCHANGE=sockets torchrun --standalone --nnodes=1 --nproc-per-node $NUM_GPUS pipeline_serving.py --repo-id-or-model-path $MODEL_PATH --low-bit $LOW_BIT --max-num-seqs $MAX_NUM_SEQS --max-prefilled-seqs $MAX_PREFILLED_SEQS
fi

0 comments on commit 9584742

Please sign in to comment.