Commit

update
hzjane committed Jul 16, 2024
1 parent 9584742 commit 2d05730
Showing 3 changed files with 5 additions and 4 deletions.
@@ -34,7 +34,7 @@
local_rank = my_rank

async def main():
-    parser = argparse.ArgumentParser(description='Predict Tokens using fastapi by leveraging DeepSpeed-AutoTP')
+    parser = argparse.ArgumentParser(description='Predict Tokens using fastapi by leveraging Pipeline-Parallel')
     parser.add_argument('--repo-id-or-model-path', type=str, default="meta-llama/Llama-2-7b-chat-hf",
                         help='The huggingface repo id for the Llama2 (e.g. `meta-llama/Llama-2-7b-chat-hf`, `meta-llama/Llama-2-13b-chat-hf` and `meta-llama/Llama-2-70b-chat-hf`) to be downloaded'
                              ', or the path to the huggingface checkpoint folder')
5 changes: 3 additions & 2 deletions python/llm/example/GPU/Pipeline-Parallel-FastAPI/run.sh
@@ -41,7 +41,8 @@ export MAX_NUM_SEQS="4"
export MAX_PREFILLED_SEQS=0

if [[ $NUM_GPUS -eq 1 ]]; then
-  python serving.py --repo-id-or-model-path $MODEL_PATH --low-bit $LOW_BIT
+  export ZE_AFFINITY_MASK=0
+  python serving.py --repo-id-or-model-path $MODEL_PATH --low-bit $LOW_BIT
else
-  CCL_ZE_IPC_EXCHANGE=sockets torchrun --standalone --nnodes=1 --nproc-per-node $NUM_GPUS pipeline_serving.py --repo-id-or-model-path $MODEL_PATH --low-bit $LOW_BIT --max-num-seqs $MAX_NUM_SEQS --max-prefilled-seqs $MAX_PREFILLED_SEQS
+  CCL_ZE_IPC_EXCHANGE=sockets torchrun --standalone --nnodes=1 --nproc-per-node $NUM_GPUS pipeline_serving.py --repo-id-or-model-path $MODEL_PATH --low-bit $LOW_BIT --max-num-seqs $MAX_NUM_SEQS --max-prefilled-seqs $MAX_PREFILLED_SEQS
fi
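
The single-GPU branch above now sets ZE_AFFINITY_MASK=0 so the Level Zero runtime exposes only the first GPU to serving.py. A minimal invocation sketch, assuming MODEL_PATH, LOW_BIT and NUM_GPUS are exported earlier in run.sh as the hunk suggests; the concrete values below are illustrative placeholders, not taken from this commit:

# Hypothetical single-GPU run; variable values are assumptions for illustration only.
export MODEL_PATH="meta-llama/Llama-2-7b-chat-hf"   # HF repo id or local checkpoint folder
export LOW_BIT="sym_int4"                           # assumed low-bit format passed to --low-bit
export NUM_GPUS=1                                   # takes the single-GPU branch above
export ZE_AFFINITY_MASK=0                           # expose only Level Zero device 0 to the process
python serving.py --repo-id-or-model-path $MODEL_PATH --low-bit $LOW_BIT
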
@@ -26,7 +26,7 @@
logger = logging.get_logger(__name__)

async def main():
-    parser = argparse.ArgumentParser(description='Predict Tokens using fastapi by leveraging DeepSpeed-AutoTP')
+    parser = argparse.ArgumentParser(description='Predict Tokens using fastapi by leveraging ipex-llm')
     parser.add_argument('--repo-id-or-model-path', type=str, default="meta-llama/Llama-2-7b-chat-hf",
                         help='The huggingface repo id for the Llama2 (e.g. `meta-llama/Llama-2-7b-chat-hf`, `meta-llama/Llama-2-13b-chat-hf` and `meta-llama/Llama-2-70b-chat-hf`) to be downloaded'
                              ', or the path to the huggingface checkpoint folder')
