Skip to content

Commit

Permalink
feat: change oneccl to internal (#12296)
Browse files Browse the repository at this point in the history
* feat: change oneccl

* fix: restore llama-70b

* fix: remove tab

* fix: remove extra blank

* small fix

* add comments

* fix: add a blank space
  • Loading branch information
cranechu0131 authored Oct 31, 2024
1 parent 6f22133 commit 29400e2
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 7 deletions.
3 changes: 2 additions & 1 deletion python/llm/example/GPU/Deepspeed-AutoTP/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ conda activate llm
# below command will install intel_extension_for_pytorch==2.1.10+xpu as default
pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
pip install transformers==4.37.0
pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
wget https://sourceforge.net/projects/oneccl-wks/files/2024.0.0.5.1-release/oneccl_wks_installer_2024.0.0.5.1.sh
bash oneccl_wks_installer_2024.0.0.5.1.sh
# Configure OneAPI environment variables
source /opt/intel/oneapi/setvars.sh
pip install git+https://github.com/microsoft/DeepSpeed.git@ed8aed5
Expand Down
5 changes: 3 additions & 2 deletions python/llm/example/GPU/Deepspeed-AutoTP/deepspeed_autotp.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ def get_int_from_env(env_keys, default):
deepspeed.comm.comm.cdb = None
from deepspeed.comm.comm import init_distributed
init_distributed()

from ipex_llm.utils import BenchmarkWrapper
model = BenchmarkWrapper(model)
print(model)

# Load tokenizer
Expand Down Expand Up @@ -135,7 +136,7 @@ def get_int_from_env(env_keys, default):
actual_output_len = output.shape[1] - input_ids.shape[1]
output_str = tokenizer.decode(output[0], skip_special_tokens=True)
avg_time = (end - st) / actual_output_len * 1000
print(f'Inference time of generating {actual_output_len} tokens: {end-st} s, average token latency is {avg_time} ms/token.')
print(f'Inference time of generating {actual_output_len} tokens: {end-st} s, first token cost {model.first_cost} s, rest tokens average cost {model.rest_cost_mean} s')
print('-'*20, 'Prompt', '-'*20)
print(prompt)
print('-'*20, 'Output', '-'*20)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ export CCL_ZE_IPC_EXCHANGE=sockets
export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so:${LD_PRELOAD}
basekit_root=/opt/intel/oneapi
source $basekit_root/setvars.sh --force
source $basekit_root/ccl/latest/env/vars.sh --force
# source $basekit_root/ccl/latest/env/vars.sh --force  # deprecated: oneccl_bind_pt is replaced by the internal oneCCL (sourced below) for better performance
source /opt/intel/1ccl-wks/setvars.sh

export OMP_NUM_THREADS=$((56/$NUM_GPUS))
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ export CCL_ZE_IPC_EXCHANGE=sockets
export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so:${LD_PRELOAD}
basekit_root=/opt/intel/oneapi
source $basekit_root/setvars.sh --force
source $basekit_root/ccl/latest/env/vars.sh --force
# source $basekit_root/ccl/latest/env/vars.sh --force  # deprecated: oneccl_bind_pt is replaced by the internal oneCCL (sourced below) for better performance
source /opt/intel/1ccl-wks/setvars.sh

NUM_GPUS=2 # number of used GPU
export USE_XETLA=OFF
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ export CCL_ZE_IPC_EXCHANGE=sockets
export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so:${LD_PRELOAD}
basekit_root=/opt/intel/oneapi
source $basekit_root/setvars.sh --force
source $basekit_root/ccl/latest/env/vars.sh --force
# source $basekit_root/ccl/latest/env/vars.sh --force  # deprecated: oneccl_bind_pt is replaced by the internal oneCCL (sourced below) for better performance
source /opt/intel/1ccl-wks/setvars.sh

NUM_GPUS=2 # number of used GPU
export USE_XETLA=OFF
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ export CCL_ZE_IPC_EXCHANGE=sockets
export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so:${LD_PRELOAD}
basekit_root=/opt/intel/oneapi
source $basekit_root/setvars.sh --force
source $basekit_root/ccl/latest/env/vars.sh --force
# source $basekit_root/ccl/latest/env/vars.sh --force  # deprecated: oneccl_bind_pt is replaced by the internal oneCCL (sourced below) for better performance
source /opt/intel/1ccl-wks/setvars.sh

NUM_GPUS=2 # number of used GPU
export USE_XETLA=OFF
Expand Down

0 comments on commit 29400e2

Please sign in to comment.