Commit

Change igpu perf to mainly test int4+fp16
Oscilloscope98 committed Jul 5, 2024
1 parent 72b4efa commit c6088b2
Showing 11 changed files with 89 additions and 96 deletions.
148 changes: 75 additions & 73 deletions .github/workflows/llm_performance_tests.yml

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions python/llm/test/benchmark/igpu-perf/1024-128.yaml
@@ -3,10 +3,9 @@ repo_id:
- 'THUDM/glm-4-9b-chat'
- 'baichuan-inc/Baichuan2-7B-Chat'
- 'baichuan-inc/Baichuan2-13B-Chat'
# - '01-ai/Yi-6B'
- 'meta-llama/Llama-2-7b-chat-hf'
- 'meta-llama/Llama-2-13b-chat-hf'
# - 'liuhaotian/llava-v1.5-7b' # Cannot load using AutoModelForCausalLM in 4.36+
- 'meta-llama/Meta-Llama-3-8B-Instruct'
local_model_hub: 'path to your local model hub'
warm_up: 1
num_trials: 3
1 change: 0 additions & 1 deletion python/llm/test/benchmark/igpu-perf/1024-128_437.yaml
@@ -1,7 +1,6 @@
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
- 'Qwen/Qwen2-7B-Instruct'
- 'meta-llama/Meta-Llama-3-8B-Instruct'
- 'microsoft/Phi-3-mini-4k-instruct'
local_model_hub: 'path to your local model hub'
warm_up: 1
6 changes: 2 additions & 4 deletions python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
@@ -1,13 +1,11 @@
repo_id:
- 'THUDM/chatglm3-6b'
- 'THUDM/glm-4-9b-chat'
- 'baichuan-inc/Baichuan2-7B-Chat'
- 'baichuan-inc/Baichuan2-13B-Chat'
# - '01-ai/Yi-6B'
- 'meta-llama/Llama-2-7b-chat-hf'
- 'meta-llama/Llama-2-13b-chat-hf'
# - 'liuhaotian/llava-v1.5-7b' # Cannot load using AutoModelForCausalLM in 4.36+
# - 'RWKV/rwkv-4-world-7b'
# - 'RWKV/rwkv-5-world-7b'
- 'meta-llama/Meta-Llama-3-8B-Instruct'
local_model_hub: 'path to your local model hub'
warm_up: 1
num_trials: 3
python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml
@@ -1,6 +1,7 @@
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
- 'meta-llama/Meta-Llama-3-8B-Instruct'
- 'Qwen/Qwen2-7B-Instruct'
- 'microsoft/Phi-3-mini-4k-instruct'
local_model_hub: 'path to your local model hub'
warm_up: 1
num_trials: 3
python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml
@@ -3,10 +3,9 @@ repo_id:
- 'THUDM/glm-4-9b-chat'
- 'baichuan-inc/Baichuan2-7B-Chat'
- 'baichuan-inc/Baichuan2-13B-Chat'
# - '01-ai/Yi-6B'
- 'meta-llama/Llama-2-7b-chat-hf'
- 'meta-llama/Llama-2-13b-chat-hf'
# - 'liuhaotian/llava-v1.5-7b' # Cannot load using AutoModelForCausalLM in 4.36+
- 'meta-llama/Meta-Llama-3-8B-Instruct'
local_model_hub: 'path to your local model hub'
warm_up: 1
num_trials: 3
@@ -16,5 +15,5 @@ batch_size: 1 # default to 1
in_out_pairs:
- '1024-128'
test_api:
- "transformer_int4_loadlowbit_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
- "transformer_int4_fp16_loadlowbit_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api)
python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml
@@ -1,7 +1,6 @@
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
- 'Qwen/Qwen2-7B-Instruct'
- 'meta-llama/Meta-Llama-3-8B-Instruct'
- 'microsoft/Phi-3-mini-4k-instruct'
local_model_hub: 'path to your local model hub'
warm_up: 1
@@ -12,5 +11,5 @@ batch_size: 1 # default to 1
in_out_pairs:
- '1024-128'
test_api:
- "transformer_int4_loadlowbit_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
- "transformer_int4_fp16_loadlowbit_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api)
python/llm/test/benchmark/igpu-perf/2048-256.yaml
@@ -3,10 +3,9 @@ repo_id:
- 'THUDM/glm-4-9b-chat'
- 'baichuan-inc/Baichuan2-7B-Chat'
- 'baichuan-inc/Baichuan2-13B-Chat'
# - '01-ai/Yi-6B'
- 'meta-llama/Llama-2-7b-chat-hf'
- 'meta-llama/Llama-2-13b-chat-hf'
# - 'liuhaotian/llava-v1.5-7b' # Cannot load using AutoModelForCausalLM in 4.36+
- 'meta-llama/Meta-Llama-3-8B-Instruct'
local_model_hub: 'path to your local model hub'
warm_up: 1
num_trials: 3
@@ -16,5 +15,5 @@ batch_size: 1 # default to 1
in_out_pairs:
- '2048-256'
test_api:
- "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
- "transformer_int4_fp16_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api)
python/llm/test/benchmark/igpu-perf/2048-256_437.yaml
@@ -1,7 +1,6 @@
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
- 'Qwen/Qwen2-7B-Instruct'
- 'meta-llama/Meta-Llama-3-8B-Instruct'
- 'microsoft/Phi-3-mini-4k-instruct'
local_model_hub: 'path to your local model hub'
warm_up: 1
@@ -12,5 +11,5 @@ batch_size: 1 # default to 1
in_out_pairs:
- '2048-256'
test_api:
- "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
- "transformer_int4_fp16_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api)
python/llm/test/benchmark/igpu-perf/32-32.yaml
@@ -3,10 +3,9 @@ repo_id:
- 'THUDM/glm-4-9b-chat'
- 'baichuan-inc/Baichuan2-7B-Chat'
- 'baichuan-inc/Baichuan2-13B-Chat'
# - '01-ai/Yi-6B'
- 'meta-llama/Llama-2-7b-chat-hf'
- 'meta-llama/Llama-2-13b-chat-hf'
# - 'liuhaotian/llava-v1.5-7b' # Cannot load using AutoModelForCausalLM in 4.36+
- 'meta-llama/Meta-Llama-3-8B-Instruct'
local_model_hub: 'path to your local model hub'
warm_up: 3
num_trials: 5
@@ -16,5 +15,5 @@ batch_size: 1 # default to 1
in_out_pairs:
- '32-32'
test_api:
- "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
- "transformer_int4_fp16_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api)
python/llm/test/benchmark/igpu-perf/32-32_437.yaml
@@ -1,7 +1,6 @@
repo_id:
- 'Qwen/Qwen1.5-7B-Chat'
- 'Qwen/Qwen2-7B-Instruct'
- 'meta-llama/Meta-Llama-3-8B-Instruct'
- 'microsoft/Phi-3-mini-4k-instruct'
local_model_hub: 'path to your local model hub'
warm_up: 3
@@ -12,5 +11,5 @@ batch_size: 1 # default to 1
in_out_pairs:
- '32-32'
test_api:
- "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
- "transformer_int4_fp16_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api)
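All of the YAML hunks above edit the same benchmark config layout. For reference, here is a minimal sketch of how 32-32.yaml would read after this commit, assembled from the visible hunk lines. The 'THUDM/chatglm3-6b' entry and the num_beams/low_bit keys fall outside the visible hunks and are assumptions based on the sibling 1024-128_int4_fp16.yaml list and the usual all-in-one benchmark config; local_model_hub is a placeholder.

```yaml
repo_id:
  - 'THUDM/chatglm3-6b'   # assumed: sits above the visible hunk, as in the sibling int4_fp16 list
  - 'THUDM/glm-4-9b-chat'
  - 'baichuan-inc/Baichuan2-7B-Chat'
  - 'baichuan-inc/Baichuan2-13B-Chat'
  - 'meta-llama/Llama-2-7b-chat-hf'
  - 'meta-llama/Llama-2-13b-chat-hf'
  - 'meta-llama/Meta-Llama-3-8B-Instruct'
local_model_hub: 'path to your local model hub'   # placeholder: directory holding the checkpoints
warm_up: 3      # untimed warm-up runs per model (3 for the short 32-32 pair, 1 for the longer pairs)
num_trials: 5   # timed runs per model (5 here, 3 for the longer pairs)
num_beams: 1    # assumed: greedy search, per the usual all-in-one config
low_bit: 'sym_int4'   # assumed: symmetric int4 weight quantization
batch_size: 1   # default to 1
in_out_pairs:
  - '32-32'     # 32 input tokens, 32 output tokens
test_api:
  - "transformer_int4_fp16_gpu_win"   # int4 weights with fp16 compute on Intel GPU for Windows (catch GPU peak memory)
cpu_embedding: True   # whether to put embedding on CPU (only available now for gpu win related test_api)
```

Across these configs the net change is uniform: each plain transformer_int4_*_gpu_win test_api is swapped for its *_fp16 counterpart, in line with the commit title.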
