Skip to content

Commit

Permalink
fix nightly perf test (#11251)
Browse files Browse the repository at this point in the history
  • Loading branch information
liu-shaojun authored Jun 7, 2024
1 parent 2f80911 commit 85df5e7
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 20 deletions.
38 changes: 19 additions & 19 deletions python/llm/dev/benchmark/all-in-one/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def run_native_int4(repo_id,
in_out_len = in_out.split("-")
in_len = int(in_out_len[0])
out_len = int(in_out_len[1])
input_str = open(f"prompt/{in_len}.txt", 'r').read()
input_str = open(f"prompt/continuation/{in_len}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
n_ctx = in_len + out_len if in_len + out_len > 512 else 512
Expand Down Expand Up @@ -272,7 +272,7 @@ def run_transformer_int4(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -337,7 +337,7 @@ def run_pytorch_autocast_bf16(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -408,7 +408,7 @@ def run_optimize_model(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -597,7 +597,7 @@ def run_optimize_model_gpu(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -669,7 +669,7 @@ def run_ipex_fp16_gpu(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -747,7 +747,7 @@ def run_bigdl_fp16_gpu(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -839,7 +839,7 @@ def run_deepspeed_transformer_int4_cpu(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -926,7 +926,7 @@ def run_transformer_int4_gpu_win(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -1035,7 +1035,7 @@ def run_transformer_int4_fp16_gpu_win(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -1139,7 +1139,7 @@ def run_transformer_int4_loadlowbit_gpu_win(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -1228,7 +1228,7 @@ def run_transformer_autocast_bf16( repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -1299,7 +1299,7 @@ def run_bigdl_ipex_bf16(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -1369,7 +1369,7 @@ def run_bigdl_ipex_int4(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -1439,7 +1439,7 @@ def run_bigdl_ipex_int8(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -1550,7 +1550,7 @@ def get_int_from_env(env_keys, default):
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192] and test_length < 8192:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -1627,7 +1627,7 @@ def run_speculative_cpu(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -1705,7 +1705,7 @@ def run_speculative_gpu(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down Expand Up @@ -1825,7 +1825,7 @@ def run_pipeline_parallel_gpu(repo_id,
test_length = min(in_len*2, 8192)
while test_length not in [32, 256, 1024, 2048, 8192]:
test_length = test_length * 2
input_str = open(f"prompt/{test_length}.txt", 'r').read()
input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
# As different tokenizer has different encodings,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def benchmark(
)
args = parser.parse_args()
PROMPT_LENGTH = args.prompt_length
PROMPT = open(f"prompt/{PROMPT_LENGTH}.txt", "r").read()
PROMPT = open(f"prompt/continuation/{PROMPT_LENGTH}.txt", "r").read()
MAX_TOKENS = args.max_new_tokens


Expand Down

0 comments on commit 85df5e7

Please sign in to comment.