From b2103163f15a4be81e05e41c07120d6cd8b562c5 Mon Sep 17 00:00:00 2001
From: Yishuo Wang <yishuo.wang@intel.com>
Date: Thu, 30 May 2024 15:46:13 +0800
Subject: [PATCH] Clean up perf test workflow and re-enable full benchmark model lists

---
 .github/workflows/llm_performance_tests.yml   | 34 ++++++--------
 .../test/benchmark/arc-perf-test-batch2.yaml  | 45 +++++++++----------
 python/llm/test/benchmark/arc-perf-test.yaml  | 38 ++++++++--------
 .../arc-perf-transformers-437-batch2.yaml     | 10 ++---
 .../benchmark/arc-perf-transformers-437.yaml  | 10 ++---
 python/llm/test/benchmark/csv_to_html.py      |  2 +-
 6 files changed, 65 insertions(+), 74 deletions(-)

diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index 331c8f7845c..525b708d582 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -106,13 +106,9 @@ jobs:
           sed -i 's/{today}/{today}_test1/g' run.py
           python run.py
           # batch_size 2
-          cd ../../../../../ # go back to ipex-llm
+          cd ../../../../../
           cp python/llm/test/benchmark/arc-perf-test-batch2.yaml python/llm/dev/benchmark/all-in-one/config.yaml
           cd python/llm/dev/benchmark/all-in-one
-          # hide time info
-          # sed -i 's/str(end - st)/"xxxxxx"/g' run.py #mwj
-          # change csv name
-          # sed -i 's/{today}/{today}_test1/g' run.py #mwj
           python run.py
 
       - name: Test on xpu(transformers==4.37.0)
@@ -130,53 +126,51 @@ jobs:
           sed -i 's/test1/test2/g' run.py
           python run.py
           # batch_size 2
-          cd ../../../../../ # go back to ipex-llm
+          cd ../../../../../
           cp python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml python/llm/dev/benchmark/all-in-one/config.yaml
           cd python/llm/dev/benchmark/all-in-one
-          # change csv name
-          # sed -i 's/test1/test2/g' run.py
           python run.py
 
       - name: Concat csv and generate html
         shell: bash
         run: |
-          # batch 1 
-          cd python/llm/dev/benchmark/all-in-one/test_batch1 #mwj
-          python ../../../../test/benchmark/concat_csv.py #mwj
+          # batch_size 1
+          cd python/llm/dev/benchmark/all-in-one/test_batch1
+          python ../../../../test/benchmark/concat_csv.py
           for file in *.csv; do
               if [[ $file != *test* ]]; then
                   cp "$file" $CSV_SAVE_PATH
               fi
           done
           python -m pip install pandas==1.5.3
-          cd ../../../../test/benchmark # mwj
+          cd ../../../../test/benchmark
           python csv_to_html.py -f $CSV_SAVE_PATH
-          # batch 2
-          cd ../../../../  # go back to ipex-llm 
-          cd python/llm/dev/benchmark/all-in-one/test_batch2 #mwj
-          python ../../../../test/benchmark/concat_csv.py #mwj
+          # batch_size 2
+          cd ../../../../
+          cd python/llm/dev/benchmark/all-in-one/test_batch2
+          python ../../../../test/benchmark/concat_csv.py
           for file in *.csv; do
               if [[ $file != *test* ]]; then
                   cp "$file" $CSV_SAVE_PATH
               fi
           done
-          cd ../../../../test/benchmark # mwj
+          cd ../../../../test/benchmark
           python csv_to_html.py -f $CSV_SAVE_PATH
 
       - name: Check and upload results to ftp
         shell: bash
         run: |
-          # batch 1
+          # batch_size 1
           cd python/llm/dev/benchmark/all-in-one/test_batch1
           python ../../../../test/benchmark/check_results.py -c test1 -y ../../../../test/benchmark/arc-perf-test.yaml
           python ../../../../test/benchmark/check_results.py -c test2 -y ../../../../test/benchmark/arc-perf-transformers-437.yaml
           find . -name "*test*.csv" -delete
-          cd ../ # go back to all-in-one dir
+          cd ../
           rm -r test_batch1
           if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then
             curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/
           fi
-          # batch 2
+          # batch_size 2
           cd test_batch2
           python ../../../../test/benchmark/check_results.py -c test1 -y ../../../../test/benchmark/arc-perf-test-batch2.yaml
           python ../../../../test/benchmark/check_results.py -c test2 -y ../../../../test/benchmark/arc-perf-transformers-437-batch2.yaml
diff --git a/python/llm/test/benchmark/arc-perf-test-batch2.yaml b/python/llm/test/benchmark/arc-perf-test-batch2.yaml
index eb9b0cb9808..d88a03bd1ae 100644
--- a/python/llm/test/benchmark/arc-perf-test-batch2.yaml
+++ b/python/llm/test/benchmark/arc-perf-test-batch2.yaml
@@ -1,29 +1,29 @@
 repo_id:
   - 'meta-llama/Llama-2-7b-chat-hf'
   - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'THUDM/chatglm2-6b'
-  # - 'THUDM/chatglm3-6b-4bit'
-  # - 'tiiuae/falcon-7b-instruct-with-patch'
-  # - 'mosaicml/mpt-7b-chat'
-  # - 'redpajama/gptneox-7b-redpajama-bf16'
-  # - 'bigcode/starcoder-15.5b-4bit'
-  # - 'databricks/dolly-v1-6b'
-  # - 'databricks/dolly-v2-7b'
-  # - 'databricks/dolly-v2-12b'
-  # - 'internlm/internlm-chat-7b'
-  # - 'Qwen/Qwen-7B-Chat'
-  # - 'BAAI/AquilaChat-7B'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
-  # - 'bigscience/bloomz-7b1'
+  - 'THUDM/chatglm2-6b'
+  - 'THUDM/chatglm3-6b-4bit'
+  - 'tiiuae/falcon-7b-instruct-with-patch'
+  - 'mosaicml/mpt-7b-chat'
+  - 'redpajama/gptneox-7b-redpajama-bf16'
+  - 'bigcode/starcoder-15.5b-4bit'
+  - 'databricks/dolly-v1-6b'
+  - 'databricks/dolly-v2-7b'
+  - 'databricks/dolly-v2-12b'
+  - 'internlm/internlm-chat-7b'
+  - 'Qwen/Qwen-7B-Chat'
+  - 'BAAI/AquilaChat-7B'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
+  - 'bigscience/bloomz-7b1'
 #  - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+
-  # - 'mistralai/Mistral-7B-v0.1'
+  - 'mistralai/Mistral-7B-v0.1'
 local_model_hub: '/mnt/disk1/models'
 warm_up: 1
 num_trials: 3
 num_beams: 1 # default to greedy search
 low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
-batch_size: # 1 # default to 1
+batch_size: # default is 1; this config benchmarks batch size 2
   - 2
 in_out_pairs:
   - '32-32'
@@ -33,10 +33,7 @@ test_api:
   - "transformer_int4_gpu"  # on Intel GPU
 cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
 exclude:
-  # - 'bigcode/starcoder-15.5b-4bit:2048:2'
-  # - 'databricks/dolly-v2-12b:2048:2'
-  # - 'baichuan-inc/Baichuan2-7B-Chat:32:2' 
-  # - 'baichuan-inc/Baichuan2-7B-Chat:1024:2' 
-  # - 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
-  # - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048:2'
-  # - 'bigscience/bloomz-7b1:2048:2'
\ No newline at end of file
+  - 'bigcode/starcoder-15.5b-4bit:2048:2'
+  - 'databricks/dolly-v2-12b:2048:2'
+  - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048:2'
+  - 'bigscience/bloomz-7b1:2048:2'
\ No newline at end of file
diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml
index 90dc51b42f2..2a49a46e6a6 100644
--- a/python/llm/test/benchmark/arc-perf-test.yaml
+++ b/python/llm/test/benchmark/arc-perf-test.yaml
@@ -1,29 +1,29 @@
 repo_id:
   - 'meta-llama/Llama-2-7b-chat-hf'
   - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'THUDM/chatglm2-6b'
-  # - 'THUDM/chatglm3-6b-4bit'
-  # - 'tiiuae/falcon-7b-instruct-with-patch'
-  # - 'mosaicml/mpt-7b-chat'
-  # - 'redpajama/gptneox-7b-redpajama-bf16'
-  # - 'bigcode/starcoder-15.5b-4bit'
-  # - 'databricks/dolly-v1-6b'
-  # - 'databricks/dolly-v2-7b'
-  # - 'databricks/dolly-v2-12b'
-  # - 'internlm/internlm-chat-7b'
-  # - 'Qwen/Qwen-7B-Chat'
-  # - 'BAAI/AquilaChat-7B'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
-  # - 'bigscience/bloomz-7b1'
+  - 'THUDM/chatglm2-6b'
+  - 'THUDM/chatglm3-6b-4bit'
+  - 'tiiuae/falcon-7b-instruct-with-patch'
+  - 'mosaicml/mpt-7b-chat'
+  - 'redpajama/gptneox-7b-redpajama-bf16'
+  - 'bigcode/starcoder-15.5b-4bit'
+  - 'databricks/dolly-v1-6b'
+  - 'databricks/dolly-v2-7b'
+  - 'databricks/dolly-v2-12b'
+  - 'internlm/internlm-chat-7b'
+  - 'Qwen/Qwen-7B-Chat'
+  - 'BAAI/AquilaChat-7B'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat-4bit'
+  - 'bigscience/bloomz-7b1'
 #  - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+
-  # - 'mistralai/Mistral-7B-v0.1'
+  - 'mistralai/Mistral-7B-v0.1'
 local_model_hub: '/mnt/disk1/models'
 warm_up: 1
 num_trials: 3
 num_beams: 1 # default to greedy search
 low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
-batch_size: # 1 # default to 1
+batch_size: # default to 1
   - 1
 in_out_pairs:
   - '32-32'
@@ -33,5 +33,5 @@ test_api:
   - "transformer_int4_gpu"  # on Intel GPU
 cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
 exclude:
-  # - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048:1'
-  # - 'bigscience/bloomz-7b1:2048:1'
\ No newline at end of file
+  - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048:1'
+  - 'bigscience/bloomz-7b1:2048:1'
\ No newline at end of file
diff --git a/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml b/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml
index ca9cd4c4504..805543d8227 100644
--- a/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml
+++ b/python/llm/test/benchmark/arc-perf-transformers-437-batch2.yaml
@@ -1,16 +1,16 @@
 # For the models that require transformers 4.37.0
 repo_id:
   - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen1.5-MoE-A2.7B-Chat'
-  # - 'microsoft/phi-2'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'Qwen/Qwen1.5-MoE-A2.7B-Chat'
+  - 'microsoft/phi-2'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
 local_model_hub: '/mnt/disk1/models'
 warm_up: 1
 num_trials: 3
 num_beams: 1 # default to greedy search
 low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
-batch_size: # 1 # default to 1
+batch_size: # default is 1; this config benchmarks batch size 2
   - 2
 in_out_pairs:
   - '32-32'
diff --git a/python/llm/test/benchmark/arc-perf-transformers-437.yaml b/python/llm/test/benchmark/arc-perf-transformers-437.yaml
index 4e8345fbbbf..5bd76b11070 100644
--- a/python/llm/test/benchmark/arc-perf-transformers-437.yaml
+++ b/python/llm/test/benchmark/arc-perf-transformers-437.yaml
@@ -1,16 +1,16 @@
 # For the models that require transformers 4.37.0
 repo_id:
   - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen1.5-MoE-A2.7B-Chat'
-  # - 'microsoft/phi-2'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'Qwen/Qwen1.5-MoE-A2.7B-Chat'
+  - 'microsoft/phi-2'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
 local_model_hub: '/mnt/disk1/models'
 warm_up: 1
 num_trials: 3
 num_beams: 1 # default to greedy search
 low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
-batch_size: # 1 # default to 1
+batch_size: # default to 1
   - 1
 in_out_pairs:
   - '32-32'
diff --git a/python/llm/test/benchmark/csv_to_html.py b/python/llm/test/benchmark/csv_to_html.py
index 28d546e9d67..ddf3b6847a7 100644
--- a/python/llm/test/benchmark/csv_to_html.py
+++ b/python/llm/test/benchmark/csv_to_html.py
@@ -99,7 +99,7 @@ def main():
             for current_csv_ind,current_csv_row in current_csv.iterrows():
                 current_csv_model=current_csv_row['model'].strip()
                 current_csv_input_output_pairs=current_csv_row['input/output tokens'].strip()
-                try: # mwj edit: add try
+                try:
                   current_csv_batch_size=str(current_csv_row['batch_size'])
                   current_csv_model_input_1st=current_csv_model+'-'+current_csv_input_output_pairs+'-'+current_csv_batch_size+'-'+'1st'
                   current_csv_model_input_2nd=current_csv_model+'-'+current_csv_input_output_pairs+'-'+current_csv_batch_size+'-'+'2nd'