From 89554f723b1586ee0d54dcfc93cd33257d17aa58 Mon Sep 17 00:00:00 2001
From: WeiguangHan
Date: Mon, 11 Mar 2024 20:00:47 +0800
Subject: [PATCH] LLM: add whisper models into nightly test (#10193)

* LLM: add whisper models into nightly test

* small fix

* small fix

* add more whisper models

* test all cases

* test specific cases

* collect the csv

* store the resut

* to html

* small fix

* small test

* test all cases

* modify whisper_csv_to_html
---
 .github/workflows/llm-whisper-evaluation.yml  | 207 ++++++++++++++++++
 .../llm/dev/benchmark/whisper/run_whisper.py  |  34 ++-
 .../benchmark/whisper/whisper_concat_csv.py   |  50 +++++
 .../benchmark/whisper/whisper_csv_to_html.py  | 113 ++++++++++
 4 files changed, 400 insertions(+), 4 deletions(-)
 create mode 100644 .github/workflows/llm-whisper-evaluation.yml
 create mode 100644 python/llm/dev/benchmark/whisper/whisper_concat_csv.py
 create mode 100644 python/llm/dev/benchmark/whisper/whisper_csv_to_html.py

diff --git a/.github/workflows/llm-whisper-evaluation.yml b/.github/workflows/llm-whisper-evaluation.yml
new file mode 100644
index 00000000000..85a94101be2
--- /dev/null
+++ b/.github/workflows/llm-whisper-evaluation.yml
@@ -0,0 +1,207 @@
+name: LLM Whisper Models Evaluation
+
+# One concurrency group per PR (per run id when there is no PR): a newer run cancels the one in progress
+concurrency:
+  group: ${{ github.workflow }}-llm-nightly-test-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+# Controls when the action will run.
+on:
+  schedule:
+    - cron: "00 13 * * *" # GMT time, 13:00 GMT == 21:00 China
+  pull_request:
+    branches: [main]
+    paths:
+      - ".github/workflows/llm-whisper-evaluation.yml"
+  # Allows you to run this workflow manually from the Actions tab. NOTE(review): no job below reads these inputs yet.
+  workflow_dispatch:
+    inputs:
+      model_name:
+        description: 'Model names, separated by comma and must be quoted.'
+        required: true
+        type: string
+      precision:
+        description: 'Precisions, separated by comma and must be quoted.'
+        required: true
+        type: string
+      task:
+        description: 'Tasks, separated by comma and must be quoted.'
+        required: true
+        type: string
+      runs-on:
+        description: 'Labels to filter the runners, separated by comma and must be quoted.'
+        default: "accuracy"
+        required: false
+        type: string
+
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  llm-cpp-build: # listed in llm-whisper-evaluation's `needs`; keep it enabled for PR tests
+    uses: ./.github/workflows/llm-binary-build.yml
+
+  # Publish the testing matrix as job outputs (currently the same fixed lists for every trigger)
+  set-matrix:
+    runs-on: ubuntu-latest
+
+    outputs:
+      model_name: ${{ steps.set-matrix.outputs.model_name }}
+      precision: ${{ steps.set-matrix.outputs.precision }}
+      task: ${{ steps.set-matrix.outputs.task }}
+      runner: ${{ steps.set-matrix.outputs.runner }}
+
+    steps:
+      - name: set-env # stage the fixed matrix lists in $GITHUB_ENV
+        env:
+          MATRIX_MODEL_NAME: '["whisper-tiny", "whisper-small", "whisper-medium", "whisper-base"]'
+          MATRIX_TASK: '["librispeech"]'
+          MATRIX_PRECISION: '["sym_int4", "fp8_e5m2"]'
+          LABELS: '["self-hosted", "llm", "perf"]'
+        run: |
+          echo "model_name=$MATRIX_MODEL_NAME" >> $GITHUB_ENV
+          echo "task=$MATRIX_TASK" >> $GITHUB_ENV
+          echo "precision=$MATRIX_PRECISION" >> $GITHUB_ENV
+          echo "runner=$LABELS" >> $GITHUB_ENV
+
+      - name: set-matrix # re-export them as the step outputs that the job-level `outputs` reads
+        id: set-matrix
+        run: |
+          echo "model_name=$model_name" >> $GITHUB_OUTPUT
+          echo "task=$task" >> $GITHUB_OUTPUT
+          echo "precision=$precision" >> $GITHUB_OUTPUT
+          echo "runner=$runner" >> $GITHUB_OUTPUT
+
+  llm-whisper-evaluation:
+    # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-whisper-evaluation' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests
+    needs: [llm-cpp-build, set-matrix] # please uncomment it for PR tests
+    # needs: [set-matrix] # please comment it for PR tests
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9"]
+        model_name: ${{ fromJson(needs.set-matrix.outputs.model_name) }}
+        task: ${{ fromJson(needs.set-matrix.outputs.task) }} # NOTE(review): not referenced by any step of this job
+        precision: ${{ fromJson(needs.set-matrix.outputs.precision) }}
+        device: [xpu] # NOTE(review): the run step hard-codes `--device xpu` instead of reading matrix.device
+    runs-on: ${{ fromJson(needs.set-matrix.outputs.runner) }}
+    env:
+      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+      ORIGIN_DIR: /mnt/disk1/models
+
+    steps:
+      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade wheel
+          python -m pip install --upgrade pandas
+          python -m pip install --upgrade datasets
+          python -m pip install --upgrade evaluate
+          python -m pip install --upgrade soundfile
+          python -m pip install --upgrade librosa
+          python -m pip install --upgrade jiwer
+
+      # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests
+      - name: Download llm binary
+        uses: ./.github/actions/llm/download-llm-binary
+
+      - name: Run LLM install (all) test
+        uses: ./.github/actions/llm/setup-llm-env
+        with:
+          extra-dependency: "xpu_2.1"
+
+      # - name: Install BigDL-LLM from Pypi
+      #   shell: bash
+      #   run: |
+      #     pip install --pre --upgrade bigdl-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
+
+      # - name: Test installed xpu version
+      #   shell: bash
+      #   run: |
+      #     source /opt/intel/oneapi/setvars.sh
+      #     bash python/llm/test/run-llm-install-tests.sh
+
+      - name: Run whisper evaluation
+        shell: bash
+        run: |
+
+          source /opt/intel/oneapi/setvars.sh
+          export USE_XETLA=OFF
+          export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+          echo "MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/" >> "$GITHUB_ENV"
+          MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/
+          export LIBRISPEECH_DATASET_PATH=/mnt/disk1/datasets/librispeech
+
+          cd python/llm/dev/benchmark/whisper
+          python run_whisper.py --model_path ${MODEL_PATH} --data_type other --device xpu --load_in_low_bit ${{ matrix.precision }} --save_result
+
+      - uses: actions/upload-artifact@v3 # every matrix job uploads under the same artifact name
+        with:
+          name: whisper_results
+          path:
+            ${{ github.workspace }}/python/llm/dev/benchmark/whisper/results/**
+
+  llm-whisper-summary:
+    if: ${{github.event_name == 'schedule' || github.event_name == 'pull_request'}} # not run for workflow_dispatch
+    needs: [set-matrix, llm-whisper-evaluation]
+    runs-on: ["self-hosted", "llm", "perf"]
+    steps:
+      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+
+      - name: Set output path # exports OUTPUT_PATH (results_<date>), NIGHTLY_FOLDER and PR_FOLDER for the steps below
+        shell: bash
+        run: |
+          DATE=$(date +%Y-%m-%d)
+          OUTPUT_PATH="results_$DATE"
+          echo "OUTPUT_PATH=$OUTPUT_PATH" >> $GITHUB_ENV
+          NIGHTLY_FOLDER="/mnt/disk1/whisper_nightly_gpu"
+          echo "NIGHTLY_FOLDER=$NIGHTLY_FOLDER" >> $GITHUB_ENV
+          PR_FOLDER="/mnt/disk1/whisper_pr_gpu"
+          echo "PR_FOLDER=$PR_FOLDER" >> $GITHUB_ENV
+
+      - name: Download all results for nightly run
+        if: github.event_name == 'schedule'
+        uses: actions/download-artifact@v3
+        with:
+          name: whisper_results
+          path: ${{ env.NIGHTLY_FOLDER}}/${{ env.OUTPUT_PATH }}
+
+      - name: Download all results for pr run
+        if: github.event_name == 'pull_request'
+        uses: actions/download-artifact@v3
+        with:
+          name: whisper_results
+          path: ${{ env.PR_FOLDER}}/${{ env.OUTPUT_PATH }}
+
+      - name: Summarize the results for nightly run # NOTE(review): the cp target spells out the folder instead of env.NIGHTLY_FOLDER
+        if: github.event_name == 'schedule'
+        shell: bash
+        run: |
+          cp -r /mnt/disk1/datasets/whisper_fp16_results/* /mnt/disk1/whisper_nightly_gpu/${{ env.OUTPUT_PATH }}
+          pip install pandas==1.5.3
+          python ${{ github.workspace }}/python/llm/dev/benchmark/whisper/whisper_concat_csv.py -i ${{ env.NIGHTLY_FOLDER}}/${{ env.OUTPUT_PATH }} -o ${{ env.NIGHTLY_FOLDER}}
+          python ${{ github.workspace }}/python/llm/dev/benchmark/whisper/whisper_csv_to_html.py -f ${{ env.NIGHTLY_FOLDER}}
+
+      - name: Summarize the results for pull request
+        if: github.event_name == 'pull_request'
+        shell: bash
+        run: |
+          cp -r /mnt/disk1/datasets/whisper_fp16_results/* /mnt/disk1/whisper_pr_gpu/${{ env.OUTPUT_PATH }}
+          pip install pandas==1.5.3
+          python ${{ github.workspace }}/python/llm/dev/benchmark/whisper/whisper_concat_csv.py -i ${{ env.PR_FOLDER}}/${{ env.OUTPUT_PATH }} -o ${{ env.PR_FOLDER}}
+          python ${{ github.workspace }}/python/llm/dev/benchmark/whisper/whisper_csv_to_html.py -f ${{ env.PR_FOLDER}}
\ No newline at end of file
diff --git a/python/llm/dev/benchmark/whisper/run_whisper.py b/python/llm/dev/benchmark/whisper/run_whisper.py
index 29339ca8764..286025d5b01 100644
--- a/python/llm/dev/benchmark/whisper/run_whisper.py
+++ b/python/llm/dev/benchmark/whisper/run_whisper.py
@@ -21,12 +21,20 @@
 from evaluate import load
 import time
 import argparse
+import pandas as pd
+import os
+import csv
+from datetime import date
+
+current_dir = os.path.dirname(os.path.realpath(__file__))  # result CSVs go to <this dir>/results/
 
 def get_args():
     parser = argparse.ArgumentParser(description="Evaluate Whisper performance and accuracy")
     parser.add_argument('--model_path', required=True, help='pretrained model path')
     parser.add_argument('--data_type', required=True, help='clean, other')
     parser.add_argument('--device', required=False, help='cpu, xpu')
+    parser.add_argument('--load_in_low_bit', default='sym_int4', help='Specify whether to load data in low bit format (e.g., 4-bit)')
+    parser.add_argument('--save_result', action='store_true', help='Save the results to a CSV file')
 
     args = parser.parse_args()
     return args
@@ -40,7 +48,7 @@ def get_args():
     processor = WhisperProcessor.from_pretrained(args.model_path)
     forced_decoder_ids = processor.get_decoder_prompt_ids(language='en', task='transcribe')
 
-    model = AutoModelForSpeechSeq2Seq.from_pretrained(args.model_path, load_in_low_bit="sym_int4", optimize_model=True).eval().to(args.device)
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(args.model_path, load_in_low_bit=args.load_in_low_bit, optimize_model=True).eval().to(args.device)
     model.config.forced_decoder_ids = None
 
     def map_to_pred(batch):
@@ -67,6 +75,24 @@ def map_to_pred(batch):
     wer = load("./wer")
     speech_length = sum(result["length"][1:])
     prc_time = sum(result["time"][1:])
-    print("Realtime Factor(RTF) is : %.4f" % (prc_time/speech_length))
-    print("Realtime X(RTX) is : %.2f" % (speech_length/prc_time))
-    print(f'WER is {100 * wer.compute(references=result["reference"], predictions=result["prediction"])}')
\ No newline at end of file
+
+    MODEL = os.path.basename(os.path.normpath(args.model_path))  # model dir name, with or without a trailing '/'
+    RTF = prc_time/speech_length
+    RTX = speech_length/prc_time
+    WER = 100 * wer.compute(references=result["reference"], predictions=result["prediction"])
+
+    today = date.today()
+    if args.save_result:
+        csv_name = f'{current_dir}/results/{MODEL}-{args.data_type}-{args.device}-{args.load_in_low_bit}-{today}.csv'
+        os.makedirs(os.path.dirname(csv_name), exist_ok=True)
+        with open(csv_name, mode='a', newline='') as file:
+            csv_writer = csv.writer(file)
+            file.seek(0, os.SEEK_END)
+            if file.tell() == 0:  # still empty: write the header row once
+                csv_writer.writerow(["models","precision","WER","RTF"])
+            csv_writer.writerow([MODEL, args.load_in_low_bit, WER, RTF])
+        print(f'Results saved to {csv_name}')
+
+    print("Realtime Factor(RTF) is : %.4f" % RTF)
+    print("Realtime X(RTX) is : %.2f" % RTX)
+    print(f'WER is {WER}')
\ No newline at end of file
diff --git a/python/llm/dev/benchmark/whisper/whisper_concat_csv.py b/python/llm/dev/benchmark/whisper/whisper_concat_csv.py
new file mode 100644
index 00000000000..ceca9c94b09
--- /dev/null
+++ b/python/llm/dev/benchmark/whisper/whisper_concat_csv.py
@@ -0,0 +1,50 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Python program to concat CSVs
+
+import os
+import sys
+import argparse
+import pandas as pd
+from datetime import date
+
+def main():
+    """Merge every .csv file of --input_path into <--output_path>/whisper-<today>.csv."""
+    parser = argparse.ArgumentParser(description="concat .csv files")
+    parser.add_argument("-i", "--input_path", type=str, dest="input_path",
+                        help="The directory which stores the original CSV files", default="./")
+    parser.add_argument("-o", "--output_path", type=str, dest="output_path",
+                        help="The directory which stores the concated CSV file", default="./")
+    args = parser.parse_args()
+
+    csv_files = []
+    for file_name in os.listdir(args.input_path):
+        file_path = os.path.join(args.input_path, file_name)
+        if os.path.isfile(file_path) and file_name.endswith(".csv"):
+            csv_files.append(file_path)
+    csv_files.sort()  # reproducible row order in the merged table
+    if not csv_files:
+        # pd.concat() raises an opaque ValueError when it is given nothing to merge.
+        sys.exit("No .csv files found in %s" % args.input_path)
+
+    # ignore_index=True already renumbers the rows 0..n-1, so no reset_index() is needed.
+    merged_df = pd.concat([pd.read_csv(file) for file in csv_files], ignore_index=True)
+    output_file_path = os.path.join(args.output_path, f'whisper-{date.today()}.csv')
+    merged_df.to_csv(output_file_path)
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/python/llm/dev/benchmark/whisper/whisper_csv_to_html.py b/python/llm/dev/benchmark/whisper/whisper_csv_to_html.py
new file mode 100644
index 00000000000..10ae19d6e9c
--- /dev/null
+++ b/python/llm/dev/benchmark/whisper/whisper_csv_to_html.py
@@ -0,0 +1,113 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Python program to convert CSV to HTML Table
+
+import os
+import sys
+import argparse
+import pandas as pd
+
+def highlight_vals(val, max=3.0, color1='red', color2='green'):
+    """Return the CSS used to highlight one diff cell.
+
+    A float above ``max`` gets ``color2`` and a float at or below ``-max``
+    gets ``color1``; every other value yields '' (no styling).
+    """
+    # `max` shadows the builtin, but callers pass it by keyword so the name stays.
+    if isinstance(val, float):
+        if val > max:
+            return 'background-color: %s' % color2
+        if val <= -max:
+            return 'background-color: %s' % color1
+    return ''
+
+def main():
+    """Render the newest .csv of --folder_path as HTML, diffed against the one before it."""
+    parser = argparse.ArgumentParser(description="convert .csv file to .html file")
+    parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
+                        help="The directory which stores the .csv file", default="/mnt/disk1/whisper_pr_gpu/")
+    parser.add_argument("-t", "--threshold", type=float, dest="threshold",
+                        help="the threshold of highlight values", default=1.0)
+    args = parser.parse_args()
+
+    csv_files = []
+    for file_name in os.listdir(args.folder_path):
+        file_path = os.path.join(args.folder_path, file_name)
+        if os.path.isfile(file_path) and file_name.endswith(".csv"):
+            csv_files.append(file_path)
+    # Reverse lexical order: csv_files[0] is used as the latest report, csv_files[1] as the previous one.
+    csv_files.sort(reverse=True)
+    if not csv_files:
+        print("No .csv file found in %s" % args.folder_path, file=sys.stderr)
+        return 1
+
+    latest_csv = pd.read_csv(csv_files[0], index_col=0)
+    # splitext() drops only the extension; split(".")[0] truncated any path containing a dot.
+    daily_html = os.path.splitext(csv_files[0])[0] + ".html"
+
+    if len(csv_files) == 1:
+        # Nothing to compare against: write the plain table.
+        latest_csv.to_html(daily_html)
+        return 0
+
+    # Previous results keyed by (model, precision). Only strictly positive pairs are kept,
+    # so the percentages below never divide by zero; a later duplicate row overrides.
+    previous_csv = pd.read_csv(csv_files[1], index_col=0)
+    previous = {}
+    for _, row in previous_csv.iterrows():
+        if row['WER'] > 0.0 and row['RTF'] > 0.0:
+            previous[(row['models'].strip(), row['precision'].strip())] = (row['WER'], row['RTF'])
+
+    # pd.NA marks rows that have no usable previous measurement.
+    row_count = len(latest_csv.index)
+    last1 = [pd.NA] * row_count
+    diff1 = [pd.NA] * row_count
+    last2 = [pd.NA] * row_count
+    diff2 = [pd.NA] * row_count
+
+    # Lists are filled by row position, so the CSV index need not be 0..n-1.
+    for pos, (_, row) in enumerate(latest_csv.iterrows()):
+        key = (row['models'].strip(), row['precision'].strip())
+        if key not in previous:
+            continue
+        previous_WER, previous_RTF = previous[key]
+        last1[pos] = previous_WER
+        diff1[pos] = round((previous_WER - row['WER']) * 100 / previous_WER, 2)
+        last2[pos] = previous_RTF
+        diff2[pos] = round((previous_RTF - row['RTF']) * 100 / previous_RTF, 2)
+
+    latest_csv.insert(loc=4, column='last1', value=last1)
+    latest_csv.insert(loc=5, column='diff1(%)', value=diff1)
+    latest_csv.insert(loc=6, column='last2', value=last2)
+    latest_csv.insert(loc=7, column='diff2(%)', value=diff2)
+
+    subset1 = ['diff1(%)', 'diff2(%)']
+    columns = {'WER': '{:.6f}', 'RTF': '{:.6f}', 'last1': '{:.6f}', 'diff1(%)': '{:.6f}','last2': '{:.6f}', 'diff2(%)': '{:.6f}'}
+
+    # --threshold used to be parsed but ignored in favour of a hard-coded 1.0.
+    styled_df = latest_csv.style.format(columns).applymap(
+        lambda val: highlight_vals(val, max=args.threshold, color1='red', color2='green'),
+        subset=subset1)
+    # Styler.to_html() replaces Styler.render(), which pandas deprecated in 1.4.
+    html_output = styled_df.set_table_attributes("border=1").to_html()
+
+    with open(daily_html, 'w') as f:
+        f.write(html_output)
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file