Commit

Merge branch 'main' into cicd_release_zip
JinBridger authored Nov 1, 2023
2 parents f1b25ff + d18e153 commit 16f531a
Showing 270 changed files with 15,123 additions and 6,140 deletions.
3 changes: 3 additions & 0 deletions .github/actions/llm/setup-llm-env/action.yml
@@ -14,6 +14,9 @@ runs:
# make sure we install the latest version for bigdl-core-xe
pip uninstall bigdl-core-xe || true
sed -i 's/"bigdl-core-xe==" + VERSION + "/"bigdl-core-xe/g' python/llm/setup.py
# make sure we install the latest version for bigdl-core-xe-esimd
pip uninstall bigdl-core-xe-esimd || true
sed -i 's/"bigdl-core-xe-esimd==" + VERSION + "/"bigdl-core-xe-esimd/g' python/llm/setup.py
pip install requests
if [[ ${{ runner.os }} == 'Linux' ]]; then
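The added lines uninstall any previously installed bigdl-core-xe-esimd and strip its version pin from python/llm/setup.py, so the subsequent install resolves the newest published build. A minimal sketch of what the sed substitution does to a stand-in requirement string (the platform-marker text below is hypothetical, not copied from the repo):

    # demonstrate the substitution on a stand-in requirement string (hypothetical marker text)
    echo '"bigdl-core-xe-esimd==" + VERSION + ";platform_system != Windows"' \
      | sed 's/"bigdl-core-xe-esimd==" + VERSION + "/"bigdl-core-xe-esimd/g'
    # prints: "bigdl-core-xe-esimd;platform_system != Windows"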
@@ -21,7 +21,7 @@ runs:
pip install -i ${GONDOLIN_PIP_MIRROR} --trusted-host ${GONDOLIN_TRUSTED_HOST} tensorboardX==2.1
pip install -i ${GONDOLIN_PIP_MIRROR} --trusted-host ${GONDOLIN_TRUSTED_HOST} tensorflow==2.3.0
pip install -i ${GONDOLIN_PIP_MIRROR} --trusted-host ${GONDOLIN_TRUSTED_HOST} tensorflow-estimator==2.3.0
-pip install -i ${GONDOLIN_PIP_MIRROR} --trusted-host ${GONDOLIN_TRUSTED_HOST} xgboost_ray
+pip install -i ${GONDOLIN_PIP_MIRROR} --trusted-host ${GONDOLIN_TRUSTED_HOST} xgboost_ray==0.1.8
pip install -i ${GONDOLIN_PIP_MIRROR} --trusted-host ${GONDOLIN_TRUSTED_HOST} Pillow
pip install -i ${GONDOLIN_PIP_MIRROR} --trusted-host ${GONDOLIN_TRUSTED_HOST} pyarrow==4.0.1
pip install -i ${GONDOLIN_PIP_MIRROR} --trusted-host ${GONDOLIN_TRUSTED_HOST} h5py==2.10.0
@@ -0,0 +1,191 @@
name: 'Run PPML Occlum k8s ExampleTests'
description: 'Run PPML Occlum k8s ExampleTests'
inputs:
image:
description: 'image'
required: true
default: '10.239.45.10/arda/intelanalytics/bigdl-ppml-trusted-big-data-ml-scala-occlum'
image-tag:
description: 'image tag'
required: true
default: '2.4.0-SNAPSHOT-EDMM'
runs:
using: "composite"
steps:
- name: Run tests
shell: bash
env:
DEFAULT_IMAGE: ${{ inputs.image }}:${{ inputs.image-tag }}
run: |
whoami
export IMAGE=${{ env.DEFAULT_IMAGE }}
docker pull $IMAGE
docker pull intelanalytics/bigdl-ppml-trusted-big-data-ml-scala-occlum:2.4.0-SNAPSHOT-EDMM
export NO_PROXY=10.239.45.10:8081,10.112.231.51,10.239.45.10,172.168.0.205,172.168.0.210
export kubernetes_master_url=172.168.0.210
export SPARK_HOME=/opt/spark-3.1.2-bin-hadoop3.2
cd /home/icx/BigDL/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes
status_1_spark_pi=1
status_2_spark_lr=1
status_3_spark_sql=1
status_4_spark_gbt=1
status_5_spark_gbt_criteo=1
status_6_spark_tpch=1
status_7_pyspark_sklearn=1
status_8_pyspark_sql=1
status_9_pyspark_tpch=1
status_10_spark_lgbm=1
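# Each status flag starts at 1 ("not yet passed"). After an example runs, the flag is reset to the
# exit code of the egrep over its driver log, so 0 means the expected output marker was found.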
if [ $status_1_spark_pi -ne 0 ]; then
echo "################## start spark pi"
echo "example.1 spark pi"
bash run_spark_pi.sh $IMAGE
if [ "`kubectl get pods | grep spark-pi-test | awk '{print $1}'`" != "" ]; then
pi_driver=`kubectl get pods | grep spark-pi-test | awk '{print $1}' | grep driver`
fi
kubectl logs $pi_driver | egrep 'Pi is roughly 3'
status_1_spark_pi=$(echo $?)
if [ "`kubectl get pods | grep spark-pi-test | awk '{print $1}'`" != "" ]; then
kubectl delete pods $pi_driver
fi
fi
if [ $status_2_spark_lr -ne 0 ]; then
echo "################## start spark lr"
echo "example.2 spark lr"
bash run_spark_lr.sh $IMAGE
if [ "`kubectl get pods | grep spark-lr-test | awk '{print $1}'`" != "" ]; then
lr_driver=`kubectl get pods | grep spark-lr-test | awk '{print $1}' | grep driver`
fi
kubectl logs $lr_driver | egrep 'Training data results'
status_2_spark_lr=$(echo $?)
if [ "`kubectl get pods | grep spark-lr-test | awk '{print $1}'`" != "" ]; then
kubectl delete pods $lr_driver
fi
fi
if [ $status_3_spark_sql -ne 0 ]; then
echo "################## start spark sql"
echo "example.3 spark sql"
bash run_spark_sql.sh $IMAGE
if [ "`kubectl get pods | grep spark-sql-test | awk '{print $1}'`" != "" ]; then
sql_driver=`kubectl get pods | grep spark-sql-test | awk '{print $1}' | grep driver`
fi
kubectl logs $sql_driver | egrep 'Name: Michael'
status_3_spark_sql=$(echo $?)
if [ "`kubectl get pods | grep spark-sql-test | awk '{print $1}'`" != "" ]; then
kubectl delete pods $sql_driver
fi
fi
if [ $status_4_spark_gbt -ne 0 ]; then
echo "################## start spark gbt"
echo "example.4 spark gbt"
bash run_spark_gbt.sh $IMAGE
if [ "`kubectl get pods | grep spark-gbt-example-test | awk '{print $1}'`" != "" ]; then
gbt_example_driver=`kubectl get pods | grep spark-gbt-example-test | awk '{print $1}' | grep driver`
fi
kubectl logs $gbt_example_driver | egrep 'Training data results'
status_4_spark_gbt=$(echo $?)
if [ "`kubectl get pods | grep spark-gbt-example-test | awk '{print $1}'`" != "" ]; then
kubectl delete pods $gbt_example_driver
fi
fi
if [ $status_5_spark_gbt_criteo -ne 0 ]; then
echo "################## start spark gbt criteo"
echo "example.5 spark gbt criteo"
bash run_spark_gbt_criteo.sh $IMAGE
if [ "`kubectl get pods | grep spark-gbt-criteo-test | awk '{print $1}'`" != "" ]; then
gbt_criteo_driver=`kubectl get pods | grep spark-gbt-criteo-test | grep driver | awk '{print $1}'`
fi
kubectl logs $gbt_criteo_driver | egrep 'end time is'
status_5_spark_gbt_criteo=$(echo $?)
if [ "`kubectl get pods | grep spark-gbt-criteo-test | awk '{print $1}'`" != "" ]; then
kubectl delete pods $gbt_criteo_driver
fi
fi
if [ $status_6_spark_tpch -ne 0 ]; then
echo "################## start spark tpch"
echo "example.6 spark tpch"
bash run_spark_tpch.sh $IMAGE
if [ "`kubectl get pods | grep spark-tpch-test | awk '{print $1}'`" != "" ]; then
tpch_driver=`kubectl get pods | grep spark-tpch-test | awk '{print $1}' | grep driver`
fi
kubectl logs $tpch_driver | egrep '22 finished-'
status_6_spark_tpch=$(echo $?)
if [ "`kubectl get pods | grep spark-tpch-test | awk '{print $1}'`" != "" ]; then
kubectl delete pods $tpch_driver
fi
fi
if [ $status_7_pyspark_sklearn -ne 0 ]; then
echo "################## start pyspark sklearn"
echo "example.7 pyspark sklearn"
bash run_pyspark_sklearn_example.sh $IMAGE
if [ "`kubectl get pods | grep pyspark-sklearn-test | awk '{print $1}'`" != "" ]; then
sklearn_driver=`kubectl get pods | grep pyspark-sklearn-test | awk '{print $1}' | grep driver`
fi
kubectl logs $sklearn_driver | egrep 'mean_squared_error'
status_7_pyspark_sklearn=$(echo $?)
if [ "`kubectl get pods | grep pyspark-sklearn-test | awk '{print $1}'`" != "" ]; then
kubectl delete pods $sklearn_driver
fi
fi
if [ $status_8_pyspark_sql -ne 0 ]; then
echo "################## start pyspark SQL example"
echo "example.8 pyspark sql"
bash run_pyspark_sql_example.sh $IMAGE
if [ "`kubectl get pods | grep pyspark-pysql-test | awk '{print $1}'`" != "" ]; then
sql_driver=`kubectl get pods | grep pyspark-pysql-test | awk '{print $1}' | grep driver`
fi
kubectl logs $sql_driver | egrep 'Example API finished'
status_8_pyspark_sql=$(echo $?)
if [ "`kubectl get pods | grep pyspark-pysql-test | awk '{print $1}'`" != "" ]; then
kubectl delete pods $sql_driver
fi
fi
if [ $status_9_pyspark_tpch -ne 0 ]; then
echo "################## start pyspark tpch"
echo "example.9 pyspark tpch"
bash run_pyspark_tpch.sh $IMAGE
if [ "`kubectl get pods | grep pyspark-tpch-test | awk '{print $1}'`" != "" ]; then
pytpch_driver=`kubectl get pods | grep pyspark-tpch-test | awk '{print $1}' | grep driver`
fi
kubectl logs $pytpch_driver | egrep 'total time is'
status_9_pyspark_tpch=$(echo $?)
if [ "`kubectl get pods | grep pyspark-tpch-test | awk '{print $1}'`" != "" ]; then
kubectl delete pods $pytpch_driver
fi
fi
if [ $status_10_spark_lgbm -ne 0 ]; then
echo "################## start spark lgbm"
echo "example.10 spark lgbm"
bash run_spark_lgbm.sh $IMAGE
if [ "`kubectl get pods | grep spark-lgbm-test | awk '{print $1}'`" != "" ]; then
lgbm_driver=`kubectl get pods | grep spark-lgbm-test | awk '{print $1}' | grep driver`
fi
kubectl logs $lgbm_driver | egrep 'acc:'
status_10_spark_lgbm=$(echo $?)
if [ "`kubectl get pods | grep spark-lgbm-test | awk '{print $1}'`" != "" ]; then
kubectl delete pods $lgbm_driver
fi
fi
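# Summary of all example results; any non-zero value below marks a failed example.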
echo "status_1_spark_pi $status_1_spark_pi"
echo "status_2_spark_lr $status_2_spark_lr"
echo "status_3_spark_sql $status_3_spark_sql"
echo "status_4_spark_gbt $status_4_spark_gbt"
echo "status_5_spark_gbt_criteo $status_5_spark_gbt_criteo"
echo "status_6_spark_tpch $status_6_spark_tpch"
echo "status_7_pyspark_sklearn $status_7_pyspark_sklearn"
echo "status_8_pyspark_sql $status_8_pyspark_sql"
echo "status_9_pyspark_tpch $status_9_pyspark_tpch"
echo "status_10_spark_lgbm $status_10_spark_lgbm"
30 changes: 30 additions & 0 deletions .github/workflows/llm-binary-build.yml
@@ -60,6 +60,8 @@ jobs:
yum install -y gcc-toolset-11 cmake git
conda remove -n python39 --all -y
conda create -n python39 python=3.9 -y
conda remove -n python310 --all -y
conda create -n python310 python=3.10 -y
- uses: actions/checkout@v3
with:
repository: "intel-analytics/llm.cpp"
@@ -103,6 +105,18 @@
run: |
mv src/chatglm/build/main release/main-chatglm_vnni
mv src/chatglm/build/_C.cpython-39-x86_64-linux-gnu.so release/chatglm_C.cpython-39-x86_64-linux-gnu.so
- name: Build Chatglm Py310
shell: bash
run: |
source activate python310 || conda activate python310
cd src/chatglm
rm -r build
scl enable gcc-toolset-11 "cmake -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
- name: Move Chatglm binaries Py310
shell: bash
run: |
mv src/chatglm/build/_C.cpython-310-x86_64-linux-gnu.so release/chatglm_C.cpython-310-x86_64-linux-gnu.so
- name: Archive build files
uses: actions/upload-artifact@v3
with:
@@ -114,6 +128,7 @@
run: |
make clean
conda remove -n python39 --all -y
conda remove -n python310 --all -y
check-linux-avx512-artifact:
runs-on: ubuntu-latest
@@ -401,6 +416,21 @@ jobs:
run: |
mv src/chatglm/build/Release/main.exe release/main-chatglm_vnni.exe
mv src/chatglm/build/Release/_C.cp39-win_amd64.pyd release/chatglm_C.cp39-win_amd64.pyd
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Build Chatglm Py310
shell: powershell
run: |
cd src/chatglm
rm -r build
cmake -DAVXVNNI=ON -B build
cmake --build build --config Release -j
- name: Move Chatglm binaries Py310
shell: powershell
run: |
mv src/chatglm/build/Release/_C.cp310-win_amd64.pyd release/chatglm_C.cp310-win_amd64.pyd
- name: Archive build files
uses: actions/upload-artifact@v3
with:
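The new steps mirror the existing Python 3.9 flow: each interpreter needs its own build because CPython extension modules carry per-version ABI tags (cpython-39 vs cpython-310 on Linux, cp39 vs cp310 on Windows). Condensed into a loop, the Linux per-version build the workflow performs looks roughly like this (an illustrative sketch, not how the workflow is actually written; the workflow runs separate named steps and also moves the py39 main binary):

    for py in python39 python310; do
      source activate "$py" || conda activate "$py"
      rm -rf src/chatglm/build
      scl enable gcc-toolset-11 "cmake -S src/chatglm -B src/chatglm/build"
      scl enable gcc-toolset-11 "cmake --build src/chatglm/build --config Release -j"
      so=$(ls src/chatglm/build/_C.cpython-3*-x86_64-linux-gnu.so)
      mv "$so" "release/chatglm$(basename "$so")"   # e.g. _C.cpython-310-... -> chatglm_C.cpython-310-...
    done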
2 changes: 2 additions & 0 deletions .github/workflows/llm_example_tests.yml
@@ -18,6 +18,8 @@ on:
- '.github/actions/llm/setup-llm-env/action.yml'
- '.github/actions/llm/remove-llm-env/action.yml'
- '.github/actions/llm/download-llm-binary/action.yml'
- 'python/llm/dev/test/run-example-tests.sh'
- 'python/llm/example/**'
workflow_dispatch:
workflow_call:

59 changes: 17 additions & 42 deletions .github/workflows/llm_performance_tests.yml
@@ -26,6 +26,7 @@ jobs:
llm-cpp-build:
uses: ./.github/workflows/llm-binary-build.yml
llm-performance-test:
if: false # skip cpu performance test for now; may add it back with separated runner
needs: llm-cpp-build
strategy:
fail-fast: false
@@ -88,25 +89,25 @@
THREAD_NUM: 16
ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
steps:
- name: Set environment variables
shell: bash
run: |
echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
echo "LLAMA2_13B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-13b-chat-hf" >> "$GITHUB_ENV"
echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV"
echo "WHISPER_MEDIUM_ORIGIN_PATH=${ORIGIN_DIR}/whisper-medium" >> "$GITHUB_ENV"
- uses: actions/checkout@v3

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
shell: bash
# pip install transformers_stream_generator for model internlm-chat-7b-8k
# pip install tiktoken for model Qwen-7B-Chat-10-12
run: |
python -m pip install --upgrade pip
python -m pip install --upgrade setuptools
python -m pip install --upgrade wheel
python -m pip install --upgrade omegaconf
python -m pip install --upgrade pandas
python -m pip install --upgrade einops
python -m pip install --upgrade transformers_stream_generator
python -m pip install --upgrade tiktoken
- name: Download llm binary
uses: ./.github/actions/llm/download-llm-binary
@@ -122,44 +123,18 @@ jobs:
source /opt/intel/oneapi/setvars.sh
bash python/llm/test/run-llm-install-tests.sh
- name: Download LLMs
shell: bash
run: |
if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then
echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
fi
if [ ! -d $LLAMA2_13B_ORIGIN_PATH ]; then
echo "Directory $LLAMA2_13B_ORIGIN_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-13b-chat-hf -P $ORIGIN_DIR
fi
if [ ! -d $CHATGLM2_6B_ORIGIN_PATH ]; then
echo "Directory $CHATGLM2_6B_ORIGIN_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR
fi
if [ ! -d $WHISPER_MEDIUM_ORIGIN_PATH ]; then
echo "Directory $WHISPER_MEDIUM_ORIGIN_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/whisper-medium -P $ORIGIN_DIR
fi
- name: Test on xpu
shell: bash
run: |
source /opt/intel/oneapi/setvars.sh
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
cd python/llm/test/benchmark/gpu
mv python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
cd python/llm/dev/benchmark/all-in-one
export http_proxy=${HTTP_PROXY}
export https_proxy=${HTTPS_PROXY}
rm -rf test-result || true
mkdir test-result
taskset -c 0-$((THREAD_NUM - 1)) python llama2.py --model-dir="${LLAMA2_7B_ORIGIN_PATH}" --input-tokens=32 --max-new-tokens=32 > test-result/llama2_7b-32-32.log
taskset -c 0-$((THREAD_NUM - 1)) python llama2.py --model-dir="${LLAMA2_7B_ORIGIN_PATH}" --input-tokens=1024 --max-new-tokens=1024 > test-result/llama2_7b-1024-1024.log
taskset -c 0-$((THREAD_NUM - 1)) python llama2.py --model-dir="${LLAMA2_13B_ORIGIN_PATH}" --input-tokens=32 --max-new-tokens=32 > test-result/llama2_13b-32-32.log
taskset -c 0-$((THREAD_NUM - 1)) python llama2.py --model-dir="${LLAMA2_13B_ORIGIN_PATH}" --input-tokens=1024 --max-new-tokens=1024 > test-result/llama2_13b-1024-1024.log
taskset -c 0-$((THREAD_NUM - 1)) python chatglm2.py --model-dir="${CHATGLM2_6B_ORIGIN_PATH}" --input-tokens=32 --max-new-tokens=32 > test-result/chatglm2_6b-32-32.log
taskset -c 0-$((THREAD_NUM - 1)) python chatglm2.py --model-dir="${CHATGLM2_6B_ORIGIN_PATH}" --input-tokens=1024 --max-new-tokens=1024 > test-result/chatglm2_6b-1024-1024.log
taskset -c 0-$((THREAD_NUM - 1)) python whisper.py --model-dir="${WHISPER_MEDIUM_ORIGIN_PATH}" > test-result/whisper_medium-default-default.log
python ../analyze_log_dir.py --log-dir=./test-result --output-path=./xpu_latency.csv
timestamp=`date '+%Y%m%d'`
curl -T ./xpu_latency.csv ${LLM_FTP_URL}/llm/ggml-actions/perf/xpu_lantency_$timestamp.csv
python run.py
curl -T ./*.csv ${LLM_FTP_URL}/llm/ggml-actions/perf/
cp ./*.csv /mnt/disk1/nightly_perf/
cd ../../../test/benchmark
python csv_to_html.py -f /mnt/disk1/nightly_perf/
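Taken together, the reworked xpu step drives the all-in-one benchmark from a YAML config (arc-perf-test.yaml dropped in as config.yaml) instead of invoking per-model scripts, then uploads and publishes the resulting csv files. Roughly the same flow could be reproduced by hand as follows (a sketch assuming a local oneAPI install and the repository layout above; the results directory is a placeholder, and the workflow uses mv rather than cp):

    source /opt/intel/oneapi/setvars.sh
    export USE_XETLA=OFF
    export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
    cp python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
    cd python/llm/dev/benchmark/all-in-one
    python run.py                                 # writes csv result files into the current directory
    cd ../../../test/benchmark
    python csv_to_html.py -f /path/to/results     # placeholder directory holding the csv files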