Merge branch 'intel-analytics:main' into cicd_release_zip
JinBridger authored Oct 9, 2023
2 parents 49da199 + 05ffcda commit 9d61166
Showing 146 changed files with 943 additions and 410 deletions.
192 changes: 192 additions & 0 deletions .github/actions/ppml/ppml-occlum-EDMM-exampletests-action/action.yml
@@ -0,0 +1,192 @@
name: 'Run PPML Occlum EDMM ExampleTests'
description: 'Run PPML Occlum EDMM ExampleTests'
inputs:
  image:
    description: 'image'
    required: true
    default: '10.239.45.10/arda/intelanalytics/bigdl-ppml-trusted-big-data-ml-scala-occlum'
  image-tag:
    description: 'image tag'
    required: true
    default: '2.4.0-SNAPSHOT-EDMM6'
runs:
  using: "composite"
  steps:
    - name: Run tests
      shell: bash
      env:
        DEFAULT_IMAGE: ${{ inputs.image }}:${{ inputs.image-tag }}
      run: |
        whoami
        # icx-6's kernel supports EDMM
        export LOCAL_IP=172.168.0.210
        export CPUSET="6-10"
        export CONTAINER_NAME="spark-occlum-edmm-jenkins"
        export DATA_PATH=/home/icx/glorysdj/data
        export KEYS_PATH=/home/icx/glorysdj/keys
        export SECURE_PASSWORD_PATH=/home/icx/glorysdj/password
        export SGX_MEM_SIZE=30GB
        export SGX_KERNEL_HEAP=2GB
        export IMAGE=${{ env.DEFAULT_IMAGE }}
        docker pull $IMAGE
        docker pull intelanalytics/bigdl-ppml-trusted-big-data-ml-scala-occlum:2.4.0-SNAPSHOT-EDMM
        docker stop $CONTAINER_NAME
        docker rm -f $CONTAINER_NAME
        docker run -itd \
            --net=host \
            --cpuset-cpus=$CPUSET \
            --oom-kill-disable \
            --device=/dev/sgx/enclave \
            --device=/dev/sgx/provision \
            -v /var/run/aesmd/aesm.socket:/var/run/aesmd/aesm.socket \
            -v $DATA_PATH:/opt/occlum_spark/data \
            -v $KEYS_PATH:/opt/keys \
            --name=$CONTAINER_NAME \
            -e LOCAL_IP=$LOCAL_IP \
            -e SGX_MEM_SIZE=$SGX_MEM_SIZE \
            -e SGX_KERNEL_HEAP=$SGX_KERNEL_HEAP \
            $IMAGE \
            bash -c "tail -f /dev/null"
        status_1_spark_pi=1
        status_2_bigdl_lenet_mnist=1
        status_3_bigdl_resnet_cifar10=1
        status_4_spark_tpch=1
        status_5_spark_ut=0
        status_6_spark_xgboost=1
        status_7_spark_gbt=1
        status_8_pyspark_sklearn=1
        status_9_pyspark_sql=1
        status_10_pyspark_tpch=1
        status_11_spark_lgbm=1
        if [ $status_1_spark_pi -ne 0 ]; then
          echo "################## start spark pi"
          echo "example.1 spark pi"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            bash run_spark_on_occlum_glibc.sh pi | tee test-spark-pi-sgx.log && \
            cat test-spark-pi-sgx.log | egrep 'Pi is roughly 3'"
          status_1_spark_pi=$(echo $?)
        fi
        if [ $status_2_bigdl_lenet_mnist -ne 0 ]; then
          echo "################## start bigdl lenet mnist"
          echo "example.2 bigdl lenet mnist"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            sed -i 's# run_spark_lenet_mnist# run_spark_lenet_mnist -b 4 -e 1#g' run_spark_on_occlum_glibc.sh && \
            sed -i 's# -f /host/data# -f /host/data/lenet#g' run_spark_on_occlum_glibc.sh && \
            bash run_spark_on_occlum_glibc.sh lenet -b 8 -e 1 | tee bigdl-lenet-mnist.log && \
            cat bigdl-lenet-mnist.log | egrep 'records/second. Loss is' && \
            sed -i 's# -f /host/data/lenet# -f /host/data#g' run_spark_on_occlum_glibc.sh"
          status_2_bigdl_lenet_mnist=$(echo $?)
        fi
        if [ $status_3_bigdl_resnet_cifar10 -ne 0 ]; then
          echo "################## start bigdl resnet cifar10"
          echo "example.3 bigdl resnet cifar10"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            sed -i 's# run_spark_resnet_cifar# run_spark_resnet_cifar --nEpochs 1#g' run_spark_on_occlum_glibc.sh && \
            sed -i 's# -f /host/data# -f /host/data/cifar#g' run_spark_on_occlum_glibc.sh && \
            bash run_spark_on_occlum_glibc.sh resnet | tee bigdl-resnet-cifar10.log && \
            cat bigdl-resnet-cifar10.log | egrep 'Current momentum is ' && \
            sed -i 's# -f /host/data/cifar# -f /host/data#g' run_spark_on_occlum_glibc.sh"
          status_3_bigdl_resnet_cifar10=$(echo $?)
        fi
        if [ $status_4_spark_tpch -ne 0 ]; then
          echo "################## start spark tpch"
          echo "example.4 spark tpch"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            sed -i 's#spark.driver.memory=12g#spark.driver.memory=2g#g' run_spark_on_occlum_glibc.sh && \
            sed -i 's#spark.executor.instances=8#spark.executor.instances=2#g' run_spark_on_occlum_glibc.sh && \
            sed -i 's#executor-memory 8G#executor-memory 2G#g' run_spark_on_occlum_glibc.sh && \
            sed -i 's#-Xmx78g -Xms78g#-Xmx10g -Xms10g#g' run_spark_on_occlum_glibc.sh && \
            sed -i 's#/host/data /host/data/output#/host/data/tpch /host/data/output#g' run_spark_on_occlum_glibc.sh && \
            bash run_spark_on_occlum_glibc.sh tpch | tee spark-tpch.log && \
            cat spark-tpch.log | egrep '22 finished-'"
          status_4_spark_tpch=$(echo $?)
        fi
        if [ $status_5_spark_ut -ne 0 ]; then
          echo "################## start spark unit test"
          echo "example.5 spark unit test"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            sed -i 's#192.168.0.111#$LOCAL_IP#g' run_spark_on_occlum_glibc.sh && \
            bash run_spark_on_occlum_glibc.sh ut | tee spark-unit-test.log && \
            cat spark-unit-test.log | egrep 'FINISHED o.a.s.status.api.v1.sql.SqlResourceSuite:'"
          status_5_spark_ut=$(echo $?)
        fi
        if [ $status_6_spark_xgboost -ne 0 ]; then
          echo "################## start spark xgboost"
          echo "example.6 spark xgboost"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            sed -i 's#-i /host/data -s /host/data/model -t 2 -r 100 -d 2 -w 1#-i /host/data/xgboost -s /host/data/xgboost/model -t 2 -r 10 -d 2 -w 1#g' run_spark_on_occlum_glibc.sh && \
            bash run_spark_on_occlum_glibc.sh xgboost | tee spark-xgboost.log && \
            cat spark-xgboost.log | egrep 'end time is'"
          status_6_spark_xgboost=$(echo $?)
        fi
        if [ $status_7_spark_gbt -ne 0 ]; then
          echo "################## start spark gbt"
          echo "example.7 spark gbt"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            sed -i 's#-i /host/data -s /host/data/model -I 100 -d 5#-i /host/data/gbt -s /host/data/gbt/model -I 10 -d 5#g' run_spark_on_occlum_glibc.sh && \
            bash run_spark_on_occlum_glibc.sh gbt | tee spark-gbt.log && \
            cat spark-gbt.log | egrep 'end time is'"
          status_7_spark_gbt=$(echo $?)
        fi
        if [ $status_8_pyspark_sklearn -ne 0 ]; then
          echo "################## start pyspark sklearn Linear Regression"
          echo "example.8 pyspark sklearn"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            bash run_spark_on_occlum_glibc.sh pysklearn | tee test-pyspark-sklearn-sgx.log && \
            cat test-pyspark-sklearn-sgx.log | egrep 'mean_squared_error'"
          status_8_pyspark_sklearn=$(echo $?)
        fi
        if [ $status_9_pyspark_sql -ne 0 ]; then
          echo "################## start pyspark SQL example"
          echo "example.9 pyspark sql"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            bash run_spark_on_occlum_glibc.sh pysql | tee test-pyspark-sql-sgx.log && \
            cat test-pyspark-sql-sgx.log | egrep 'Example API finished'"
          status_9_pyspark_sql=$(echo $?)
        fi
        if [ $status_10_pyspark_tpch -ne 0 ]; then
          echo "################## start pyspark tpch"
          echo "example.10 pyspark tpch"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            sed -i 's#/host/data/ /host/data/output/ true#/host/data/tpch/ /host/data/output/ false#g' run_spark_on_occlum_glibc.sh && \
            bash run_spark_on_occlum_glibc.sh pytpch | tee pyspark-tpch.log && \
            cat pyspark-tpch.log | egrep 'total time is'"
          status_10_pyspark_tpch=$(echo $?)
        fi
        if [ $status_11_spark_lgbm -ne 0 ]; then
          echo "################## start spark lgbm"
          echo "example.11 spark lgbm"
          docker exec -i $CONTAINER_NAME bash -c "cd /opt && \
            bash run_spark_on_occlum_glibc.sh lgbm | tee spark-lgbm.log && \
            cat spark-lgbm.log | egrep 'acc:'"
          status_11_spark_lgbm=$(echo $?)
        fi
        echo "status_1_spark_pi $status_1_spark_pi"
        echo "status_2_bigdl_lenet_mnist $status_2_bigdl_lenet_mnist"
        echo "status_3_bigdl_resnet_cifar10 $status_3_bigdl_resnet_cifar10"
        echo "status_4_spark_tpch $status_4_spark_tpch"
        #echo "status_5_spark_ut $status_5_spark_ut"
        echo "status_6_spark_xgboost $status_6_spark_xgboost"
        echo "status_7_spark_gbt $status_7_spark_gbt"
        echo "status_8_pyspark_sklearn $status_8_pyspark_sklearn"
        echo "status_9_pyspark_sql $status_9_pyspark_sql"
        echo "status_10_pyspark_tpch $status_10_pyspark_tpch"
        echo "status_11_spark_lgbm $status_11_spark_lgbm"
38 changes: 34 additions & 4 deletions .github/workflows/llm_performance_tests.yml
@@ -36,6 +36,10 @@ jobs:
    env:
      THREAD_NUM: 24
    steps:
      - name: Set environment variables
        shell: bash
        run: |
          echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
@@ -55,6 +59,14 @@
        env:
          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}

      - name: Download LLMs
        shell: bash
        run: |
          if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then
            echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
          fi
      - name: Run LLM Performance test
        env:
          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
@@ -76,17 +88,14 @@
      THREAD_NUM: 16
      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
    steps:
      - name: Set model directories
        shell: bash
        run: |
          echo "ORIGIN_DIR=/mnt/disk1/models" >> "$GITHUB_ENV"
      - name: Set environment variables
        shell: bash
        run: |
          echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
          echo "LLAMA2_13B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-13b-chat-hf" >> "$GITHUB_ENV"
          echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV"
          echo "WHISPER_MEDIUM_ORIGIN_PATH=${ORIGIN_DIR}/whisper-medium" >> "$GITHUB_ENV"
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
@@ -112,6 +121,27 @@ jobs:
        run: |
          source /opt/intel/oneapi/setvars.sh
          bash python/llm/test/run-llm-install-tests.sh
      - name: Download LLMs
        shell: bash
        run: |
          if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then
            echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
          fi
          if [ ! -d $LLAMA2_13B_ORIGIN_PATH ]; then
            echo "Directory $LLAMA2_13B_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-13b-chat-hf -P $ORIGIN_DIR
          fi
          if [ ! -d $CHATGLM2_6B_ORIGIN_PATH ]; then
            echo "Directory $CHATGLM2_6B_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR
          fi
          if [ ! -d $WHISPER_MEDIUM_ORIGIN_PATH ]; then
            echo "Directory $WHISPER_MEDIUM_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/whisper-medium -P $ORIGIN_DIR
          fi
      - name: Test on xpu
        shell: bash
        run: |
37 changes: 36 additions & 1 deletion .github/workflows/nightly_test.yml
@@ -3,7 +3,7 @@ name: Nightly Test
on:

  #pull_request:
    #branches: [ main ]
    # branches: [ main ]

  schedule:
    - cron: '30 15 * * *' # GMT time, 15:30 GMT == 23:30 China
@@ -68,6 +68,7 @@ on:
        - PPML-Scala-UT
        - PPML-Python-UT-Spark3
        - PPML-Occlum-ExampleTests
        - PPML-Occlum-EDMM-ExampleTests
        - PPML-spark-Local-SimpleQuery-Tests-on-Gramine
        - PPML-RealTime-ML-Occlum
        - PPML-RealTime-ML-Occlum-K8s
@@ -1402,6 +1403,40 @@ jobs:
          job-name: PPML-Occlum-ExampleTests
          runner-hosted-on: 'Shanghai'

  PPML-Occlum-EDMM-ExampleTests:
    if: ${{ github.event.inputs.artifact == 'PPML-Occlum-EDMM-ExampleTests' }}
    runs-on: [self-hosted, EDMM]

    steps:
      - uses: actions/checkout@v3
      - name: Set up JDK8
        uses: ./.github/actions/jdk-setup-action
      - name: Set up maven
        uses: ./.github/actions/maven-setup-action
      - name: set env
        env:
          DEFAULT_IMAGE: '10.239.45.10/arda/intelanalytics/bigdl-ppml-trusted-big-data-ml-scala-occlum'
          DEFAULT_TAG: '2.4.0-SNAPSHOT-EDMM'
        run: |
          echo "TAG=${{ github.event.inputs.tag || env.DEFAULT_TAG }}" >> $GITHUB_ENV
          echo "IMAGE=${{ github.event.inputs.image || env.DEFAULT_IMAGE }}" >> $GITHUB_ENV
      - name: Run Test
        uses: ./.github/actions/ppml/ppml-occlum-EDMM-exampletests-action
        with:
          image: ${{env.IMAGE}}
          image-tag: ${{env.TAG}}
      - name: Create Job Badge
        uses: ./.github/actions/create-job-status-badge
        if: ${{ always() }}
        with:
          secret: ${{ secrets.GIST_SECRET}}
          gist-id: ${{env.GIST_ID}}
          is-self-hosted-runner: true
          file-name: PPML-Occlum-EDMM-ExampleTests.json
          type: job
          job-name: PPML-Occlum-EDMM-ExampleTests
          runner-hosted-on: 'Shanghai'

  PPML-RealTime-ML-Occlum:
    if: ${{ github.event.schedule || github.event.inputs.artifact == 'PPML-RealTime-ML-Occlum' || github.event.inputs.artifact == 'all' }}
    runs-on: [self-hosted, Vilvarin]
12 changes: 6 additions & 6 deletions README.md
@@ -12,8 +12,8 @@
> *It is built on top of the excellent work of [llama.cpp](https://github.com/ggerganov/llama.cpp), [ggml](https://github.com/ggerganov/ggml), [gptq](https://github.com/IST-DASLab/gptq), [bitsandbytes](https://github.com/TimDettmers/bitsandbytes), [qlora](https://github.com/artidoro/qlora), [llama-cpp-python](https://github.com/abetlen/llama-cpp-python), [gptq_for_llama](https://github.com/qwopqwop200/GPTQ-for-LLaMa), [chatglm.cpp](https://github.com/li-plus/chatglm.cpp), [redpajama.cpp](https://github.com/togethercomputer/redpajama.cpp), [gptneox.cpp](https://github.com/byroneverson/gptneox.cpp), [bloomz.cpp](https://github.com/NouamaneTazi/bloomz.cpp/), etc.*
### Latest update
- **[New]** `bigdl-llm` now supports QLoRA finetuning on Intel GPU; see the example [here](python/llm/example/gpu/qlora_finetuning).
- `bigdl-llm` now supports Intel GPU (including Arc, Flex and MAX); see the latest GPU examples [here](python/llm/example/gpu).
- **[New]** `bigdl-llm` now supports QLoRA finetuning on Intel GPU; see the example [here](python/llm/example/GPU/QLoRA-FineTuning).
- `bigdl-llm` now supports Intel GPU (including Arc, Flex and MAX); see the latest GPU examples [here](python/llm/example/GPU).
- `bigdl-llm` tutorial is released [here](https://github.com/intel-analytics/bigdl-llm-tutorial).
- Over 20 models have been optimized/verified on `bigdl-llm`, including *LLaMA/LLaMA2, ChatGLM/ChatGLM2, MPT, Falcon, Dolly, StarCoder, Whisper, InternLM, QWen, Baichuan, Aquila, MOSS,* and more; see the complete list [here](python/llm/README.md#verified-models).

@@ -76,7 +76,7 @@ input_ids = tokenizer.encode(input_str, ...)
output_ids = model.generate(input_ids, ...)
output = tokenizer.batch_decode(output_ids)
```
*See the complete examples [here](python/llm/example/transformers/transformers_int4/).*
*See the complete examples [here](python/llm/example/CPU/HF-Transformers-AutoModels/Model).*
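For readers skimming the diff, the hunk above cuts off the model-loading half of the CPU INT4 example. A minimal self-contained sketch of the flow, assuming the bigdl-llm transformers-style `AutoModelForCausalLM` API with `load_in_4bit=True`, and using a placeholder model path and prompt:
```python
# Illustrative sketch only; the model path and prompt are placeholders, not part of this commit.
from bigdl.llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = '/path/to/model/'  # hypothetical local checkpoint directory
model = AutoModelForCausalLM.from_pretrained(model_path, load_in_4bit=True)  # INT4 optimization on CPU
tokenizer = AutoTokenizer.from_pretrained(model_path)

input_ids = tokenizer.encode("What is AI?", return_tensors="pt")
output_ids = model.generate(input_ids, max_new_tokens=32)
output = tokenizer.batch_decode(output_ids)
```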

#### GPU INT4
##### Install
@@ -105,7 +105,7 @@ input_ids = tokenizer.encode(input_str, ...).to('xpu')
output_ids = model.generate(input_ids, ...)
output = tokenizer.batch_decode(output_ids.cpu())
```
*See the complete examples [here](python/llm/example/gpu/).*
*See the complete examples [here](python/llm/example/GPU).*
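Likewise, a hedged end-to-end sketch of the GPU INT4 path shown above; it assumes an Intel XPU device and that `intel_extension_for_pytorch` is installed (neither appears in this hunk), and again uses placeholder paths and prompts:
```python
# Illustrative sketch only; paths and prompt are placeholders.
import intel_extension_for_pytorch as ipex  # noqa: F401 -- importing registers the 'xpu' device with PyTorch
from bigdl.llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = '/path/to/model/'  # hypothetical local checkpoint directory
model = AutoModelForCausalLM.from_pretrained(model_path, load_in_4bit=True).to('xpu')
tokenizer = AutoTokenizer.from_pretrained(model_path)

input_ids = tokenizer.encode("What is AI?", return_tensors="pt").to('xpu')
output_ids = model.generate(input_ids, max_new_tokens=32)
output = tokenizer.batch_decode(output_ids.cpu())  # move results back to the CPU before decoding
```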

#### More Low-Bit Support
##### Save and load
@@ -115,15 +115,15 @@ After the model is optimized using `bigdl-llm`, you may save and load the model
model.save_low_bit(model_path)
new_model = AutoModelForCausalLM.load_low_bit(model_path)
```
*See the complete example [here](python/llm/example/transformers/transformers_low_bit/).*
*See the complete example [here](python/llm/example/CPU/HF-Transformers-AutoModels/Save-Load).*
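The two lines above read more clearly as a full round trip; a minimal sketch, assuming the same bigdl-llm `AutoModelForCausalLM` class and placeholder paths:
```python
# Illustrative sketch only; paths are placeholders.
from bigdl.llm.transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained('/path/to/model/', load_in_4bit=True)
model.save_low_bit('/path/to/low-bit-model/')  # persist the already-quantized weights

# Later (or in another process): reload without touching the original checkpoint.
new_model = AutoModelForCausalLM.load_low_bit('/path/to/low-bit-model/')
```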

##### Additional data types

In addition to INT4, you may apply other low-bit optimizations (such as *INT8*, *INT5*, *NF4*, etc.) as follows:
```python
model = AutoModelForCausalLM.from_pretrained('/path/to/model/', load_in_low_bit="sym_int8")
```
*See the complete example [here](python/llm/example/transformers/transformers_low_bit/).*
*See the complete example [here](python/llm/example/CPU/HF-Transformers-AutoModels/More-Data-Types).*


***For more details, please refer to the `bigdl-llm` [Document](https://test-bigdl-llm.readthedocs.io/en/main/doc/LLM/index.html), [Readme](python/llm), [Tutorial](https://github.com/intel-analytics/bigdl-llm-tutorial) and [API Doc](https://bigdl.readthedocs.io/en/latest/doc/PythonAPI/LLM/index.html).***