From e101165268d6f2a27fc525a361a93c8758ab92c2 Mon Sep 17 00:00:00 2001 From: Ziteng Zhang <87107332+Jasonzzt@users.noreply.github.com> Date: Thu, 19 Oct 2023 15:28:05 +0800 Subject: [PATCH] Added Docker installation guide and modified link in Dockerfile (#9224) * changed '/ppml' into '/bigdl' and modified llama-7b * Added the contents of finetuning in README * Modified link of qlora_finetuning.py in Dockerfile --- docker/llm/README.md | 182 +++++++++++++++++- .../llm/finetune/qlora/xpu/docker/Dockerfile | 2 +- 2 files changed, 182 insertions(+), 2 deletions(-) diff --git a/docker/llm/README.md b/docker/llm/README.md index 17c278d67e3..b155a319a97 100644 --- a/docker/llm/README.md +++ b/docker/llm/README.md @@ -354,4 +354,184 @@ python3 -m fastchat.serve.openai_api_server --host localhost --port 8000 ## Docker installation guide for BigDL LLM Fine Tuning on CPU -## Docker installation guide for BigDL LLM Fine Tuning on XPU \ No newline at end of file +### 1. Prepare BigDL image for Lora Finetuning + +You can download directly from Dockerhub like: + +```bash +docker pull intelanalytics/bigdl-llm-finetune-lora-cpu:2.4.0-SNAPSHOT +``` + +Or build the image from source: + +```bash +export HTTP_PROXY=your_http_proxy +export HTTPS_PROXY=your_https_proxy + +docker build \ + --build-arg http_proxy=${HTTP_PROXY} \ + --build-arg https_proxy=${HTTPS_PROXY} \ + -t intelanalytics/bigdl-llm-finetune-lora-cpu:2.4.0-SNAPSHOT \ + -f ./Dockerfile . +``` + +### 2. Prepare Base Model, Data and Container + +Here, we try to finetune [Llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b) with [Cleaned alpaca data](https://raw.githubusercontent.com/tloen/alpaca-lora/main/alpaca_data_cleaned_archive.json), which contains all kinds of general knowledge and has already been cleaned. 
And please download them and start a docker container with files mounted like below: + +``` +docker run -itd \ + --name=bigdl-llm-finetune-lora-cpu \ + --cpuset-cpus="your_expected_range_of_cpu_numbers" \ + -e STANDALONE_DOCKER=TRUE \ + -e WORKER_COUNT_DOCKER=your_worker_count \ + -v your_downloaded_base_model_path:/bigdl/model \ + -v your_downloaded_data_path:/bigdl/data/alpaca_data_cleaned_archive.json \ + intelanalytics/bigdl-llm-finetune-lora-cpu:2.4.0-SNAPSHOT \ + bash +``` + +You can adjust the configuration according to your own environment. After our testing, we recommend you set worker_count=1, and then allocate 80G memory to Docker. + +### 3. Start Finetuning + +Enter the running container: + +``` +docker exec -it bigdl-llm-finetune-lora-cpu bash +``` + +Then, run the script to start finetuning: + +``` +bash /bigdl/bigdl-lora-finetuing-entrypoint.sh +``` + +After minutes, it is expected to get results like: + +``` +Training Alpaca-LoRA model with params: +... +Related params +... +world_size: 2!! +PMI_RANK(local_rank): 1 +Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00, 2.28s/it] +Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00, 2.62s/it] +trainable params: 4194304 || all params: 6742609920 || trainable%: 0.06220594176090199 +[INFO] spliting and shuffling dataset... +[INFO] shuffling and tokenizing train data... +Map: 2%|▏ | 1095/49759 [00:00<00:30, 1599.00 examples/s]trainable params: 4194304 || all params: 6742609920 || trainable%: 0.06220594176090199 +[INFO] spliting and shuffling dataset... +[INFO] shuffling and tokenizing train data... +Map: 100%|██████████| 49759/49759 [00:29<00:00, 1678.89 examples/s] +[INFO] shuffling and tokenizing test data... +Map: 100%|██████████| 49759/49759 [00:29<00:00, 1685.42 examples/s] +[INFO] shuffling and tokenizing test data... 
+Map: 100%|██████████| 2000/2000 [00:01<00:00, 1573.61 examples/s] +Map: 100%|██████████| 2000/2000 [00:01<00:00, 1578.71 examples/s] +[INFO] begining the training of transformers... +[INFO] Process rank: 0, device: cpudistributed training: True + 0%| | 1/1164 [02:42<52:28:24, 162.43s/it] +``` + +You can run BF16-Optimized lora finetuning on kubernetes with OneCCL. So for kubernetes users, please refer to [here](https://github.com/intel-analytics/BigDL/tree/main/docker/llm/finetune/lora/cpu#run-bf16-optimized-lora-finetuning-on-kubernetes-with-oneccl). + +## Docker installation guide for BigDL LLM Fine Tuning on XPU + +The following shows how to fine-tune LLM with Quantization (QLoRA built on BigDL-LLM 4bit optimizations) in a docker environment, which is accelerated by Intel XPU. + +### 1. Prepare Docker Image + +You can download directly from Dockerhub like: + +```bash +docker pull intelanalytics/bigdl-llm-finetune-qlora-xpu:2.4.0-SNAPSHOT +``` + +Or build the image from source: + +```bash +export HTTP_PROXY=your_http_proxy +export HTTPS_PROXY=your_https_proxy + +docker build \ + --build-arg http_proxy=${HTTP_PROXY} \ + --build-arg https_proxy=${HTTPS_PROXY} \ + -t intelanalytics/bigdl-llm-finetune-qlora-xpu:2.4.0-SNAPSHOT \ + -f ./Dockerfile . +``` + +### 2. 
Prepare Base Model, Data and Container + +Here, we try to fine-tune a [Llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b) with [English Quotes](https://huggingface.co/datasets/Abirate/english_quotes) dataset, and please download them and start a docker container with files mounted like below: + +```bash +export BASE_MODEL_PATH=your_downloaded_base_model_path +export DATA_PATH=your_downloaded_data_path +export HTTP_PROXY=your_http_proxy +export HTTPS_PROXY=your_https_proxy + +docker run -itd \ + --net=host \ + --device=/dev/dri \ + --memory="32G" \ + --name=bigdl-llm-fintune-qlora-xpu \ + -e http_proxy=${HTTP_PROXY} \ + -e https_proxy=${HTTPS_PROXY} \ + -v $BASE_MODEL_PATH:/model \ + -v $DATA_PATH:/data/english_quotes \ + --shm-size="16g" \ + intelanalytics/bigdl-llm-finetune-qlora-xpu:2.4.0-SNAPSHOT +``` + +The download and mount of base model and data to a docker container demonstrates a standard fine-tuning process. You can skip this step for a quick start, and in this way, the fine-tuning codes will automatically download the needed files: + +```bash +export HTTP_PROXY=your_http_proxy +export HTTPS_PROXY=your_https_proxy + +docker run -itd \ + --net=host \ + --device=/dev/dri \ + --memory="32G" \ + --name=bigdl-llm-fintune-qlora-xpu \ + -e http_proxy=${HTTP_PROXY} \ + -e https_proxy=${HTTPS_PROXY} \ + --shm-size="16g" \ + intelanalytics/bigdl-llm-finetune-qlora-xpu:2.4.0-SNAPSHOT +``` + +However, we do recommend that you handle them manually, because the automatic download can be blocked by Internet access restrictions or Huggingface authentication requirements depending on your environment, and the manual method allows you to fine-tune in a custom way (with a different base model and dataset). + +### 3. 
Start Fine-Tuning + +Enter the running container: + +```bash +docker exec -it bigdl-llm-fintune-qlora-xpu bash +``` + +Then, start QLoRA fine-tuning: + +```bash +bash start-qlora-finetuning-on-xpu.sh +``` + +After minutes, it is expected to get results like: + +```bash +{'loss': 2.256, 'learning_rate': 0.0002, 'epoch': 0.03} +{'loss': 1.8869, 'learning_rate': 0.00017777777777777779, 'epoch': 0.06} +{'loss': 1.5334, 'learning_rate': 0.00015555555555555556, 'epoch': 0.1} +{'loss': 1.4975, 'learning_rate': 0.00013333333333333334, 'epoch': 0.13} +{'loss': 1.3245, 'learning_rate': 0.00011111111111111112, 'epoch': 0.16} +{'loss': 1.2622, 'learning_rate': 8.888888888888889e-05, 'epoch': 0.19} +{'loss': 1.3944, 'learning_rate': 6.666666666666667e-05, 'epoch': 0.22} +{'loss': 1.2481, 'learning_rate': 4.4444444444444447e-05, 'epoch': 0.26} +{'loss': 1.3442, 'learning_rate': 2.2222222222222223e-05, 'epoch': 0.29} +{'loss': 1.3256, 'learning_rate': 0.0, 'epoch': 0.32} +{'train_runtime': 204.4633, 'train_samples_per_second': 3.913, 'train_steps_per_second': 0.978, 'train_loss': 1.5072882556915284, 'epoch': 0.32} +100%|██████████████████████████████████████████████████████████████████████████████████████| 200/200 [03:24<00:00, 1.02s/it] +TrainOutput(global_step=200, training_loss=1.5072882556915284, metrics={'train_runtime': 204.4633, 'train_samples_per_second': 3.913, 'train_steps_per_second': 0.978, 'train_loss': 1.5072882556915284, 'epoch': 0.32}) +``` diff --git a/docker/llm/finetune/qlora/xpu/docker/Dockerfile b/docker/llm/finetune/qlora/xpu/docker/Dockerfile index f6028dd50d9..f1bb8b0c31f 100644 --- a/docker/llm/finetune/qlora/xpu/docker/Dockerfile +++ b/docker/llm/finetune/qlora/xpu/docker/Dockerfile @@ -33,6 +33,6 @@ RUN curl -fsSL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-P # install huggingface dependencies pip install git+https://github.com/huggingface/transformers.git@${TRANSFORMERS_COMMIT_ID} && \ pip install peft==0.5.0 datasets && \ - wget 
https://raw.githubusercontent.com/intel-analytics/BigDL/main/python/llm/example/gpu/qlora_finetuning/qlora_finetuning.py + wget https://raw.githubusercontent.com/intel-analytics/BigDL/main/python/llm/example/GPU/QLoRA-FineTuning/qlora_finetuning.py ADD ./start-qlora-finetuning-on-xpu.sh /start-qlora-finetuning-on-xpu.sh