From fd3f76115ac9779fa15c8478ae75886150a13356 Mon Sep 17 00:00:00 2001
From: Jiafu Zhang
Date: Mon, 18 Dec 2023 21:46:20 +0800
Subject: [PATCH] verify

Signed-off-by: Jiafu Zhang
---
 .github/workflows/workflow_finetune.yml        | 144 ------------------
 .github/workflows/workflow_finetune_gpu.yml    |  25 ---
 .github/workflows/workflow_inference.yml       | 100 ++----------
 .github/workflows/workflow_orders.yml          |   3 -
 .github/workflows/workflow_orders_nightly.yml  |  20 ---
 5 files changed, 11 insertions(+), 281 deletions(-)
 delete mode 100644 .github/workflows/workflow_finetune.yml
 delete mode 100644 .github/workflows/workflow_finetune_gpu.yml
 delete mode 100644 .github/workflows/workflow_orders_nightly.yml

diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml
deleted file mode 100644
index 1aaaf4df5..000000000
--- a/.github/workflows/workflow_finetune.yml
+++ /dev/null
@@ -1,144 +0,0 @@
-name: Finetune
-
-on:
-  workflow_call:
-    inputs:
-      ci_type:
-        type: string
-        default: 'pr'
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-ft
-  cancel-in-progress: true
-
-jobs:
-  inference:
-    name: finetune test
-    strategy:
-      matrix:
-        model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b ]
-        isPR:
-          - ${{inputs.ci_type == 'pr'}}
-
-        exclude:
-          - { isPR: true }
-        include:
-          - { model: "EleutherAI/gpt-j-6b"}
-          - { model: "meta-llama/Llama-2-7b-chat-hf"}
-
-    runs-on: self-hosted
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-
-      - name: Load environment variables
-        run: cat ~/llm-ray-actions-runner/.env >> $GITHUB_ENV
-
-      - name: Build Docker Image
-        run: docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_IMAGE_BUILD }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_IMAGE_BUILD }} -f dev/docker/Dockerfile.cpu_and_deepspeed -t finetune:latest && yes | docker container prune && yes | docker image prune
-
-      - name: Start Docker Container
-        run: |
-          cid=$(docker ps -q --filter "name=finetune")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/llm-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER_RUN }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER_RUN }} --name="finetune" --hostname="finetune-container" finetune:latest
-
-      - name: Run Finetune Test
-        run: |
-          docker exec "finetune" bash -c "source \$(python -c 'import oneccl_bindings_for_pytorch as torch_ccl;print(torch_ccl.cwd)')/env/setvars.sh; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --head --node-ip-address 127.0.0.1 --ray-debugger-external; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --address='127.0.0.1:6379' --ray-debugger-external"
-          CMD=$(cat << EOF
-          conf_path = "finetune/finetune.conf"
-          with open(conf_path, encoding="utf-8") as reader:
-              result = eval(reader.read())
-          result['General']['base_model'] = "${{ matrix.model }}"
-          if "${{ matrix.model }}" == "mosaicml/mpt-7b-chat":
-              result['General']['config']['trust_remote_code'] = True
-          else:
-              result['General']['config']['trust_remote_code'] = False
-          if "${{ matrix.model }}" == "EleutherAI/gpt-j-6b" or "${{ matrix.model }}" == "gpt2":
-              result['General']["gpt_base_model"] = True
-          else:
-              result['General']["gpt_base_model"] = False
-          if "${{ matrix.model }}" == "meta-llama/Llama-2-7b-chat-hf":
-              result['General']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}"
-          else:
-              result['General']["config"]["use_auth_token"] = None
-          result['Training']['epochs'] = 1
-          if "${{ matrix.model }}" == "gpt2":
-              # to verify oneccl
-              result['Training']['num_training_workers'] = 2
-          else:
-              result['Training']['num_training_workers'] = 1
-          result['General']['lora_config'] = None
-          with open(conf_path, 'w') as output:
-              print(result, file=output)
-          EOF
-          )
-          docker exec "finetune" python -c "$CMD"
-          docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
-
-      - name: Run PEFT-LoRA Test
-        run: |
-          docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
-          CMD=$(cat << EOF
-          conf_path = "finetune/finetune.conf"
-          with open(conf_path, encoding="utf-8") as reader:
-              result = eval(reader.read())
-          result['General']['lora_config'] = {
-              "task_type": "CAUSAL_LM",
-              "r": 8,
-              "lora_alpha": 32,
-              "lora_dropout": 0.1
-          }
-          with open(conf_path, 'w') as output:
-              print(result, file=output)
-          EOF
-          )
-          docker exec "finetune" python -c "$CMD"
-          docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
-
-      - name: Run Deltatuner Test on DENAS-LoRA Model
-        run: |
-          if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf)$ ]]; then
-            echo ${{ matrix.model }} is not supported!
-          else
-            docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
-            CMD=$(cat << EOF
-            import os
-            os.system("cp -r $(python -m pip show deltatuner | grep Location | cut -d: -f2)/deltatuner/conf/best_structure examples/")
-            conf_path = "finetune/finetune.conf"
-            with open(conf_path, encoding="utf-8") as reader:
-                result = eval(reader.read())
-            result['General']['lora_config'] = {
-                "task_type": "CAUSAL_LM",
-                "r": 8,
-                "lora_alpha": 32,
-                "lora_dropout": 0.1
-            }
-            result['General']['deltatuner_config'] = {
-                "algo": "lora",
-                "denas": True,
-                "best_model_structure": f"examples/best_structure/${{ matrix.model }}-best_structure.jsonl",
-            }
-            with open(conf_path, 'w') as output:
-                print(result, file=output)
-            EOF)
-            docker exec "finetune" python -c "$CMD"
-            docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
-          fi
-
-      - name: Stop Ray
-        run: |
-          cid=$(docker ps -q --filter "name=finetune")
-          if [[ ! -z "$cid" ]]; then
-            docker exec "finetune" bash -c "ray stop"
-          fi
-
-      - name: Stop Container
-        if: success() || failure()
-        run: |
-          cid=$(docker ps -q --filter "name=finetune")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-
-      - name: Test Summary
-        run: echo "to be continued"
\ No newline at end of file
diff --git a/.github/workflows/workflow_finetune_gpu.yml b/.github/workflows/workflow_finetune_gpu.yml
deleted file mode 100644
index e3adb7923..000000000
--- a/.github/workflows/workflow_finetune_gpu.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: Finetune on Intel GPU
-
-on:
-  workflow_call:
-
-jobs:
-  finetune:
-    name: finetune on gpu test
-    strategy:
-      matrix:
-        model: [ pythia-6.9b, gpt-j-6b ]
-    runs-on: self-hosted
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-
-      - name: Running task on Intel GPU
-        run: |
-          rm ~/borealis-runner/llm-ray.tar.gz -f
-          tar zcf ~/borealis-runner/llm-ray.tar.gz -C ~/actions-runner/_work/llm-ray .
-          cd ~/borealis-runner/
-          python3 finetune_on_pvc.py --base_model "${{ matrix.model }}"
-
-      - name: Test Summary
-        run: echo "to be continued"
diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml
index 4662ee5eb..d1cf6e179 100644
--- a/.github/workflows/workflow_inference.yml
+++ b/.github/workflows/workflow_inference.yml
@@ -32,98 +32,20 @@ jobs:
           model: mpt-7b
 
     runs-on: self-hosted
+    container:
+      image: node:18
+      env:
+        NODE_ENV: development
+      volumes:
+        - /var/run/docker.sock:/var/run/docker.sock
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-
-      - name: Set Name Prefix
-        id: "prefix"
-        run: |
-          prefix="inference"
-          if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
-            prefix="${prefix}_bigdl_cpu"
-          fi
-          echo "prefix is ${prefix}"
-          echo "prefix=$prefix" >> $GITHUB_OUTPUT
-
-      - name: Build Docker Image
-        run: |
-          if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
-            DF_SUFFIX=".bigdl-cpu"
-          else
-            DF_SUFFIX=".cpu_and_deepspeed"
-          fi
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_IMAGE_BUILD }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_IMAGE_BUILD }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && yes | docker image prune
-
-      - name: Start Docker Container
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          cid=$(docker ps -q --filter "name=${PREFIX}")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/llm-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER_RUN }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER_RUN }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
-
-      - name: Start Ray Cluster
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          docker exec "${PREFIX}" bash -c "./inference/deep/start-ray-cluster.sh"
-
-      - name: Run Inference Test
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
-            docker exec "${PREFIX}" bash -c "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml"
-          else
-            docker exec "${PREFIX}" bash -c "KEEP_SERVE_TERMINAL='false' MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/run_model_serve.py"
-          fi
-          docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
-          docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
-
-      - name: Run Inference Test with Deltatuner
-        if: ${{ matrix.dtuner_model }}
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          docker exec "${PREFIX}" bash -c "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file .github/workflows/config/mpt_deltatuner.yaml"
-          docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
-          docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
-
-      - name: Run Inference Test with DeepSpeed
+      - name: Show env
         run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b.*)$ ]]; then
-            echo ${{ matrix.model }} is not supported!
-          else
-            docker exec "${PREFIX}" bash -c "KEEP_SERVE_TERMINAL='false' MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/run_model_serve.py --deepspeed"
-            docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
-            docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
-          fi
+          (ls /.dockerenv && echo Found dockerenv) || (echo No dockerenv)
+          echo "code dir is $GITHUB_WORKSPACE"
+          docker image ls
 
-      - name: Run Inference Test with DeepSpeed and Deltatuner
-        if: ${{ matrix.dtuner_model }}
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b.*)$ ]]; then
-            echo ${{ matrix.model }} is not supported!
-          else
-            docker exec "${PREFIX}" bash -c "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml"
-            docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
-            docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
-          fi
-
-      - name: Stop Ray
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          cid=$(docker ps -q --filter "name=${PREFIX}")
-          if [[ ! -z "$cid" ]]; then
-            docker exec "${PREFIX}" bash -c "ray stop"
-          fi
-
-      - name: Stop Container
-        if: success() || failure()
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          cid=$(docker ps -q --filter "name=${PREFIX}")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-
       - name: Test Summary
-        run: echo "to be continued"
\ No newline at end of file
+      
\ No newline at end of file
diff --git a/.github/workflows/workflow_orders.yml b/.github/workflows/workflow_orders.yml
index e13bccecf..605b4cfc9 100644
--- a/.github/workflows/workflow_orders.yml
+++ b/.github/workflows/workflow_orders.yml
@@ -18,6 +18,3 @@ jobs:
 
   call-inference:
     uses: ./.github/workflows/workflow_inference.yml
-
-  call-finetune:
-    uses: ./.github/workflows/workflow_finetune.yml
diff --git a/.github/workflows/workflow_orders_nightly.yml b/.github/workflows/workflow_orders_nightly.yml
deleted file mode 100644
index 2ba24db1a..000000000
--- a/.github/workflows/workflow_orders_nightly.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-name: llm-ray inference & finetune
-
-on:
-  schedule:
-    - cron: "0 21 * * *"
-
-jobs:
-
-  call-inference:
-    uses: ./.github/workflows/workflow_inference.yml
-    with:
-      ci_type: nightly
-
-  call-finetune:
-    uses: ./.github/workflows/workflow_finetune.yml
-    with:
-      ci_type: nightly
-
-  call-finetune-on-intel-gpu:
-    uses: ./.github/workflows/workflow_finetune_gpu.yml