From fd3f76115ac9779fa15c8478ae75886150a13356 Mon Sep 17 00:00:00 2001
From: Jiafu Zhang
Date: Mon, 18 Dec 2023 21:46:20 +0800
Subject: [PATCH] verify

Signed-off-by: Jiafu Zhang
---
 .github/workflows/workflow_finetune.yml        | 144 ------------------
 .github/workflows/workflow_finetune_gpu.yml    |  25 ---
 .github/workflows/workflow_inference.yml       | 100 ++----------
 .github/workflows/workflow_orders.yml          |   3 -
 .github/workflows/workflow_orders_nightly.yml  |  20 ---
 5 files changed, 11 insertions(+), 281 deletions(-)
 delete mode 100644 .github/workflows/workflow_finetune.yml
 delete mode 100644 .github/workflows/workflow_finetune_gpu.yml
 delete mode 100644 .github/workflows/workflow_orders_nightly.yml

diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml
deleted file mode 100644
index 1aaaf4df5..000000000
--- a/.github/workflows/workflow_finetune.yml
+++ /dev/null
@@ -1,144 +0,0 @@
-name: Finetune
-
-on:
-  workflow_call:
-    inputs:
-      ci_type:
-        type: string
-        default: 'pr'
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-ft
-  cancel-in-progress: true
-
-jobs:
-  inference:
-    name: finetune test
-    strategy:
-      matrix:
-        model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b ]
-        isPR:
-          - ${{inputs.ci_type == 'pr'}}
-
-        exclude:
-          - { isPR: true }
-        include:
-          - { model: "EleutherAI/gpt-j-6b"}
-          - { model: "meta-llama/Llama-2-7b-chat-hf"}
-
-    runs-on: self-hosted
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-
-      - name: Load environment variables
-        run: cat ~/llm-ray-actions-runner/.env >> $GITHUB_ENV
-
-      - name: Build Docker Image
-        run: docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_IMAGE_BUILD }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_IMAGE_BUILD }} -f dev/docker/Dockerfile.cpu_and_deepspeed -t finetune:latest && yes | docker container prune && yes | docker image prune
-
-      - name: Start Docker Container
-        run: |
-          cid=$(docker ps -q --filter "name=finetune")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/llm-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER_RUN }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER_RUN }} --name="finetune" --hostname="finetune-container" finetune:latest
-
-      - name: Run Finetune Test
-        run: |
-          docker exec "finetune" bash -c "source \$(python -c 'import oneccl_bindings_for_pytorch as torch_ccl;print(torch_ccl.cwd)')/env/setvars.sh; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --head --node-ip-address 127.0.0.1 --ray-debugger-external; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --address='127.0.0.1:6379' --ray-debugger-external"
-          CMD=$(cat << EOF
-          conf_path = "finetune/finetune.conf"
-          with open(conf_path, encoding="utf-8") as reader:
-              result = eval(reader.read())
-          result['General']['base_model'] = "${{ matrix.model }}"
-          if "${{ matrix.model }}" == "mosaicml/mpt-7b-chat":
-              result['General']['config']['trust_remote_code'] = True
-          else:
-              result['General']['config']['trust_remote_code'] = False
-          if "${{ matrix.model }}" == "EleutherAI/gpt-j-6b" or "${{ matrix.model }}" == "gpt2":
-              result['General']["gpt_base_model"] = True
-          else:
-              result['General']["gpt_base_model"] = False
-          if "${{ matrix.model }}" == "meta-llama/Llama-2-7b-chat-hf":
-              result['General']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}"
-          else:
-              result['General']["config"]["use_auth_token"] = None
-          result['Training']['epochs'] = 1
-          if "${{ matrix.model }}" == "gpt2":
-              # to verify oneccl
-              result['Training']['num_training_workers'] = 2
-          else:
-              result['Training']['num_training_workers'] = 1
-          result['General']['lora_config'] = None
-          with open(conf_path, 'w') as output:
-              print(result, file=output)
-          EOF
-          )
-          docker exec "finetune" python -c "$CMD"
-          docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
-
-      - name: Run PEFT-LoRA Test
-        run: |
-          docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
-          CMD=$(cat << EOF
-          conf_path = "finetune/finetune.conf"
-          with open(conf_path, encoding="utf-8") as reader:
-              result = eval(reader.read())
-          result['General']['lora_config'] = {
-              "task_type": "CAUSAL_LM",
-              "r": 8,
-              "lora_alpha": 32,
-              "lora_dropout": 0.1
-          }
-          with open(conf_path, 'w') as output:
-              print(result, file=output)
-          EOF
-          )
-          docker exec "finetune" python -c "$CMD"
-          docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
-
-      - name: Run Deltatuner Test on DENAS-LoRA Model
-        run: |
-          if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf)$ ]]; then
-            echo ${{ matrix.model }} is not supported!
-          else
-            docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
-            CMD=$(cat << EOF
-            import os
-            os.system("cp -r $(python -m pip show deltatuner | grep Location | cut -d: -f2)/deltatuner/conf/best_structure examples/")
-            conf_path = "finetune/finetune.conf"
-            with open(conf_path, encoding="utf-8") as reader:
-                result = eval(reader.read())
-            result['General']['lora_config'] = {
-                "task_type": "CAUSAL_LM",
-                "r": 8,
-                "lora_alpha": 32,
-                "lora_dropout": 0.1
-            }
-            result['General']['deltatuner_config'] = {
-                "algo": "lora",
-                "denas": True,
-                "best_model_structure": f"examples/best_structure/${{ matrix.model }}-best_structure.jsonl",
-            }
-            with open(conf_path, 'w') as output:
-                print(result, file=output)
-            EOF)
-            docker exec "finetune" python -c "$CMD"
-            docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
-          fi
-
-      - name: Stop Ray
-        run: |
-          cid=$(docker ps -q --filter "name=finetune")
-          if [[ ! -z "$cid" ]]; then
-            docker exec "finetune" bash -c "ray stop"
-          fi
-
-      - name: Stop Container
-        if: success() || failure()
-        run: |
-          cid=$(docker ps -q --filter "name=finetune")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-
-      - name: Test Summary
-        run: echo "to be continued"
\ No newline at end of file
diff --git a/.github/workflows/workflow_finetune_gpu.yml b/.github/workflows/workflow_finetune_gpu.yml
deleted file mode 100644
index e3adb7923..000000000
--- a/.github/workflows/workflow_finetune_gpu.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: Finetune on Intel GPU
-
-on:
-  workflow_call:
-
-jobs:
-  finetune:
-    name: finetune on gpu test
-    strategy:
-      matrix:
-        model: [ pythia-6.9b, gpt-j-6b ]
-    runs-on: self-hosted
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-
-      - name: Running task on Intel GPU
-        run: |
-          rm ~/borealis-runner/llm-ray.tar.gz -f
-          tar zcf ~/borealis-runner/llm-ray.tar.gz -C ~/actions-runner/_work/llm-ray .
-          cd ~/borealis-runner/
-          python3 finetune_on_pvc.py --base_model "${{ matrix.model }}"
-
-      - name: Test Summary
-        run: echo "to be continued"
diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml
index 4662ee5eb..d1cf6e179 100644
--- a/.github/workflows/workflow_inference.yml
+++ b/.github/workflows/workflow_inference.yml
@@ -32,98 +32,20 @@ jobs:
           model: mpt-7b
 
     runs-on: self-hosted
+    container:
+      image: node:18
+      env:
+        NODE_ENV: development
+      volumes:
+        - /var/run/docker.sock:/var/run/docker.sock
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-
-      - name: Set Name Prefix
-        id: "prefix"
-        run: |
-          prefix="inference"
-          if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
-            prefix="${prefix}_bigdl_cpu"
-          fi
-          echo "prefix is ${prefix}"
-          echo "prefix=$prefix" >> $GITHUB_OUTPUT
-
-      - name: Build Docker Image
-        run: |
-          if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
-            DF_SUFFIX=".bigdl-cpu"
-          else
-            DF_SUFFIX=".cpu_and_deepspeed"
-          fi
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_IMAGE_BUILD }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_IMAGE_BUILD }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && yes | docker image prune
-
-      - name: Start Docker Container
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          cid=$(docker ps -q --filter "name=${PREFIX}")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/llm-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER_RUN }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER_RUN }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
-
-      - name: Start Ray Cluster
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          docker exec "${PREFIX}" bash -c "./inference/deep/start-ray-cluster.sh"
-
-      - name: Run Inference Test
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
-            docker exec "${PREFIX}" bash -c "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml"
-          else
-            docker exec "${PREFIX}" bash -c "KEEP_SERVE_TERMINAL='false' MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/run_model_serve.py"
-          fi
-          docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
-          docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
-
-      - name: Run Inference Test with Deltatuner
-        if: ${{ matrix.dtuner_model }}
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          docker exec "${PREFIX}" bash -c "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file .github/workflows/config/mpt_deltatuner.yaml"
-          docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
-          docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
-
-      - name: Run Inference Test with DeepSpeed
+      - name: Show env
         run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b.*)$ ]]; then
-            echo ${{ matrix.model }} is not supported!
-          else
-            docker exec "${PREFIX}" bash -c "KEEP_SERVE_TERMINAL='false' MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/run_model_serve.py --deepspeed"
-            docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
-            docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
-          fi
+          (ls /.dockerenv && echo Found dockerenv) || (echo No dockerenv)
+          echo "code dir is $GITHUB_WORKSPACE"
+          docker image ls
 
-      - name: Run Inference Test with DeepSpeed and Deltatuner
-        if: ${{ matrix.dtuner_model }}
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b.*)$ ]]; then
-            echo ${{ matrix.model }} is not supported!
-          else
-            docker exec "${PREFIX}" bash -c "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml"
-            docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
-            docker exec "${PREFIX}" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
-          fi
-
-      - name: Stop Ray
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          cid=$(docker ps -q --filter "name=${PREFIX}")
-          if [[ ! -z "$cid" ]]; then
-            docker exec "${PREFIX}" bash -c "ray stop"
-          fi
-
-      - name: Stop Container
-        if: success() || failure()
-        run: |
-          PREFIX=${{steps.prefix.outputs.prefix}}
-          cid=$(docker ps -q --filter "name=${PREFIX}")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-
       - name: Test Summary
-        run: echo "to be continued"
\ No newline at end of file
+      
\ No newline at end of file
diff --git a/.github/workflows/workflow_orders.yml b/.github/workflows/workflow_orders.yml
index e13bccecf..605b4cfc9 100644
--- a/.github/workflows/workflow_orders.yml
+++ b/.github/workflows/workflow_orders.yml
@@ -18,6 +18,3 @@ jobs:
 
   call-inference:
     uses: ./.github/workflows/workflow_inference.yml
-
-  call-finetune:
-    uses: ./.github/workflows/workflow_finetune.yml
diff --git a/.github/workflows/workflow_orders_nightly.yml b/.github/workflows/workflow_orders_nightly.yml
deleted file mode 100644
index 2ba24db1a..000000000
--- a/.github/workflows/workflow_orders_nightly.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-name: llm-ray inference & finetune
-
-on:
-  schedule:
-    - cron: "0 21 * * *"
-
-jobs:
-
-  call-inference:
-    uses: ./.github/workflows/workflow_inference.yml
-    with:
-      ci_type: nightly
-
-  call-finetune:
-    uses: ./.github/workflows/workflow_finetune.yml
-    with:
-      ci_type: nightly
-
-  call-finetune-on-intel-gpu:
-    uses: ./.github/workflows/workflow_finetune_gpu.yml