Fix experiment exit code (#85)

huggingface · Nov 16, 2023 · 0092945 · 0092945
1 parent d403046
commit 0092945
Show file tree

Hide file tree

Showing 19 changed files with 152 additions and 76 deletions.
diff --git a/.github/workflows/check_quality.yaml b/.github/workflows/check_code_quality.yaml
@@ -1,24 +1,25 @@
-name: Quality checks
+name: Quality Code Checks
 
 on:
+  workflow_dispatch:
   push:
     branches: [main]
   pull_request:
-    branches: [main]
+    types: [opened, reopened, synchronize]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: true
 
 jobs:
-  run_quality_checks:
+  run_code_quality_checks:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python 3.8
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
           python-version: 3.8
 
@@ -29,4 +30,5 @@ jobs:
 
       - name: Check style
         run: |
-          make style_check
+          black --check .
+          ruff check .
diff --git a/.github/workflows/test_cli_misc.yaml b/.github/workflows/test_cli_misc.yaml
@@ -0,0 +1,33 @@
+name: CLI Misc Tests
+
+on:
+  workflow_dispatch:
+  push:
+    branches: [main]
+  pull_request:
+    types: [opened, reopened, synchronize]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}  # keyed by PR number, else by ref
+  cancel-in-progress: true
+
+jobs:
+  run_misc_tests:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v3
+        with:
+          python-version: "3.8"  # quoted: a bare version is parsed as a YAML float
+
+      - name: Install requirements
+        run: |
+          pip install --upgrade pip
+          pip install -e ".[test]"  # quoted so the shell never glob-expands the brackets
+
+      - name: Run tests
+        run: |
+          pytest -k "not (cpu or cuda or rocm or tensorrt)"  # deselect tests matching these keywords
diff --git a/.github/workflows/test_cpu_neural_compressor.yaml b/.github/workflows/test_cpu_neural_compressor.yaml
@@ -1,11 +1,11 @@
-name: Intel Neural Compressor CPU Tests
+name: CPU Intel Neural Compressor Tests
 
 on:
   workflow_dispatch:
   push:
     branches: [main]
   pull_request:
-    branches: [main]
+    types: [opened, reopened, synchronize]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -16,10 +16,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python 3.8
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
           python-version: 3.8
 

diff --git a/.github/workflows/test_cpu_onnxruntime.yaml b/.github/workflows/test_cpu_onnxruntime.yaml
@@ -1,11 +1,11 @@
-name: OnnxRuntime CPU Tests
+name: CPU OnnxRuntime Tests
 
 on:
   workflow_dispatch:
   push:
     branches: [main]
   pull_request:
-    branches: [main]
+    types: [opened, reopened, synchronize]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -16,10 +16,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python 3.8
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
           python-version: 3.8
 

diff --git a/.github/workflows/test_cpu_openvino.yaml b/.github/workflows/test_cpu_openvino.yaml
@@ -1,11 +1,11 @@
-name: OpenVINO CPU Tests
+name: CPU OpenVINO Tests
 
 on:
   workflow_dispatch:
   push:
     branches: [main]
   pull_request:
-    branches: [main]
+    types: [opened, reopened, synchronize]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -16,10 +16,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python 3.8
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
           python-version: 3.8
 

diff --git a/.github/workflows/test_cpu_pytorch.yaml b/.github/workflows/test_cpu_pytorch.yaml
@@ -1,11 +1,11 @@
-name: Pytorch CPU tests
+name: CPU Pytorch tests
 
 on:
   workflow_dispatch:
   push:
     branches: [main]
   pull_request:
-    branches: [main]
+    types: [opened, reopened, synchronize]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -16,10 +16,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python 3.8
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
           python-version: 3.8
 

diff --git a/.github/workflows/test_cuda_onnxruntime_inference.yaml b/.github/workflows/test_cuda_onnxruntime_inference.yaml
@@ -1,7 +1,9 @@
-name: OnnxRuntime CUDA Inference Tests
+name: CUDA OnnxRuntime Inference Tests
 
 on:
   workflow_dispatch:
+  push:
+    branches: [main]
   pull_request:
     types: [opened, reopened, synchronize]
 
@@ -14,7 +16,7 @@ jobs:
     runs-on: hf-dgx-01
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Build image
         run: docker build

diff --git a/.github/workflows/test_cuda_onnxruntime_training.yaml b/.github/workflows/test_cuda_onnxruntime_training.yaml
@@ -1,7 +1,9 @@
-name: OnnxRuntime CUDA Training Tests
+name: CUDA OnnxRuntime Training Tests
 
 on:
   workflow_dispatch:
+  push:
+    branches: [main]
   pull_request:
     types: [opened, reopened, synchronize]
 
@@ -14,7 +16,7 @@ jobs:
     runs-on: hf-dgx-01
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Build image
         run: docker build

diff --git a/.github/workflows/test_cuda_pytorch.yaml b/.github/workflows/test_cuda_pytorch.yaml
@@ -1,7 +1,9 @@
-name: Pytorch CUDA Tests
+name: CUDA Pytorch Tests
 
 on:
   workflow_dispatch:
+  push:
+    branches: [main]
   pull_request:
     types: [opened, reopened, synchronize]
 
@@ -11,19 +13,28 @@ concurrency:
 
 jobs:
   build_image_and_run_gpu_tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        image:
+          [
+            { torch_cuda: cu121, cuda_version: 12.1.1 },
+            { torch_cuda: cu118, cuda_version: 11.8.0 },
+          ]
+
     runs-on: hf-dgx-01
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Build image
         run: docker build
           --file docker/cuda.dockerfile
           --build-arg USER_ID=$(id -u)
           --build-arg GROUP_ID=$(id -g)
-          --build-arg CUDA_VERSION=12.1.1
-          --build-arg TORCH_CUDA=cu121
-          --tag opt-bench-cuda:12.1.1
+          --build-arg TORCH_CUDA=${{ matrix.image.torch_cuda }}
+          --build-arg CUDA_VERSION=${{ matrix.image.cuda_version }}
+          --tag opt-bench-cuda:${{ matrix.image.cuda_version }}
           .
 
       - name: Run tests
@@ -33,9 +44,10 @@ jobs:
           --pid host
           --shm-size 64G
           --env USE_CUDA="1"
-          --entrypoint /bin/bash
+          --volume $HOME/.cache/huggingface:/home/user/.cache/huggingface
           --volume $(pwd):/workspace/optimum-benchmark
           --workdir /workspace/optimum-benchmark
           --gpus '"device=0,1"'
-          opt-bench-cuda:12.1.1
+          --entrypoint /bin/bash
+          opt-bench-cuda:${{ matrix.image.cuda_version }}
           -c "pip install -e .[test,peft,diffusers] && pytest -k 'cuda and pytorch' -x"
diff --git a/.github/workflows/test_rocm_pytorch.yaml b/.github/workflows/test_rocm_pytorch.yaml
@@ -1,7 +1,9 @@
-name: Pytorch ROCm Tests
+name: ROCm Pytorch Tests
 
 on:
   workflow_dispatch:
+  push:
+    branches: [main]
   pull_request:
     types: [opened, reopened, synchronize]
 
@@ -16,20 +18,11 @@ jobs:
       matrix:
         image:
           [
-            {
-              rocm_version: 5.6.1,
-              torch_rocm_version: 5.6,
-              torch_pre_release: 0,
-            },
-            {
-              rocm_version: 5.7,
-              torch_rocm_version: 5.7,
-              torch_pre_release: 1,
-            },
+            { torch_rocm: rocm5.6, torch_pre_release: 0, rocm_version: 5.6.1 },
+            { torch_rocm: rocm5.7, torch_pre_release: 1, rocm_version: 5.7 },
           ]
-        runner: [hf-amd-mi210-dev]
 
-    runs-on: ${{ matrix.runner }}
+    runs-on: hf-amd-mi210-dev
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
@@ -39,15 +32,11 @@ jobs:
           --file docker/rocm.dockerfile
           --build-arg USER_ID=$(id -u)
           --build-arg GROUP_ID=$(id -g)
-          --build-arg ROCM_VERSION=$ROCM_VERSION
-          --build-arg TORCH_PRE_RELEASE=$TORCH_PRE_RELEASE
-          --build-arg TORCH_ROCM_VERSION=$TORCH_ROCM_VERSION
-          --tag opt-bench-rocm:$TORCH_ROCM_VERSION
+          --build-arg TORCH_ROCM=${{ matrix.image.torch_rocm }}
+          --build-arg TORCH_PRE_RELEASE=${{ matrix.image.torch_pre_release }}
+          --build-arg ROCM_VERSION=${{ matrix.image.rocm_version }}
+          --tag opt-bench-rocm:${{ matrix.image.rocm_version }}
           .
-        env:
-          ROCM_VERSION: ${{ matrix.image.rocm_version }}
-          TORCH_ROCM_VERSION: ${{ matrix.image.torch_rocm_version }}
-          TORCH_PRE_RELEASE: ${{ matrix.image.torch_pre_release }}
 
       - name: Run tests
         run: docker run
@@ -63,7 +52,5 @@ jobs:
           --device /dev/dri/renderD128
           --device /dev/dri/renderD129
           --entrypoint /bin/bash
-          opt-bench-rocm:$TORCH_ROCM_VERSION
+          opt-bench-rocm:${{ matrix.image.rocm_version }}
           -c "pip install -e .[test,peft,diffusers] && pytest -k 'cuda and pytorch' -x"
-        env:
-          TORCH_ROCM_VERSION: ${{ matrix.image.torch_rocm_version }}
diff --git a/.github/workflows/test_tensorrt_onnxruntime_inference.yaml b/.github/workflows/test_tensorrt_onnxruntime_inference.yaml
@@ -1,6 +1,9 @@
-name: OnnxRuntime TensorRT Inference Tests
+name: TensorRT OnnxRuntime Inference Tests
 
 on:
+  workflow_dispatch:
+  push:
+    branches: [main]
   pull_request:
     types: [opened, reopened, synchronize]
 
@@ -13,7 +16,7 @@ jobs:
     runs-on: hf-dgx-01
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Build image
         run: docker build

diff --git a/.gitignore b/.gitignore
@@ -167,6 +167,6 @@ sweeps/
 data/
 version.txt
 
-# Experiments
+actions-runner/
 experiments/
-examples
+examples/
diff --git a/docker/cuda.dockerfile b/docker/cuda.dockerfile
@@ -19,6 +19,7 @@ ARG UBUNTU_VERSION=22.04
 FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 
 ARG TORCH_CUDA=cu121
+ARG TORCH_PRE_RELEASE=0
 
 # Ignore interactive questions during `docker build`
 ENV DEBIAN_FRONTEND noninteractive
@@ -55,4 +56,7 @@ WORKDIR /home/user
 RUN pip install --upgrade pip
 
 # Install PyTorch
-RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA}
+RUN if [ "${TORCH_PRE_RELEASE}" != "1" ]; \
+    then pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \
+    else pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/${TORCH_CUDA} ; \
+    fi