ci: separate distil-large-v3-en Docker image build #147
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Check this guide for more information about publishing to ghcr.io with GitHub Actions:
# https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#upgrading-a-workflow-that-accesses-ghcrio

# Build the Docker image and push it to the registry
name: docker_publish

on:
  # Run on every push to master and on every pushed tag.
  push:
    branches:
      - "master"
    tags:
      - "*"

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# Sets the permissions granted to the GITHUB_TOKEN for every job in this workflow.
permissions:
  contents: read
  packages: write

jobs:
# Build, smoke-test and publish the no_model target of ubi.Dockerfile.
# NOTE(review): this job was previously described as "without cache export",
# yet both build steps below set cache-to — confirm which is intended.
docker-ubi-no_model:
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: true

    # Local composite action; its `tags` and `labels` outputs feed the build steps.
    - name: Setup docker
      id: setup
      uses: ./.github/workflows/docker-reused-steps
      with:
        tag: ubi-no_model

    # amd64-only build with `load: true`, so the test step can run the image by id.
    - name: Build:ubi-no_model
      uses: docker/build-push-action@v5
      id: build
      with:
        context: .
        file: ./ubi.Dockerfile
        load: true
        target: no_model
        labels: ${{ steps.setup.outputs.labels }}
        build-args: |
          VERSION=${{ github.ref_name }}
          RELEASE=${{ github.run_number }}
        platforms: linux/amd64
        # Cache to registry instead of gha to avoid the capacity limit.
        cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
        cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max

    # Smoke test: transcribe the sample clip and require en.srt to exist and contain "no".
    - name: Test ubi-no_model docker image
      run: |
        docker run --group-add 0 -v ".:/app" ${{ steps.build.outputs.imageid }} -- --model base --language en --device cpu --compute_type int8 --output_format srt .github/workflows/test/en.webm;
        if [ ! -f en.srt ]; then
          echo "The en.srt file does not exist"
          exit 1
        fi
        echo "cat en.srt:";
        cat en.srt;
        if ! grep -qi 'no' en.srt; then
          echo "The en.srt file does not contain the word 'no'"
          exit 1
        fi
        echo "Test passed."

    # Multi-platform build that is pushed only after the smoke test has passed.
    - name: Build and push:ubi-no_model
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./ubi.Dockerfile
        push: true
        target: no_model
        tags: ${{ steps.setup.outputs.tags }}
        labels: ${{ steps.setup.outputs.labels }}
        build-args: |
          VERSION=${{ github.ref_name }}
          RELEASE=${{ github.run_number }}
        platforms: linux/amd64, linux/arm64
        # Cache to registry instead of gha to avoid the capacity limit.
        cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
        cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max
        sbom: true
        provenance: true
# Run the no_model build first to ensure that the code at least builds.
# The digest of the pushed image is exported so later jobs can pin NO_MODEL_STAGE to it.
docker-no_model:
  runs-on: ubuntu-latest
  outputs:
    digest: ${{ steps.publish.outputs.digest }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: true

    # Local composite action; its `tags` and `labels` outputs feed the build steps.
    - name: Setup docker
      id: setup
      uses: ./.github/workflows/docker-reused-steps

    # amd64-only build with `load: true`, so the test step can run the image by id.
    - name: Build:no_model
      uses: docker/build-push-action@v5
      id: build
      with:
        context: .
        file: ./Dockerfile
        load: true
        target: no_model
        labels: ${{ steps.setup.outputs.labels }}
        build-args: |
          VERSION=${{ github.ref_name }}
          RELEASE=${{ github.run_number }}
        platforms: linux/amd64
        # Cache to registry instead of gha to avoid the capacity limit.
        cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
        cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max

    # Smoke test: transcribe the sample clip and require en.srt to exist and contain "no".
    - name: Test no_model docker image
      run: |
        docker run --group-add 0 -v ".:/app" ${{ steps.build.outputs.imageid }} -- --model base --language en --device cpu --compute_type int8 --output_format srt .github/workflows/test/en.webm;
        if [ ! -f en.srt ]; then
          echo "The en.srt file does not exist"
          exit 1
        fi
        echo "cat en.srt:";
        cat en.srt;
        if ! grep -qi 'no' en.srt; then
          echo "The en.srt file does not contain the word 'no'"
          exit 1
        fi
        echo "Test passed."

    # Multi-platform build and push; `id: publish` is what the job-level `digest` output reads.
    - name: Build and push:no_model
      uses: docker/build-push-action@v5
      id: publish
      with:
        context: .
        file: ./Dockerfile
        push: true
        target: no_model
        tags: ${{ steps.setup.outputs.tags }}
        labels: ${{ steps.setup.outputs.labels }}
        build-args: |
          VERSION=${{ github.ref_name }}
          RELEASE=${{ github.run_number }}
        platforms: linux/amd64, linux/arm64
        # Cache to registry instead of gha to avoid the capacity limit.
        cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
        cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max
        sbom: true
        provenance: true
# Download whisper model cache: one pushed `load_whisper` image per model in the matrix.
docker-cache:
  runs-on: ubuntu-latest
  strategy:
    # Cancel the remaining models as soon as one of them fails.
    fail-fast: true
    matrix:
      model:
        - tiny
        - base
        - small
        - medium
        - large-v3
        - distil-large-v3
  needs: docker-no_model # wait for docker-no_model to finish
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: true

    # Local composite action; its `tags` and `labels` outputs feed the build step.
    - name: Setup docker
      id: setup
      uses: ./.github/workflows/docker-reused-steps
      with:
        tag: cache-${{ matrix.model }}

    - name: Build cache:${{ matrix.model }}
      uses: docker/build-push-action@v5
      id: build
      with:
        context: .
        file: ./Dockerfile
        push: true
        target: load_whisper
        tags: ${{ steps.setup.outputs.tags }}
        labels: ${{ steps.setup.outputs.labels }}
        # NO_MODEL_STAGE is pinned by digest to the image pushed by docker-no_model in this run.
        build-args: |
          WHISPER_MODEL=${{ matrix.model }}
          NO_MODEL_STAGE=ghcr.io/jim60105/whisperx:no_model@${{ needs.docker-no_model.outputs.digest }}
          VERSION=${{ github.ref_name }}
          RELEASE=${{ github.run_number }}
        platforms: linux/amd64, linux/arm64
        # Cache to registry instead of gha to avoid the capacity limit.
        cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
        cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max
        sbom: true
        provenance: true
# Build the distil-large-v3-en model (distil model seems to support only English)
docker-distil-large-v3-en:
  # The type of runner that the job will run on
  runs-on: ubuntu-latest
  needs:
    - docker-no_model # wait for docker-no_model to finish
    - docker-cache # wait for docker-cache to finish
  # Steps represent a sequence of tasks that will be executed as part of the job
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: true

    # Local composite action; its `tags` and `labels` outputs feed the build step.
    - name: Setup docker
      id: setup
      uses: ./.github/workflows/docker-reused-steps
      with:
        tag: distil-large-v3-en

    # Expose the first 7 characters of the commit SHA as `steps.get-sha.outputs.id`.
    - name: Get short SHA
      id: get-sha
      run: |
        id=$(echo "${{ github.sha }}" | cut -c 1-7)
        echo "id=$id" >> "$GITHUB_OUTPUT"

    - name: Build and push:distil-large-v3-en
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        target: final
        push: true
        tags: ${{ steps.setup.outputs.tags }}
        labels: ${{ steps.setup.outputs.labels }}
        # LOAD_WHISPER_STAGE assumes docker-cache published `cache-distil-large-v3-<short sha>`
        # (i.e. docker-reused-steps appends the short SHA to its tag) — TODO confirm.
        # NO_MODEL_STAGE is pinned by digest to the image pushed by docker-no_model in this run.
        build-args: |
          WHISPER_MODEL=distil-large-v3
          LANG=en
          LOAD_WHISPER_STAGE=ghcr.io/jim60105/whisperx:cache-distil-large-v3-${{ steps.get-sha.outputs.id }}
          NO_MODEL_STAGE=ghcr.io/jim60105/whisperx:no_model@${{ needs.docker-no_model.outputs.digest }}
          VERSION=${{ github.ref_name }}
          RELEASE=${{ github.run_number }}
        platforms: linux/amd64, linux/arm64
        # Cache to registry instead of gha to avoid the capacity limit.
        cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
        cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max
        sbom: true
        provenance: true
# Run the rest of the builds in parallel: one `final` image per (lang, model) pair.
docker:
  # The type of runner that the job will run on
  runs-on: ubuntu-latest
  strategy:
    # Let every combination finish even if some of them fail.
    fail-fast: false
    # max-parallel: 6
    matrix:
      lang:
        - en
        - fr
        - de
        - es
        - it
        - ja
        - zh
        - nl
        - uk
        - pt
        - ar
        - cs
        - ru
        - pl
        - hu
        - fi
        - fa
        - el
        - tr
        - da
        - he
        - vi
        - ko
        - ur
        - te
        - hi
        - ca
        - ml
        # Disable these languages because this transcribe model is too large to build on the GitHub Actions
        # https://github.com/jim60105/docker-whisperX/actions/runs/8405597972
        # Keep "no" quoted when re-enabling: YAML 1.1 parsers read a bare `no` as boolean false.
        # - "no"
        # - nn
        - sk
        - sl
        - hr
        - ro
        - eu
        - gl
        - ka
      model:
        - tiny
        - base
        - small
        - medium
        - large-v3
  needs:
    - docker-no_model # wait for docker-no_model to finish
    - docker-cache # wait for docker-cache to finish
  # Steps represent a sequence of tasks that will be executed as part of the job
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: true

    # Local composite action; its `tags` and `labels` outputs feed the build step.
    - name: Setup docker
      id: setup
      uses: ./.github/workflows/docker-reused-steps
      with:
        tag: ${{ matrix.model }}-${{ matrix.lang }}

    # Expose the first 7 characters of the commit SHA as `steps.get-sha.outputs.id`.
    - name: Get short SHA
      id: get-sha
      run: |
        id=$(echo "${{ github.sha }}" | cut -c 1-7)
        echo "id=$id" >> "$GITHUB_OUTPUT"

    - name: Build and push:${{ matrix.model }}-${{ matrix.lang }}
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        target: final
        push: true
        tags: ${{ steps.setup.outputs.tags }}
        labels: ${{ steps.setup.outputs.labels }}
        # LOAD_WHISPER_STAGE assumes docker-cache published `cache-<model>-<short sha>`
        # (i.e. docker-reused-steps appends the short SHA to its tag) — TODO confirm.
        # NO_MODEL_STAGE is pinned by digest to the image pushed by docker-no_model in this run.
        build-args: |
          WHISPER_MODEL=${{ matrix.model }}
          LANG=${{ matrix.lang }}
          LOAD_WHISPER_STAGE=ghcr.io/jim60105/whisperx:cache-${{ matrix.model }}-${{ steps.get-sha.outputs.id }}
          NO_MODEL_STAGE=ghcr.io/jim60105/whisperx:no_model@${{ needs.docker-no_model.outputs.digest }}
          VERSION=${{ github.ref_name }}
          RELEASE=${{ github.run_number }}
        platforms: linux/amd64, linux/arm64
        # Cache to registry instead of gha to avoid the capacity limit.
        cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
        cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max
        sbom: true
        provenance: true
# Smoke-test the published large-v3-zh image once the `docker` matrix job has finished.
test-large-v3-zh:
  name: Test large-v3-zh docker image
  runs-on: ubuntu-latest
  needs: docker
  steps:
    # We require additional space due to the large size of our image. (~10GB)
    # Pinned to a release tag instead of the moving `main` branch, so a change
    # upstream cannot silently alter what runs in this job.
    - name: Free Disk Space (Ubuntu)
      uses: jlumbroso/free-disk-space@v1.3.1
      with:
        tool-cache: true
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        docker-images: true
        swap-storage: false

    # Only the test fixtures are needed, so check out just that directory.
    - name: Checkout
      uses: actions/checkout@v4
      with:
        sparse-checkout: |
          .github/workflows/test/**
        sparse-checkout-cone-mode: false

    # Expose the first 7 characters of the commit SHA as `steps.get-sha.outputs.id`.
    - name: Get short SHA
      id: get-sha
      run: |
        id=$(echo "${{ github.sha }}" | cut -c 1-7)
        echo "id=$id" >> "$GITHUB_OUTPUT"

    # Transcribe the sample clip and require zh.srt to exist and contain 充满 or 充滿.
    # Assumes the `docker` job published `large-v3-zh-<short sha>` — TODO confirm
    # against the tag scheme in docker-reused-steps.
    - name: Test large-v3-zh docker image
      run: |
        docker run --group-add 0 -v ".:/app" ghcr.io/jim60105/whisperx:large-v3-zh-${{ steps.get-sha.outputs.id }} -- --device cpu --compute_type int8 --output_format srt .github/workflows/test/zh.webm;
        if [ ! -f zh.srt ]; then
          echo "The zh.srt file does not exist"
          exit 1
        fi
        echo "cat zh.srt:";
        cat zh.srt;
        if ! grep -qi -e '充满' -e '充滿' zh.srt; then
          echo "The zh.srt file does not contain the word '充满' or '充滿'"
          exit 1
        fi
        echo "Test passed."