-
Notifications
You must be signed in to change notification settings - Fork 22
/
ubi.Dockerfile
217 lines (172 loc) · 7.18 KB
/
ubi.Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# syntax=docker/dockerfile:1

# ---- Global build arguments ----
# Declared ahead of the first FROM; a stage re-declares an ARG by name
# (without a value) to read the value set here.

# Whisper model and language(s) to preload into the image.
# LANG may hold several space-separated language codes.
ARG WHISPER_MODEL="base"
ARG LANG="en"

# Numeric user that owns the copied files and runs the container.
ARG UID="1001"

# Values written to the `version` and `release` image labels.
ARG VERSION="EDGE"
ARG RELEASE="0"

# Names used in the FROM lines of the later stages.
# They exist so CI can cache stage builds; keep the defaults when building locally.
ARG NO_MODEL_STAGE="no_model"
ARG LOAD_WHISPER_STAGE="load_whisper"

# When the diarization model is downloaded with an auth token, TORCH_HOME does not
# seem to be respected, so CACHE_HOME has to be exactly the default cache path.
# https://github.com/jim60105/docker-whisperX/issues/27
ARG CACHE_HOME="/.cache"
ARG CONFIG_HOME="/.config"
ARG HF_HOME="${CACHE_HOME}/huggingface"
ARG TORCH_HOME="${CACHE_HOME}/torch"
########################################
# Base stage
# UBI 9 minimal with the Python 3.11 interpreter; the build and no_model
# stages both start from this image.
########################################
FROM registry.access.redhat.com/ubi9/ubi-minimal AS base

# The platform args give each target architecture its own dnf cache mount:
# https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
ARG TARGETVARIANT

ENV PYTHON_VERSION=3.11 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8

# Upgrade the packages shipped with the base image, then add Python 3.11
# (weak dependencies and documentation are skipped).
RUN --mount=type=cache,id=dnf-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/dnf \
    microdnf -y upgrade --refresh --best --nodocs --noplugins --setopt=install_weak_deps=0 && \
    microdnf -y install --setopt=install_weak_deps=0 --setopt=tsflags=nodocs \
        python3.11

# Make the interpreter reachable as `python3` and `python`.
RUN for link in python3 python; do \
        ln -s /usr/bin/python3.11 "/usr/bin/${link}" || exit 1; \
    done

# Extra runtime libraries that are missing on arm64
# https://github.com/jim60105/docker-whisperX/issues/14
ARG TARGETPLATFORM
RUN --mount=type=cache,id=dnf-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/dnf \
    case "$TARGETPLATFORM" in \
        linux/arm64) \
            microdnf -y install --setopt=install_weak_deps=0 --setopt=tsflags=nodocs \
                libgomp libsndfile ;; \
    esac
########################################
# Build stage
# Installs whisperX and its Python dependencies under /root/.local.
# Only that directory is copied out of this stage later on.
########################################
FROM base AS build

# The platform args give each target architecture its own cache mounts:
# https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
ARG TARGETVARIANT

# Tooling needed only while building
RUN --mount=type=cache,id=dnf-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/dnf \
    microdnf -y install --setopt=install_weak_deps=0 --setopt=tsflags=nodocs \
        findutils \
        git \
        python3.11-pip

WORKDIR /app

# pip configuration, passed as build-time environment variables.
# PIP_USER makes pip install under /root/.local.
ARG PIP_USER="true"
ARG PIP_ROOT_USER_ACTION="ignore"
ARG PIP_NO_WARN_SCRIPT_LOCATION=0
ARG PIP_DISABLE_PIP_VERSION_CHECK="true"
ARG PIP_NO_COMPILE="true"
# NOTE(review): pip documents ":all:" (with colons) as the keyword for every package;
# confirm that the plain value "all" is what is intended here.
ARG PIP_NO_BINARY="all"

# Refresh the packaging tools, then install the pinned torch / pyannote stack
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
    pip3.11 install -U --force-reinstall pip setuptools wheel && \
    pip3.11 install -U --extra-index-url https://download.pytorch.org/whl/cu121 \
        torch==2.2.2 torchaudio==2.2.2 \
        pyannote.audio==3.1.1 \
        # numpy is held below 2.0: https://github.com/jim60105/docker-whisperX/issues/40
        "numpy<2.0"

# Requirements listed by whisperX itself, bind-mounted from the build context
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
    --mount=source=whisperX/requirements.txt,target=requirements.txt \
    pip3.11 install -r requirements.txt

# Install whisperX from the bind-mounted source tree
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
    --mount=source=whisperX,target=.,rw \
    --mount=type=tmpfs,target=/tmp \
    pip3.11 install . && \
    # Remove the build directory (needed for Podman, which DOES write back to the build context)
    rm -rf build

# Smoke test: the build fails here if whisperx cannot be imported
RUN python3 -c "import whisperx"
########################################
# Final stage for no_model
# Runtime image: whisperX and its dependencies copied from the build stage.
# No model is preloaded in this stage.
########################################
FROM base AS no_model

ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Re-declare the global path arguments so this stage can read them
ARG CACHE_HOME
ARG CONFIG_HOME
ARG TORCH_HOME
ARG HF_HOME

ENV XDG_CACHE_HOME=${CACHE_HOME}
ENV TORCH_HOME=${TORCH_HOME}
ENV HF_HOME=${HF_HOME}

ARG UID

# Create the directories the runtime user needs, owned by $UID with group 0 and mode 775
RUN install -d -m 775 -o $UID -g 0 /licenses && \
    install -d -m 775 -o $UID -g 0 /root && \
    install -d -m 775 -o $UID -g 0 ${CACHE_HOME} && \
    install -d -m 775 -o $UID -g 0 ${CONFIG_HOME}

# ffmpeg
COPY --from=ghcr.io/jim60105/static-ffmpeg-upx:7.1 /ffmpeg /usr/local/bin/
# COPY --from=ghcr.io/jim60105/static-ffmpeg-upx:7.1 /ffprobe /usr/local/bin/

# dumb-init
COPY --from=ghcr.io/jim60105/static-ffmpeg-upx:7.1 /dumb-init /usr/local/bin/

# Copy licenses (OpenShift Policy)
COPY --chown=$UID:0 --chmod=775 LICENSE /licenses/LICENSE
COPY --chown=$UID:0 --chmod=775 whisperX/LICENSE /licenses/whisperX.LICENSE

# Copy dependencies and code (and support arbitrary uid for OpenShift best practice)
# https://docs.openshift.com/container-platform/4.14/openshift_images/create-images.html#use-uid_create-images
COPY --chown=$UID:0 --chmod=775 --from=build /root/.local /root/.local

# Packages were installed under /root/.local, so expose its scripts and site-packages
ENV PATH="/root/.local/bin:$PATH"
ENV PYTHONPATH="/root/.local/lib/python3.11/site-packages"

WORKDIR /app

VOLUME [ "/app" ]

USER $UID

STOPSIGNAL SIGINT

# Container arguments are appended after the `-c` script, so the shell assigns the
# first one to $0 and only the remaining ones to "$@".
# `exec` replaces the wrapper shell with whisperx, so no shell is left running as its parent.
ENTRYPOINT [ "dumb-init", "--", "/bin/sh", "-c", "exec whisperx \"$@\"" ]

ARG VERSION
ARG RELEASE

LABEL name="jim60105/docker-whisperX" \
    # Authors for WhisperX
    vendor="Bain, Max and Huh, Jaesung and Han, Tengda and Zisserman, Andrew" \
    # Maintainer for this docker image
    maintainer="jim60105" \
    # Dockerfile source repository
    url="https://github.com/jim60105/docker-whisperX" \
    version=${VERSION} \
    # This should be a number, incremented with each change
    release=${RELEASE} \
    io.k8s.display-name="WhisperX" \
    summary="WhisperX: Time-Accurate Speech Transcription of Long-Form Audio" \
    description="This is the docker image for WhisperX: Automatic Speech Recognition with Word-Level Timestamps (and Speaker Diarization) from the community. For more information about this tool, please visit the following website: https://github.com/m-bain/whisperX."
########################################
# load_whisper stage
# This stage will be tagged for caching in CI.
# Preloads the VAD model and the selected Whisper model.
########################################
FROM ${NO_MODEL_STAGE} AS load_whisper

# Build-time-only environment for the preload commands below
# (declared as ARG, so these are not persisted as ENV in the image)
ARG CONFIG_HOME
ARG XDG_CONFIG_HOME=${CONFIG_HOME}
ARG HOME="/root"

# Preload vad model
RUN python3 -c 'from whisperx.vad import load_vad_model; load_vad_model("cpu");'

ARG WHISPER_MODEL
ENV WHISPER_MODEL=${WHISPER_MODEL}

# Preload faster-whisper
# Python reads the model name from the WHISPER_MODEL environment variable instead of
# having it pasted into the source text, so a name containing quotes or backslashes
# cannot break the snippet.
RUN echo "Preload whisper model: ${WHISPER_MODEL}" && \
    python3 -c 'import os, faster_whisper; model = faster_whisper.WhisperModel(os.environ["WHISPER_MODEL"])'
########################################
# load_align stage
# Preloads the alignment model for every language listed in LANG.
########################################
FROM ${LOAD_WHISPER_STAGE} AS load_align

ARG LANG
ENV LANG=${LANG}

# Preload align models
# LANG is deliberately left unquoted so the shell splits it into one word per language.
# `set -e` aborts the build as soon as any language fails; without it the loop's exit
# status is that of the last iteration only, hiding failures of earlier languages.
RUN --mount=source=load_align_model.py,target=load_align_model.py \
    set -e; \
    for i in ${LANG}; do \
        echo "Preload align model: $i"; \
        python3 load_align_model.py "$i"; \
    done
########################################
# Final stage with model
# Starts again from the no_model image and adds only the cache directory
# that the load_align stage populated.
########################################
FROM ${NO_MODEL_STAGE} AS final

ARG UID
ARG CACHE_HOME

COPY --chown=$UID:0 --chmod=775 --from=load_align ${CACHE_HOME} ${CACHE_HOME}

ARG LANG
ENV LANG=${LANG}

ARG WHISPER_MODEL
ENV WHISPER_MODEL=${WHISPER_MODEL}

# Take the first language from LANG env variable and run whisperx with the preloaded model.
# Container arguments are appended after the `-c` script, so the shell assigns the first
# one to $0 and only the remaining ones to "$@".
# `exec` replaces the wrapper shell with whisperx, so no shell is left running as its parent.
ENTRYPOINT [ "dumb-init", "--", "/bin/sh", "-c", "LANG=$(echo ${LANG} | cut -d ' ' -f1); exec whisperx --model \"${WHISPER_MODEL}\" --language \"${LANG}\" \"$@\"" ]

ARG VERSION
ARG RELEASE

LABEL version=${VERSION} \
    release=${RELEASE}