Merge pull request #457 from Paladinium/alltalkbeta_docker

Adding Dockerfile and scripts for building and starting
erew123 · Dec 20, 2024 · 5a18d68 · 5a18d68
2 parents 6052453 + 78b0a51
commit 5a18d68
Show file tree

Hide file tree

Showing 12 changed files with 664 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -170,4 +170,8 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 
-.idea/*
+.idea/*
+
+# Docker build
+docker/conda/build
+docker/deepspeed/build
diff --git a/DOCKER_README.md b/DOCKER_README.md
@@ -0,0 +1,78 @@
+# Docker
+The Docker image currently works on Windows and Linux, optionally supporting NVIDIA GPUs.
+
+## General Remarks
+- The resulting Docker image is 21 GB in size. Building might require even more disk space temporarily.
+    - Another 15 GB is required for building DeepSpeed
+- Build time depends on your hardware and internet connection. Expect at least 20-30min to be normal for a full build.
+  - This includes building the conda environment as well as DeepSpeed, which is the basis for the alltalk Docker image.
+- The Docker build for alltalk:
+  - Downloads XTTS as default TTS engine
+  - Enables RVC by default
+  - Downloads all supported RVC models
+  - Enables deepspeed by default
+- Starting the Docker image should only take a few seconds because all the steps were already executed during the build.
+
+## Docker for Linux
+
+### Ubuntu Specific Setup for GPUs
+1. Make sure the latest nvidia drivers are installed: `sudo ubuntu-drivers install`
+1. Install Docker your preferred way. One way to do it is to follow the official documentation [here](https://docs.docker.com/engine/install/ubuntu/#uninstall-old-versions).
+    - Start by uninstalling the old versions
+    - Follow the "apt" repository installation method
+    - Check that everything is working with the "hello-world" container
+1. If you get an error message saying that the GPU cannot be used when launching the Docker container, you might have to install the [Nvidia Docker Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
+    - Install with the "apt" method
+    - Run the docker configuration command
+      ```sudo nvidia-ctk runtime configure --runtime=docker```
+    - Restart docker
+
+## Docker for Windows (WSL2)
+### Windows Specific Setup for GPUs
+> Make sure your Nvidia drivers are up to date: https://www.nvidia.com/download/index.aspx
+1. Install WSL2 in PowerShell with `wsl --install` and restart
+2. Open PowerShell, type and enter ```ubuntu```.  It should now load you into wsl2
+3. Remove the original nvidia cache key: `sudo apt-key del 7fa2af80`
+4. Download CUDA toolkit keyring: `wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-keyring_1.1-1_all.deb`
+5. Install keyring: `sudo dpkg -i cuda-keyring_1.1-1_all.deb`
+6. Update package list: `sudo apt-get update`
+7. Install CUDA toolkit: `sudo apt-get -y install cuda-toolkit-12-4`
+8. Install Docker Desktop using WSL2 as the backend
+9. Restart
+10. If you wish to monitor the terminal remotely via SSH, follow [this guide](https://www.hanselman.com/blog/how-to-ssh-into-wsl2-on-windows-10-from-an-external-machine).
+11. Open PowerShell, type ```ubuntu```, [then follow below](#building-and-running-in-docker)
+
+## Building and Running in Docker
+
+1. Open a terminal (or Ubuntu WSL) and go where you cloned the repo
+2. Build the image with `./docker-build.sh`
+3. Start the container with `./docker-start.sh`
+4. Visit `http://localhost:7851/` or remotely with `http://<ip>:7851`
+
+## Arguments for building and starting docker
+There are various arguments to customize the build and start of the docker image.
+
+### Arguments for `docker-build.sh`
+- `--tts_model` lets you choose the TTS model that is used by default. Valid values are `piper`, `vits`, `xtts`. Defaults to `xtts`.
+  - Example: `docker-build.sh --tts_model piper`
+- `--tag` lets you choose the docker tag. Defaults to `latest`.
+  - Example: `docker-build.sh --tag mytag`
+- `--clean` removes existing dependency builds such as the conda environment or DeepSpeed.
+  - Example: `docker-build.sh --clean`
+
+### Arguments for `docker-start.sh`
+- `--config` lets you choose a config JSON file which can be a subset of `confignew.json`. This allows you to change only
+  a few values and leave the rest as defined in the default `confignew.json` file.
+  - Example: `docker-start.sh --config /my/config/file.json` with content `{"branding": "My Brand "}` will just change
+    the branding in `confignew.json`.
+- `--voices` lets you add voices for the TTS engine in WAV format. You have to specify the folder containing all
+  voice files.
+  - Example: `docker-start.sh --voices /my/voices/dir`
+- `--rvc_voices` similar to voices, this option lets you pick the folder containing the RVC models.
+  - Example: `docker-start.sh --rvc_voices /my/rvc/voices/dir`
+- `--no_ui` allows you to not expose port 7852 for the gradio interface. Note that you still have to set `launch_gradio`
+  to `false` via JSON file passed to `--config`.
+- `--tag` lets you choose the docker tag of the image to run. Defaults to `latest`.
+  - Example: `docker-start.sh --tag mytag`
+- Since the above options only cover the most important settings, you may pass additional arbitrary `docker run`
+  arguments to `docker-start.sh`; they are forwarded to `docker run` unchanged.
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,159 @@
+FROM continuumio/miniconda3:24.7.1-0
+
+# Build arguments:
+#   TTS_MODEL   - model to use: piper, vits or xtts (also exported to the
+#                 environment below)
+#   ALLTALK_DIR - directory used as WORKDIR further down in this file
+ARG TTS_MODEL="xtts"
+ARG ALLTALK_DIR=/opt/alltalk
+
+# All following RUN steps are executed by a bash login shell
+# (presumably so that the conda shell integration is loaded - TODO confirm).
+SHELL ["/bin/bash", "-l", "-c"]
+
+# Environment variables baked into the image.
+ENV TTS_MODEL=${TTS_MODEL} \
+    SHELL=/bin/bash \
+    HOST=0.0.0.0 \
+    DEBIAN_FRONTEND=noninteractive \
+    CUDA_DOCKER_ARCH=all \
+    NVIDIA_VISIBLE_DEVICES=all \
+    CONDA_AUTO_UPDATE_CONDA="false" \
+    GRADIO_SERVER_NAME="0.0.0.0"
+
+##############################################################################
+# Installation/Basic Utilities
+##############################################################################
+RUN <<EOR
+    # Abort the step as soon as one command fails. Without this, only the exit
+    # status of the final cleanup decides whether the layer builds, so a
+    # failed apt-get would go unnoticed.
+    set -e
+
+    apt-get update
+    apt-get upgrade -y
+    apt-get install --no-install-recommends -y \
+      curl \
+      espeak-ng \
+      jq \
+      vim \
+      wget
+
+    # Drop the package caches in the same layer that created them
+    apt-get clean
+    rm -rf /var/lib/apt/lists/*
+EOR
+
+WORKDIR ${ALLTALK_DIR}
+
+##############################################################################
+# Create a conda environment and install dependencies:
+##############################################################################
+COPY docker/conda/build/environment-*.yml environment.yml
+RUN <<EOR
+    # Capture stdout and stderr so the complete log can be printed on failure
+    RESULT=$( { conda env create -f environment.yml ; } 2>&1 )
+    STATUS=$?
+
+    # Fail on a non-zero exit status as well as on any "error" in the output
+    # (grep -z treats the multi-line output as a single record)
+    if [ "$STATUS" -ne 0 ] || echo "$RESULT" | grep -izq error ; then
+      echo "Failed to install conda dependencies: $RESULT"
+      exit 1
+    fi
+
+    # -y answers the confirmation prompt, like the other cleanup steps in this file
+    conda clean --all -y && pip cache purge
+EOR
+
+##############################################################################
+# Install python dependencies (cannot use --no-deps because requirements are not complete)
+##############################################################################
+COPY system/config system/config
+COPY system/requirements/requirements_standalone.txt system/requirements/requirements_standalone.txt
+COPY system/requirements/requirements_parler.txt system/requirements/requirements_parler.txt
+ENV PIP_CACHE_DIR=${ALLTALK_DIR}/pip_cache
+RUN <<EOR
+    conda activate alltalk
+
+    # Create the directory that PIP_CACHE_DIR and --cache-dir point to
+    # (-p: no error if it already exists)
+    mkdir -p ${ALLTALK_DIR}/pip_cache
+    pip install --no-cache-dir --cache-dir=${ALLTALK_DIR}/pip_cache -r system/requirements/requirements_standalone.txt
+    pip install --no-cache-dir --cache-dir=${ALLTALK_DIR}/pip_cache --upgrade gradio==4.32.2
+    # Parler:
+    pip install --no-cache-dir --cache-dir=${ALLTALK_DIR}/pip_cache -r system/requirements/requirements_parler.txt
+
+    # Remove package caches in the same layer to keep the image small
+    conda clean --all --force-pkgs-dirs -y && pip cache purge
+EOR
+
+##############################################################################
+# Install DeepSpeed
+##############################################################################
+# Target directory for the wheel copied from the build context
+RUN mkdir -p /tmp/deepspeed
+COPY docker/deepspeed/build/*.whl /tmp/deepspeed/
+RUN <<EOR
+    DEEPSPEED_WHEEL=$(realpath /tmp/deepspeed/*.whl)
+    conda activate alltalk
+
+    # Capture stdout and stderr so the complete log can be printed on failure;
+    # the compiler/linker flags point at the active conda environment
+    RESULT=$( { CFLAGS="-I$CONDA_PREFIX/include/" LDFLAGS="-L$CONDA_PREFIX/lib/" \
+      pip install --no-cache-dir ${DEEPSPEED_WHEEL} ; } 2>&1 )
+
+    if echo $RESULT | grep -izq error ; then
+      echo "Failed to install pip dependencies: $RESULT"
+      exit 1
+    fi
+
+    # The wheel is no longer needed once it is installed
+    rm ${DEEPSPEED_WHEEL}
+    conda clean --all --force-pkgs-dirs -y && pip cache purge
+EOR
+
+##############################################################################
+# Writing scripts to start alltalk:
+##############################################################################
+# Generates three launchers in the working directory. Each one sources
+# ~/.bashrc, activates the "alltalk" conda environment and starts one Python
+# entry point.
+RUN <<EOR
+    cat << EOF > start_alltalk.sh
+#!/usr/bin/env bash
+source ~/.bashrc
+
+# Merging config from docker_confignew.json into confignew.json:
+jq -s '.[0] * .[1] * .[2]' confignew.json docker_default_config.json docker_confignew.json  > confignew.json.tmp
+mv confignew.json.tmp confignew.json
+
+conda activate alltalk
+python script.py
+EOF
+    cat << EOF > start_finetune.sh
+#!/usr/bin/env bash
+source ~/.bashrc
+export TRAINER_TELEMETRY=0
+conda activate alltalk
+python finetune.py
+EOF
+    cat << EOF > start_diagnostics.sh
+#!/usr/bin/env bash
+source ~/.bashrc
+conda activate alltalk
+python diagnostics.py
+EOF
+    # Only the three scripts generated above exist at this point of the build,
+    # so only those are made executable.
+    chmod +x start_alltalk.sh start_finetune.sh start_diagnostics.sh
+EOR
+
+# Copy the remaining build context into the working directory
+COPY . .
+
+##############################################################################
+# Create script to execute firstrun.py and run it:
+##############################################################################
+# "$@" is quoted inside the generated script so that every argument reaches
+# firstrun.py unchanged, even if it contains whitespace.
+RUN echo $'#!/usr/bin/env bash \n\
+source ~/.bashrc \n\
+conda activate alltalk \n\
+python ./system/config/firstrun.py "$@"' > ./start_firstrun.sh
+
+RUN chmod +x start_firstrun.sh
+RUN ./start_firstrun.sh --tts_model $TTS_MODEL
+
+RUN mkdir -p ${ALLTALK_DIR}/outputs
+RUN mkdir -p /root/.triton/autotune
+
+##############################################################################
+# Enable deepspeed for all models:
+##############################################################################
+# Rewrites every "deepspeed_enabled": false entry to true, in place, in each
+# model_settings.json found below the working directory.
+RUN find . -name model_settings.json -exec sed -i -e 's/"deepspeed_enabled": false/"deepspeed_enabled": true/g' {} \;
+
+##############################################################################
+# Download all RVC models:
+##############################################################################
+# jq writes the values listed in system/tts_engines/rvc_files.json to a
+# temporary file, one per line; xargs then calls curl once per entry and
+# stores each download under its remote file name in models/rvc_base.
+# NOTE(review): the step's exit status is that of the final "rm -f", so a
+# failed download does not fail the build - confirm this is intended.
+RUN <<EOR
+  jq -r '.[]' system/tts_engines/rvc_files.json > /tmp/rvc_files.txt
+  xargs -n 1 curl --create-dirs --output-dir models/rvc_base -LO < /tmp/rvc_files.txt
+  rm -f /tmp/rvc_files.txt
+EOR
+
+##############################################################################
+# Start alltalk:
+##############################################################################
+# Runs the generated start_alltalk.sh (relative to the working directory)
+# through "sh -c" when the container starts.
+ENTRYPOINT ["sh", "-c", "./start_alltalk.sh"]
diff --git a/docker-build.sh b/docker-build.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+cd $SCRIPT_DIR
+
+. ${SCRIPT_DIR=}/docker/variables.sh
+
+TTS_MODEL=xtts
+DOCKER_TAG=latest
+CLEAN=false
+
+# Parse arguments
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --cuda-version)
+      CUDA_VERSION="$2"
+      shift
+      ;;
+    --python-version)
+      PYTHON_VERSION="$2"
+      shift
+      ;;
+    --tts_model)
+      TTS_MODEL="$2"
+      shift
+      ;;
+    --tag)
+      DOCKER_TAG="$2"
+      shift
+      ;;
+    --clean)
+      CLEAN=true
+      ;;
+    *)
+      printf '%s\n' "Invalid argument ($1)"
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+# From here on stop at the first failing command, so that a failed dependency
+# build or docker build is not followed by the success message at the end.
+set -e
+
+# --clean: throw away previously built dependencies so they are rebuilt
+if [ "$CLEAN" = true ]; then
+  rm -rf "${SCRIPT_DIR}/docker/conda/build"
+  rm -rf "${SCRIPT_DIR}/docker/deepspeed/build"
+fi
+
+# Build the dependencies that the Dockerfile copies into the image
+"$SCRIPT_DIR/docker/conda/build-conda-env.sh" \
+  --cuda-version "${CUDA_VERSION}" \
+  --python-version "${PYTHON_VERSION}"
+
+"$SCRIPT_DIR/docker/deepspeed/build-deepspeed.sh" \
+  --python-version "${PYTHON_VERSION}"
+
+echo "Starting docker build process using TTS model '${TTS_MODEL}' and docker tag '${DOCKER_TAG}'"
+echo "Building for CUDA $CUDA_VERSION using python ${PYTHON_VERSION}"
+
+docker buildx \
+  build \
+  --progress=plain \
+  --build-arg "TTS_MODEL=$TTS_MODEL" \
+  --build-arg "ALLTALK_DIR=$ALLTALK_DIR" \
+  -t "alltalk_beta:${DOCKER_TAG}" \
+  .
+
+echo "Docker build process finished. Use docker-start.sh to start the container."
diff --git a/docker-start.sh b/docker-start.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+. ${SCRIPT_DIR=}/docker/variables.sh
+
+WITH_UI=true
+DOCKER_TAG=latest
+declare -a ADDITIONAL_ARGS=()
+
+# Parse arguments
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --config)
+      CONFIG="$2"
+      shift
+      ;;
+    --voices)
+      VOICES="$2"
+      shift
+      ;;
+    --rvc_voices)
+      RVC_VOICES="$2"
+      shift
+      ;;
+    --no_ui)
+      WITH_UI=false
+      ;;
+    --tag)
+      DOCKER_TAG="$2"
+      shift
+      ;;
+    *)
+      # Allow to pass arbitrary arguments to docker as well to be flexible:
+      ADDITIONAL_ARGS+=( $1 )
+      ;;
+  esac
+  shift
+done
+
+# Compose docker arguments based on user input to the script.
+# Every volume specification is quoted so that a host path containing
+# whitespace stays a single argument.
+declare -a DOCKER_ARGS=()
+
+if [[ -n $CONFIG ]]; then
+  # Mount the config file to docker_confignew.json:
+  DOCKER_ARGS+=( -v "${CONFIG}:${ALLTALK_DIR}/docker_confignew.json" )
+fi
+
+if [[ -n $VOICES ]]; then
+  DOCKER_ARGS+=( -v "${VOICES}:${ALLTALK_DIR}/voices" )
+fi
+
+if [[ -n $RVC_VOICES ]]; then
+  DOCKER_ARGS+=( -v "${RVC_VOICES}:${ALLTALK_DIR}/models/rvc_voices" )
+fi
+
+# Port 7852 is only published when the UI was not disabled with --no_ui
+if [ "$WITH_UI" = true ] ; then
+  DOCKER_ARGS+=( -p 7852:7852 )
+fi
+
+# Start the container; the composed and the pass-through arguments are placed
+# before the image name so docker treats them as "docker run" options.
+docker run \
+  --rm \
+  -it \
+  -p 7851:7851 \
+  --gpus=all \
+  --name alltalk \
+  "${DOCKER_ARGS[@]}" \
+  "${ADDITIONAL_ARGS[@]}" \
+  "alltalk_beta:${DOCKER_TAG}" &> /dev/stdout