Skip to content

Commit

Permalink
Build docker image (#2542)
Browse files Browse the repository at this point in the history
Downloading pip dependencies takes 2-4 min, but downloading the new docker image takes about 1 min longer than downloading the pytorch one
Not sure how long downloading all dependencies to a new image takes in comparison to pulling the docker image

Docker image scripts are copied from pytorch, I tried to remove stuff but there's definitely a lot left over
  • Loading branch information
clee2000 authored Mar 27, 2024
1 parent 9ceac50 commit 2ad3512
Show file tree
Hide file tree
Showing 11 changed files with 344 additions and 118 deletions.
23 changes: 23 additions & 0 deletions .ci/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Docker image definition: a base OS image plus docs tooling and a conda
# environment populated from requirements.txt.
# BASE_IMAGE is supplied at build time (build.sh passes it via --build-arg).
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

ENV DEBIAN_FRONTEND=noninteractive

# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install the docs toolchain (see common/install_docs_reqs.sh)
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh

# Install conda and other packages
# NB: ANACONDA_PYTHON_VERSION must be set in its own ENV instruction before
# PATH, because PATH expands it.
ENV ANACONDA_PYTHON_VERSION=3.10
ENV CONDA_CMAKE=yes
ENV DOCS=yes
ENV PATH=/opt/conda/envs/py_${ANACONDA_PYTHON_VERSION}/bin:/opt/conda/bin:${PATH}
COPY ./requirements.txt /opt/conda/
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
# The helper scripts and requirements file are only needed during this step,
# so they are removed in the same layer.
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements.txt

CMD ["bash"]
24 changes: 24 additions & 0 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Build the docker image from the Dockerfile in the current directory.
#
# Usage: build.sh IMAGE_NAME [docker build args...]
#   IMAGE_NAME   used only in the log message below
#   remaining    forwarded verbatim to `docker build` (e.g. a -t tag)
set -exu

# Fail with a usage hint instead of a bare "unbound variable" error.
IMAGE_NAME="${1:?usage: build.sh IMAGE_NAME [docker build args...]}"
shift

export UBUNTU_VERSION="20.04"

export BASE_IMAGE="ubuntu:${UBUNTU_VERSION}"
echo "Building ${IMAGE_NAME} Docker image"

docker build \
  --no-cache \
  --progress=plain \
  -f Dockerfile \
  --build-arg BASE_IMAGE="${BASE_IMAGE}" \
  "$@" \
  .
26 changes: 26 additions & 0 deletions .ci/docker/common/common_utils.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Run a command through sudo with the SUDO_* variables removed from its
# environment and the caller's PATH / LD_LIBRARY_PATH passed along.
#
# Work around bug where devtoolset replaces sudo and breaks it.
#
# Arguments: the command followed by its arguments. Each argument is passed
#            through as its own word, so values containing spaces or glob
#            characters reach the command unchanged.
as_ci_user() {
  # NB: unsetting the environment variables works around a conda bug
  # https://github.com/conda/conda/issues/6576
  # NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
  # NB: This must be run from a directory that the user has access to,
  # works around https://github.com/conda/conda-package-handling/pull/34
  sudo -E -H env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER \
    env "PATH=$PATH" "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}" "$@"
}

# Install packages into the py_$ANACONDA_PYTHON_VERSION conda environment.
#
# Globals:   ANACONDA_PYTHON_VERSION (read)
# Arguments: package specs and any extra `conda install` options; each is
#            forwarded as a separate word.
conda_install() {
  # Ensure that the install command don't upgrade/downgrade Python
  # This should be called as
  #   conda_install pkg1 pkg2 ... [-c channel]
  as_ci_user conda install -q -n "py_${ANACONDA_PYTHON_VERSION}" -y \
    python="$ANACONDA_PYTHON_VERSION" "$@"
}

# Run a command inside the py_$ANACONDA_PYTHON_VERSION conda environment
# via `conda run --no-capture-output`.
#
# Globals:   ANACONDA_PYTHON_VERSION (read)
# Arguments: the command and its arguments; each is forwarded as a
#            separate word.
conda_run() {
  as_ci_user conda run -n "py_${ANACONDA_PYTHON_VERSION}" --no-capture-output "$@"
}

# pip-install packages inside the py_$ANACONDA_PYTHON_VERSION conda
# environment, with pip's progress bar turned off.
#
# Globals:   ANACONDA_PYTHON_VERSION (read)
# Arguments: arguments for `pip install` (packages, -r FILE, ...); each is
#            forwarded as a separate word.
pip_install() {
  as_ci_user conda run -n "py_${ANACONDA_PYTHON_VERSION}" \
    pip install --progress-bar off "$@"
}
47 changes: 47 additions & 0 deletions .ci/docker/common/install_base.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Based off of https://github.com/pytorch/pytorch/tree/b52e0bf131a4e55cd987176f9c5a8d2ad6783b4f/.ci/docker

set -ex

# Install the base apt packages, then remove the apt caches/lists and the
# contents of /tmp and /var/tmp.
install_ubuntu() {
  # Install common dependencies
  apt-get update
  # TODO: Some of these may not be necessary
  # NB: the cmake version pattern is quoted so the shell hands the `*` to
  # apt-get instead of trying to glob-expand it.
  apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    'cmake=3.16*' \
    curl \
    git \
    wget \
    sudo \
    vim \
    jq \
    unzip \
    gdb \
    rsync \
    libssl-dev \
    p7zip-full \
    libglfw3 \
    libglfw3-dev \
    sox \
    libsox-dev \
    libsox-fmt-all

  # Cleanup package manager
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

# Pick the installer that matches the base OS: read the ID= entry from
# /etc/os-release (quotes stripped) and run the matching install function.
ID=$(sed -n 's/^ID=//p' /etc/os-release | tr -d '"')
if [[ "$ID" == "ubuntu" ]]; then
  install_ubuntu
else
  # Any other (or empty) ID is unsupported
  echo "Unable to determine OS..."
  exit 1
fi
54 changes: 54 additions & 0 deletions .ci/docker/common/install_conda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash

# Install Miniconda into /opt/conda, create the py_$ANACONDA_PYTHON_VERSION
# environment, and install cmake plus the pip requirements into it.
#
# Globals: ANACONDA_PYTHON_VERSION (read) - when empty/unset nothing is done
# Needs:   common_utils.sh next to this script, and
#          /opt/conda/requirements.txt already in place
set -ex

# Optionally install conda
if [ -n "${ANACONDA_PYTHON_VERSION:-}" ]; then
  BASE_URL="https://repo.anaconda.com/miniconda"
  CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"

  mkdir -p /opt/conda

  # Provides as_ci_user, conda_install and pip_install
  source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

  pushd /tmp
  wget -q "${BASE_URL}/${CONDA_FILE}"
  # NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
  as_ci_user bash "${CONDA_FILE}" -b -f -p "/opt/conda"
  # The installer is not needed once conda is in place; remove it so it
  # does not stay behind in the image layer
  rm -f -- "${CONDA_FILE}"
  popd

  # NB: Don't do this, rely on the rpath to get it right
  #echo "/opt/conda/lib" > /etc/ld.so.conf.d/conda-python.conf
  #ldconfig
  sed -e 's|PATH="\(.*\)"|PATH="/opt/conda/bin:\1"|g' -i /etc/environment
  export PATH="/opt/conda/bin:$PATH"

  # Ensure we run conda in a directory that the user has write access to
  pushd /opt/conda

  # Prevent conda from updating to 4.14.0, which causes docker build failures
  # See https://hud.pytorch.org/pytorch/pytorch/commit/754d7f05b6841e555cea5a4b2c505dd9e0baec1d
  # Uncomment the below when resolved to track the latest conda update
  # as_ci_user conda update -y -n base conda

  # Install correct Python version
  as_ci_user conda create -n "py_${ANACONDA_PYTHON_VERSION}" -y python="$ANACONDA_PYTHON_VERSION"

  # Use conda cmake (installed unconditionally here). Ubuntu bionic and focal
  # cannot find conda mkl with stock cmake, so we need a cmake from conda
  conda_install cmake

  # Install pip packages
  pip_install -r /opt/conda/requirements.txt

  apt-get update
  apt-get -y install expect-dev

  # Cleanup package manager so the apt lists/caches do not bloat the layer
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/*

  popd
fi
21 changes: 21 additions & 0 deletions .ci/docker/common/install_docs_reqs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Based off of https://github.com/pytorch/pytorch/tree/b52e0bf131a4e55cd987176f9c5a8d2ad6783b4f/.ci/docker
# Install the docs toolchain: nodejs (NodeSource 20.x), yarn, katex and
# doxygen, then clean the apt caches and temp directories.
#
# -o pipefail: a failed download must abort the build instead of letting the
# command on the right-hand side of the pipe succeed on empty/garbage input.
set -exo pipefail

apt-get update
apt-get install -y gpg-agent

# -f makes curl exit non-zero on HTTP errors rather than piping an error
# page into bash / apt-key
curl --retry 3 -fsL https://deb.nodesource.com/setup_20.x | sudo -E bash -
sudo apt-get install -y nodejs

curl --retry 3 -fsS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list

# Refresh the package lists so the yarn repository added above is visible
apt-get update
apt-get install -y --no-install-recommends yarn
yarn global add katex --prefix /usr/local

sudo apt-get -y install doxygen

# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
70 changes: 70 additions & 0 deletions .ci/docker/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# --extra-index-url https://download.pytorch.org/whl/cu117/index.html # Use this to run/publish tutorials against the latest binaries during the RC stage. Comment out after the release. Each release verify the correct cuda version.
# Refer to ./jenkins/build.sh for tutorial build instructions

sphinx==5.0.0
sphinx-gallery==0.11.1
sphinx_design
nbsphinx
docutils==0.16
sphinx-copybutton
pypandoc==1.12
pandocfilters
markdown
tqdm==4.66.1
numpy==1.24.4
matplotlib
librosa
torch
torchvision
torchtext
torchdata
networkx
PyHamcrest
bs4
awscliv2==2.1.1
flask
spacy==3.4.1
ray[tune]==2.7.2
tensorboard
jinja2==3.1.3
pytorch-lightning
torchx
torchrl==0.3.0
tensordict==0.3.0
ax-platform
nbformat>=5.9.2
datasets
transformers
torchmultimodal-nightly # needs to be updated to stable as soon as it's available
onnx
onnxscript
onnxruntime

importlib-metadata==6.8.0

# PyTorch Theme
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme

ipython

sphinxcontrib.katex
# to run examples
boto3
pandas
requests
scikit-image
scipy==1.11.1
numba==0.57.1
pillow==10.2.0
wget
gym==0.26.2
gym-super-mario-bros==7.4.0
pyopengl
gymnasium[mujoco]==0.27.0
timm
iopath
pygame==2.1.2
pycocotools
semilearn==0.3.2
torchao==0.0.3
segment_anything==1.0
60 changes: 19 additions & 41 deletions .github/workflows/build-tutorials.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ jobs:
- { shard: 15, num_shards: 15, runner: "linux.4xlarge.nvidia.gpu" }
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9"
CUDA_VERSION: "9"
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
Expand All @@ -54,27 +51,21 @@ jobs:
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@main

- name: Calculate docker image
shell: bash
id: docker-image
run: |
set -ex
# for some reason, pip installs it in a different place than what is looked at in the py file
pip3 install requests==2.26
pyTorchDockerImageTag=$(python3 .jenkins/get_docker_tag.py)
echo "docker-image=${DOCKER_IMAGE}:${pyTorchDockerImageTag}" >> "${GITHUB_OUTPUT}"
- name: Calculate/build docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-image-name: tutorials

- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.docker-image.outputs.docker-image }}
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

- name: Build
shell: bash
env:
DOCKER_IMAGE: ${{ steps.docker-image.outputs.docker-image }}
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
NUM_WORKERS: ${{ matrix.num_shards }}
WORKER_ID: ${{ matrix.shard }}
COMMIT_ID: ${{ github.sha }}
Expand All @@ -95,16 +86,13 @@ jobs:
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--tty \
--detach \
--user jenkins \
--shm-size=2gb \
--name="${container_name}" \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
-v "${GITHUB_WORKSPACE}:/var/lib/workspace" \
-w /var/lib/workspace \
"${DOCKER_IMAGE}"
)
echo "rm /opt/cache/bin/*" | docker exec -u root -i "${container_name}" bash
docker exec -t "${container_name}" sh -c ".jenkins/build.sh"
- name: Teardown Linux
Expand All @@ -116,9 +104,6 @@ jobs:
needs: worker
runs-on: [self-hosted, linux.2xlarge]
environment: ${{ github.ref == 'refs/heads/main' && 'pytorchbot-env' || '' }}
env:
DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9"
CUDA_VERSION: "9"
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
Expand All @@ -134,27 +119,21 @@ jobs:
- name: Setup Linux
uses: pytorch/pytorch/.github/actions/setup-linux@main

- name: Calculate docker image
shell: bash
id: docker-image
run: |
set -ex
# for some reason, pip installs it in a different place than what is looked at in the py file
pip3 install requests==2.26
pyTorchDockerImageTag=$(python3 .jenkins/get_docker_tag.py)
echo "docker-image=${DOCKER_IMAGE}:${pyTorchDockerImageTag}" >> "${GITHUB_OUTPUT}"
- name: Calculate/build docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-image-name: tutorials

- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.docker-image.outputs.docker-image }}
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

- name: Build
shell: bash
env:
DOCKER_IMAGE: ${{ steps.docker-image.outputs.docker-image }}
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
NUM_WORKERS: 15
WORKER_ID: ${{ matrix.shard }}
COMMIT_ID: ${{ github.sha }}
Expand All @@ -177,14 +156,13 @@ jobs:
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--tty \
--detach \
--user jenkins \
--name="${container_name}" \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
-v "${GITHUB_WORKSPACE}:/var/lib/workspace" \
-w /var/lib/workspace \
"${DOCKER_IMAGE}"
)
echo "rm /opt/cache/bin/*" | docker exec -u root -i "${container_name}" bash
docker exec -u root -i "${container_name}" bash
docker exec -t "${container_name}" sh -c ".jenkins/build.sh"
Expand Down
Loading

0 comments on commit 2ad3512

Please sign in to comment.