From 4f20a14d693ad07f626071e7e50322dc7a779f67 Mon Sep 17 00:00:00 2001 From: apostasie Date: Fri, 18 Oct 2024 13:11:29 -0700 Subject: [PATCH] CI: enable no-retry/retry testing separation and workflows cleanup Signed-off-by: apostasie --- .../ghcr-image-build-and-publish.yml | 3 +- .github/workflows/lint.yml | 1 - .github/workflows/project.yml | 2 +- .github/workflows/test-canary.yml | 12 ++- .github/workflows/test-kube.yml | 5 +- .github/workflows/test.yml | 98 +++++++++---------- Dockerfile | 14 +-- Makefile | 3 + hack/test-integration.sh | 49 ++++++++++ 9 files changed, 113 insertions(+), 74 deletions(-) create mode 100755 hack/test-integration.sh diff --git a/.github/workflows/ghcr-image-build-and-publish.yml b/.github/workflows/ghcr-image-build-and-publish.yml index ebb35de53cb..bd7941780a2 100644 --- a/.github/workflows/ghcr-image-build-and-publish.yml +++ b/.github/workflows/ghcr-image-build-and-publish.yml @@ -1,4 +1,4 @@ -name: Container Image Build +name: image # This workflow uses actions that are not certified by GitHub. # They are provided by a third-party and are governed by @@ -21,7 +21,6 @@ env: # github.repository as / IMAGE_NAME: ${{ github.repository }} - jobs: build: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 57051520299..be29a8c2aec 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -55,7 +55,6 @@ jobs: uses: golangci/golangci-lint-action@v6 with: args: --verbose - other: timeout-minutes: 5 name: yaml | shell | imports order diff --git a/.github/workflows/project.yml b/.github/workflows/project.yml index a03da712e02..6961f0fd565 100644 --- a/.github/workflows/project.yml +++ b/.github/workflows/project.yml @@ -9,7 +9,7 @@ on: jobs: project: - name: Project Checks + name: checks runs-on: ubuntu-24.04 timeout-minutes: 20 steps: diff --git a/.github/workflows/test-canary.yml b/.github/workflows/test-canary.yml index 14dbb5e1455..7c9226f45f8 100644 --- a/.github/workflows/test-canary.yml +++ b/.github/workflows/test-canary.yml @@ -44,11 +44,13 @@ jobs: - name: "Run unit tests" run: go test -v ./pkg/... - name: "Run integration tests" - run: docker run -t --rm --privileged test-integration + run: docker run -t --rm --privileged test-integration ./hack/test-integration.sh -test.only-flaky=false + - name: "Run integration tests (flaky)" + run: docker run -t --rm --privileged test-integration ./hack/test-integration.sh -test.only-flaky=true windows: - runs-on: windows-latest timeout-minutes: 30 + runs-on: windows-latest defaults: run: shell: bash @@ -74,6 +76,7 @@ jobs: cache: true check-latest: true - run: go install ./cmd/nerdctl + - run: go install -v gotest.tools/gotestsum@v1 # This here is solely to get the cni install script, which has not been modified in 3+ years. # There is little to no reason to update this to latest containerd - uses: actions/checkout@v4.2.1 @@ -91,5 +94,6 @@ jobs: ctrdVersion: ${{ env.CONTAINERD_VERSION }} run: powershell hack/configure-windows-ci.ps1 - name: "Run integration tests" - # See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization - run: go test -p 1 -v ./cmd/nerdctl/... + run: ./hack/test-integration.sh -test.only-flaky=false + - name: "Run integration tests (flaky)" + run: ./hack/test-integration.sh -test.only-flaky=true diff --git a/.github/workflows/test-kube.yml b/.github/workflows/test-kube.yml index c8e2ccda405..3c6faaaa457 100644 --- a/.github/workflows/test-kube.yml +++ b/.github/workflows/test-kube.yml @@ -10,13 +10,12 @@ on: paths-ignore: - '**.md' -env: - ROOTFUL: true - jobs: linux: runs-on: "ubuntu-24.04" timeout-minutes: 40 + env: + ROOTFUL: true steps: - uses: actions/checkout@v4.2.1 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5eff7064fa9..863b956938c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,9 +11,14 @@ on: env: GO_VERSION: 1.23.x + SHORT_TIMEOUT: 5 + LONG_TIMEOUT: 60 jobs: test-unit: + # Supposed to work: https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#example-returning-a-json-data-type + # Apparently does not + # timeout-minutes: ${{ fromJSON(env.SHORT_TIMEOUT) }} timeout-minutes: 5 name: unit | ${{ matrix.goos }} runs-on: "${{ matrix.os }}" @@ -48,11 +53,12 @@ jobs: working-directory: containerd run: GOPATH=$(go env GOPATH) script/setup/install-cni-windows - name: "Run unit tests" - run: go test -v ./pkg/... + run: make test-unit test-integration: + timeout-minutes: 60 + name: rootful | ${{ matrix.containerd }} | ${{ matrix.runner }} runs-on: "${{ matrix.runner }}" - timeout-minutes: 40 strategy: fail-fast: false matrix: @@ -95,23 +101,21 @@ jobs: docker run --privileged --rm tonistiigi/binfmt --install linux/arm64 docker run --privileged --rm tonistiigi/binfmt --install linux/arm/v7 - name: "Run integration tests" - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - command: docker run -t --rm --privileged test-integration + run: docker run -t --rm --privileged test-integration ./hack/test-integration.sh -test.only-flaky=false + - name: "Run integration tests (flaky)" + run: docker run -t --rm --privileged test-integration ./hack/test-integration.sh -test.only-flaky=true test-integration-ipv6: + timeout-minutes: 60 + name: ipv6 | ${{ matrix.containerd }} | ${{ matrix.ubuntu }} runs-on: "ubuntu-${{ matrix.ubuntu }}" - timeout-minutes: 40 strategy: fail-fast: false matrix: # ubuntu-20.04: cgroup v1, ubuntu-22.04 and later: cgroup v2 include: - ubuntu: 24.04 - containerd: v1.7.23 + containerd: v2.0.0-rc.5 env: UBUNTU_VERSION: "${{ matrix.ubuntu }}" CONTAINERD_VERSION: "${{ matrix.containerd }}" @@ -129,7 +133,7 @@ jobs: echo '{"ipv6": true, "fixed-cidr-v6": "2001:db8:1::/64", "experimental": true, "ip6tables": true}' | sudo tee /etc/docker/daemon.json sudo systemctl restart docker - name: "Prepare integration test environment" - run: docker build -t test-integration-ipv6 --target test-integration-ipv6 --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} . + run: docker build -t test-integration --target test-integration --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} . - name: "Remove snap loopback devices (conflicts with our loopback devices in TestRunDevice)" run: | sudo systemctl disable --now snapd.service snapd.socket @@ -147,20 +151,16 @@ jobs: docker run --privileged --rm tonistiigi/binfmt --install linux/arm/v7 - name: "Run integration tests" # The nested IPv6 network inside docker and qemu is complex and needs a bunch of sysctl config. - # Therefore it's hard to debug why the IPv6 tests fail in such an isolation layer. + # Therefore, it's hard to debug why the IPv6 tests fail in such an isolation layer. # On the other side, using the host network is easier at configuration. # Besides, each job is running on a different instance, which means using host network here # is safe and has no side effects on others. - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - command: docker run --network host -t --rm --privileged test-integration-ipv6 + run: docker run --network host -t --rm --privileged test-integration ./hack/test-integration.sh -test.only-ipv6 test-integration-rootless: - runs-on: "ubuntu-${{ matrix.ubuntu }}" timeout-minutes: 60 + name: "${{ matrix.target }} | ${{ matrix.containerd }} | ${{ matrix.rootlesskit }} | ${{ matrix.ubuntu }}" + runs-on: "ubuntu-${{ matrix.ubuntu }}" strategy: fail-fast: false matrix: @@ -169,24 +169,24 @@ jobs: - ubuntu: 20.04 containerd: v1.6.36 rootlesskit: v1.1.1 # Deprecated - target: test-integration-rootless + target: rootless - ubuntu: 22.04 containerd: v1.7.23 rootlesskit: v2.3.1 - target: test-integration-rootless + target: rootless - ubuntu: 24.04 containerd: v2.0.0-rc.5 rootlesskit: v2.3.1 - target: test-integration-rootless + target: rootless - ubuntu: 24.04 containerd: v1.7.23 rootlesskit: v2.3.1 - target: test-integration-rootless-port-slirp4netns + target: rootless-port-slirp4netns env: UBUNTU_VERSION: "${{ matrix.ubuntu }}" CONTAINERD_VERSION: "${{ matrix.containerd }}" ROOTLESSKIT_VERSION: "${{ matrix.rootlesskit }}" - TEST_TARGET: "${{ matrix.target }}" + TEST_TARGET: "test-integration-${{ matrix.target }}" steps: - name: "Set up AppArmor" if: matrix.ubuntu == '24.04' @@ -226,16 +226,14 @@ jobs: fi echo "WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622}" >> "$GITHUB_ENV" - name: "Test (network driver=slirp4netns, port driver=builtin)" - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - command: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} + run: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} /test-integration-rootless.sh ./hack/test-integration.sh -test.only-flaky=false + - name: "Test (network driver=slirp4netns, port driver=builtin) (flaky)" + run: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} /test-integration-rootless.sh ./hack/test-integration.sh -test.only-flaky=true - cross: + build: + timeout-minutes: 5 + name: "build | ${{ matrix.go-version }}" runs-on: ubuntu-24.04 - timeout-minutes: 40 strategy: matrix: go-version: ["1.22.x", "1.23.x"] @@ -248,12 +246,13 @@ jobs: go-version: ${{ matrix.go-version }} cache: true check-latest: true - - name: "Cross" + - name: "build" run: GO_VERSION="$(echo ${{ matrix.go-version }} | sed -e s/.x//)" make binaries test-integration-docker-compatibility: + timeout-minutes: 60 + name: docker runs-on: ubuntu-24.04 - timeout-minutes: 45 steps: - uses: actions/checkout@v4.2.1 with: @@ -280,26 +279,18 @@ jobs: - name: "Prepare integration test environment" run: | sudo apt-get install -y expect + go install -v gotest.tools/gotestsum@v1 - name: "Ensure that the integration test suite is compatible with Docker" - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - # See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization - command: go test -p 1 -timeout 20m -v -exec sudo ./cmd/nerdctl/... -args -test.target=docker -test.allow-kill-daemon + run: WITH_SUDO=true ./hack/test-integration.sh -test.target=docker - name: "Ensure that the IPv6 integration test suite is compatible with Docker" - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - # See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization - command: go test -p 1 -timeout 20m -v -exec sudo ./cmd/nerdctl/... -args -test.target=docker -test.allow-kill-daemon -test.only-ipv6 + run: WITH_SUDO=true ./hack/test-integration.sh -test.target=docker -test.only-ipv6 + - name: "Ensure that the integration test suite is compatible with Docker (flaky only)" + run: WITH_SUDO=true ./hack/test-integration.sh -test.target=docker -test.only-flaky test-integration-windows: - runs-on: windows-2022 timeout-minutes: 30 + name: windows + runs-on: windows-2022 defaults: run: shell: bash @@ -313,6 +304,7 @@ jobs: cache: true check-latest: true - run: go install ./cmd/nerdctl + - run: go install -v gotest.tools/gotestsum@v1 - uses: actions/checkout@v4.2.1 with: repository: containerd/containerd @@ -326,16 +318,16 @@ jobs: env: ctrdVersion: 1.7.23 run: powershell hack/configure-windows-ci.ps1 - # TODO: Run unit tests - name: "Run integration tests" - # See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization - run: go test -p 1 -v ./cmd/nerdctl/... + run: ./hack/test-integration.sh -test.only-flaky=false + - name: "Run integration tests (flaky)" + run: ./hack/test-integration.sh -test.only-flaky=true test-integration-freebsd: + timeout-minutes: 60 name: FreeBSD # ubuntu-24.04 lacks the vagrant package runs-on: ubuntu-22.04 - timeout-minutes: 20 steps: - uses: actions/checkout@v4.2.1 diff --git a/Dockerfile b/Dockerfile index a594358aee4..f1e32fd4153 100644 --- a/Dockerfile +++ b/Dockerfile @@ -276,7 +276,8 @@ ARG DEBIAN_FRONTEND=noninteractive # `expect` package contains `unbuffer(1)`, which is used for emulating TTY for testing RUN apt-get update -qq && apt-get install -qq --no-install-recommends \ expect \ - git + git \ + make COPY --from=goversion /GOVERSION /GOVERSION ARG TARGETARCH RUN curl -fsSL --proto '=https' --tlsv1.2 https://golang.org/dl/$(cat /GOVERSION).linux-${TARGETARCH:-amd64}.tar.gz | tar xzvC /usr/local @@ -314,8 +315,7 @@ RUN curl -o nydus-static.tgz -fsSL --proto '=https' --tlsv1.2 "https://github.co tar xzf nydus-static.tgz && \ mv nydus-static/nydus-image nydus-static/nydusd nydus-static/nydusify /usr/bin/ && \ rm nydus-static.tgz -CMD ["gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \ - "--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon"] +CMD ["./hack/test-integration.sh"] FROM test-integration AS test-integration-rootless # Install SSH for creating systemd user session. @@ -338,17 +338,11 @@ RUN systemctl disable test-integration-ipfs-offline VOLUME /home/rootless/.local/share COPY ./Dockerfile.d/test-integration-rootless.sh / RUN chmod a+rx /test-integration-rootless.sh -CMD ["/test-integration-rootless.sh", \ - "gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \ - "--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon"] +CMD ["/test-integration-rootless.sh", "./hack/test-integration.sh"] # test for CONTAINERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=slirp4netns FROM test-integration-rootless AS test-integration-rootless-port-slirp4netns COPY ./Dockerfile.d/home_rootless_.config_systemd_user_containerd.service.d_port-slirp4netns.conf /home/rootless/.config/systemd/user/containerd.service.d/port-slirp4netns.conf RUN chown -R rootless:rootless /home/rootless/.config -FROM test-integration AS test-integration-ipv6 -CMD ["gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \ - "--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon", "-test.only-ipv6"] - FROM base AS demo diff --git a/Makefile b/Makefile index 0831c640047..ae4e18c94f3 100644 --- a/Makefile +++ b/Makefile @@ -86,6 +86,9 @@ lint-yaml: lint-shell: $(call recursive_wildcard,$(MAKEFILE_DIR)/,*.sh) shellcheck -a -x $^ +test-unit: + go test -v $(MAKEFILE_DIR)/pkg/... + binaries: nerdctl install: diff --git a/hack/test-integration.sh b/hack/test-integration.sh new file mode 100755 index 00000000000..73e2b4ebb19 --- /dev/null +++ b/hack/test-integration.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +# Copyright The containerd Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# shellcheck disable=SC2034,SC2015 +set -o errexit -o errtrace -o functrace -o nounset -o pipefail +root="$(cd "$(dirname "${BASH_SOURCE[0]:-$PWD}")" 2>/dev/null 1>&2 && pwd)" +readonly root + +readonly timeout="60m" +readonly retries="2" +readonly needsudo="${WITH_SUDO:-}" + +# See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization +args=(--format=testname --jsonfile /tmp/test-integration.log --packages="$root"/../cmd/nerdctl/...) + +if [ "$#" == 0 ]; then + "$root"/test-integration.sh -test.only-flaky=false + "$root"/test-integration.sh -test.only-flaky=true + exit +fi + +for arg in "$@"; do + if [ "$arg" == "-test.only-flaky=true" ] || [ "$arg" == "-test.only-flaky" ]; then + args+=("--rerun-fails=$retries") + break + fi +done + +if [ "$needsudo" == "true" ] || [ "$needsudo" == "yes" ] || [ "$needsudo" == "1" ]; then + gotestsum "${args[@]}" -- -timeout="$timeout" -p 1 -exec sudo -args -test.allow-kill-daemon "$@" +else + gotestsum "${args[@]}" -- -timeout="$timeout" -p 1 -args -test.allow-kill-daemon "$@" +fi + +echo "These are the tests that took more than 10 seconds:" +gotestsum tool slowest --threshold 10s --jsonfile /tmp/test-integration.log