From 916f54e242c663ede412c451feec47408cbd1649 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 5 Dec 2024 15:07:42 -0300 Subject: [PATCH 1/5] workflows: use a common debugger (ci-e2e-debug-fail.sh) Create a common debugger script (./hack/ci-e2e-debug-fail.sh) that should be called by workflows in case of failure, to help on debugging activities. By switching to a common script we avoid the problem of testing on pull_request_target triggered workflows. Also reduce the amount of duplicated code. Signed-off-by: Wainer dos Santos Moschetta --- .github/workflows/e2e_libvirt.yaml | 55 +----------------- hack/ci-e2e-debug-fail.sh | 89 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 53 deletions(-) create mode 100755 hack/ci-e2e-debug-fail.sh diff --git a/.github/workflows/e2e_libvirt.yaml b/.github/workflows/e2e_libvirt.yaml index e94c08db7..f1c617361 100644 --- a/.github/workflows/e2e_libvirt.yaml +++ b/.github/workflows/e2e_libvirt.yaml @@ -186,61 +186,10 @@ jobs: - name: Debug tests failure if: failure() && steps.runTests.outcome == 'failure' + working-directory: ./ run: | export KUBECONFIG="${HOME}/.kcli/clusters/peer-pods/auth/kubeconfig" - - echo "::group::KBS installation" - kubectl get pods -n coco-tenant - kubectl describe pods -n coco-tenant - echo "::endgroup::" - - echo "::group::CoCo and Peer Pods installation" - kubectl get pods -n confidential-containers-system - kubectl describe pods -n confidential-containers-system - echo "::endgroup::" - - echo "::group::cloud-api-adaptor logs" - kubectl logs -l app=cloud-api-adaptor -n confidential-containers-system - echo "::endgroup::" - - echo "::group::kbs logs" - kubectl logs deployment/kbs -n coco-tenant - echo "::endgroup::" - - for ns in $(kubectl get ns -o name 2>/dev/null | sed 's#namespace/##' | grep "^coco-pp-"); do - for pod in $(kubectl get pods -o name -n "$ns" 2>/dev/null); do - echo "::group::Describe $pod (namespace/$ns)" - kubectl describe "$pod" -n "$ns" - echo 
"::endgroup::" - done - done - - for worker in $(kubectl get node -o name -l node.kubernetes.io/worker 2>/dev/null); do - echo "::group::journalctl -t kata ($worker)" - kubectl debug --image quay.io/prometheus/busybox -q -i \ - "$worker" -- chroot /host journalctl -x -t kata --no-pager - echo "::endgroup::" - done - - echo "::group::Libvirt domains" - sudo virsh list - echo "::endgroup::" - - for podvm in $(sudo virsh list --name | grep "podvm-"); do - echo "::group::podvm $podvm" - sudo virsh dominfo "$podvm" - sudo virsh domifaddr "$podvm" - echo "::endgroup::" - done - - echo "::group::podvm base volume" - sudo virsh vol-info --pool default podvm-base.qcow2 - ls -lh /var/lib/libvirt/images/podvm-base.qcow2 - echo "::endgroup::" - - echo "::group::Check podvm base volume integrity" - sudo qemu-img check /var/lib/libvirt/images/podvm-base.qcow2 - echo "::endgroup::" + ./hack/ci-e2e-debug-fail.sh # Avoid running with `set -e` as command fails should be allowed shell: bash {0} diff --git a/hack/ci-e2e-debug-fail.sh b/hack/ci-e2e-debug-fail.sh new file mode 100755 index 000000000..b07e40c68 --- /dev/null +++ b/hack/ci-e2e-debug-fail.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# +# (C) Copyright Confidential Containers Contributors +# SPDX-License-Identifier: Apache-2.0 +# +# Primarily used on Github workflows to debug failed pipelines. +# +# NOTE: if you want a debugger for MY_PROVIDER provider then you just need +# to create the debug_MY_PROVIDER function. Nothing else is needed. +# +# Not setting errexit, nounset, and pipefail because it is fine and should +# continue if any command fail. + +CLOUD_PROVIDER=${CLOUD_PROVIDER:-} + +# Get common debug information. 
+# +debug_common() { + echo "::group::KBS installation" + kubectl get pods -n coco-tenant + kubectl describe pods -n coco-tenant + echo "::endgroup::" + + echo "::group::CoCo and Peer Pods installation" + kubectl get pods -n confidential-containers-system + kubectl describe pods -n confidential-containers-system + echo "::endgroup::" + + echo "::group::cloud-api-adaptor logs" + kubectl logs -l app=cloud-api-adaptor --tail=-1 -n confidential-containers-system + echo "::endgroup::" + + echo "::group::kbs logs" + kubectl logs deployment/kbs -n coco-tenant + echo "::endgroup::" + + for ns in $(kubectl get ns -o name 2>/dev/null | sed 's#namespace/##' | grep "^coco-pp-"); do + for pod in $(kubectl get pods -o name -n "$ns" 2>/dev/null); do + echo "::group::Describe $pod (namespace/$ns)" + kubectl describe "$pod" -n "$ns" + echo "::endgroup::" + done + done + + for worker in $(kubectl get node -o name -l node.kubernetes.io/worker 2>/dev/null); do + echo "::group::journalctl -t kata ($worker)" + kubectl debug --image quay.io/prometheus/busybox -q -i \ + "$worker" -- chroot /host journalctl -x -t kata --no-pager + echo "::endgroup::" + done +} + +# Debugger for Libvirt. +# +debug_libvirt() { + echo "::group::Libvirt domains" + sudo virsh list + echo "::endgroup::" + + for podvm in $(sudo virsh list --name | grep "podvm-"); do + echo "::group::podvm $podvm" + sudo virsh dominfo "$podvm" + sudo virsh domifaddr "$podvm" + echo "::endgroup::" + done + + echo "::group::podvm base volume" + sudo virsh vol-info --pool default podvm-base.qcow2 + ls -lh /var/lib/libvirt/images/podvm-base.qcow2 + echo "::endgroup::" + + echo "::group::Check podvm base volume integrity" + sudo qemu-img check /var/lib/libvirt/images/podvm-base.qcow2 + echo "::endgroup::" +} + +main() { + debug_common + + if [ -n "$CLOUD_PROVIDER" ]; then + if ! 
type -a "debug_${CLOUD_PROVIDER}" &>/dev/null; then + echo "INFO: Cannot get further information as debugger for ${CLOUD_PROVIDER} is not implemented" + else + "debug_${CLOUD_PROVIDER}" + fi + fi +} + +main "$@" From 6a0fcc31cede0dfdc929f2b48d44284393e63db2 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 25 Jul 2024 17:14:27 -0300 Subject: [PATCH 2/5] workflows: add test e2e workflow for docker Add a callable workflow that runs the e2e tests for the docker provider. This workflow is similar to e2e_libvirt.yaml. Signed-off-by: Wainer dos Santos Moschetta Signed-off-by: stevenhorsman --- .github/workflows/e2e_docker.yaml | 124 ++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 .github/workflows/e2e_docker.yaml diff --git a/.github/workflows/e2e_docker.yaml b/.github/workflows/e2e_docker.yaml new file mode 100644 index 000000000..72e2ab902 --- /dev/null +++ b/.github/workflows/e2e_docker.yaml @@ -0,0 +1,124 @@ +# (C) Copyright Confidential Containers Contributors 2024. +# SPDX-License-Identifier: Apache-2.0 +# +# Run docker e2e tests. +name: (Callable) docker e2e tests + +on: + workflow_call: + inputs: + podvm_image: + required: true + type: string + caa_image: + required: true + type: string + caa_image_tag: + required: false + default: "latest" + type: string + install_directory_artifact: + description: The archive name of the install directory + default: '' + required: false + type: string + git_ref: + default: 'main' + description: Git ref to checkout the cloud-api-adaptor repository. Defaults to main. 
+ required: false + type: string + +env: + CLOUD_PROVIDER: docker + CLUSTER_NAME: peer-pods + DEBIAN_FRONTEND: noninteractive + +defaults: + run: + working-directory: src/cloud-api-adaptor + +jobs: + test-docker: + runs-on: ubuntu-22.04 + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ inputs.git_ref }} + + - name: Rebase the code + if: github.event_name == 'pull_request_target' + working-directory: ./ + run: | + ./hack/ci-helper.sh rebase-atop-of-the-latest-target-branch + + - name: Login to quay Container Registry + if: ${{ startsWith(inputs.podvm_image, 'quay.io') }} + uses: docker/login-action@v3 + with: + registry: quay.io + username: ${{ secrets.QUAY_USERNAME }} + password: ${{ secrets.QUAY_PASSWORD }} + + - name: Login to the ghcr Container registry + if: ${{ startsWith(inputs.podvm_image, 'ghcr.io') }} + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Read properties from versions.yaml + run: | + sudo snap install yq + go_version="$(yq '.tools.golang' versions.yaml)" + [ -n "$go_version" ] + echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV" + + - name: Setup Golang version ${{ env.GO_VERSION }} + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Get the install directory + if: ${{ inputs.install_directory_artifact != '' }} + uses: actions/download-artifact@v4 + with: + name: ${{ inputs.install_directory_artifact }} + path: src/cloud-api-adaptor/install + + - name: Config docker + run: | + cat <<- EOF > docker.properties + CAA_IMAGE="${{ inputs.caa_image }}" + CAA_IMAGE_TAG="${{ inputs.caa_image_tag }}" + DOCKER_PODVM_IMAGE="${{ inputs.podvm_image }}" + DOCKER_HOST="unix:///var/run/docker.sock" + DOCKER_NETWORK_NAME="kind" + EOF + # For debugging + cat docker.properties + + - name: run tests + id: runTests + run: | + export CLOUD_PROVIDER=docker + export DEPLOY_KBS=false + export 
TEST_PROVISION=yes + export TEST_TEARDOWN=no + export TEST_PROVISION_FILE="$PWD/docker.properties" + export TEST_PODVM_IMAGE="${{ inputs.podvm_image }}" + export TEST_E2E_TIMEOUT="50m" + + make test-e2e + + - name: Debug tests failure + if: failure() && steps.runTests.outcome == 'failure' + working-directory: ./ + run: | + export KUBECONFIG="${HOME}/kube_${CLUSTER_NAME}" + kind get kubeconfig -n "$CLUSTER_NAME" > "$KUBECONFIG" + ./hack/ci-e2e-debug-fail.sh + # Avoid running with `set -e` as command fails should be allowed + shell: bash {0} From bb3c7f3359c1eaa1a3df98a9d4eaa6e2071051a3 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Mon, 29 Jul 2024 18:57:42 -0300 Subject: [PATCH 3/5] workflows: enable docker e2e test in e2e_run_all This will make the e2e tests for docker run. Note that continue-on-error is set so that the e2e_run_all workflow exit status won't change, i.e. any failure on e2e_docker is disregarded. Signed-off-by: Wainer dos Santos Moschetta --- .github/workflows/e2e_docker.yaml | 2 ++ .github/workflows/e2e_run_all.yaml | 28 +++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/.github/workflows/e2e_docker.yaml b/.github/workflows/e2e_docker.yaml index 72e2ab902..9ccff221b 100644 --- a/.github/workflows/e2e_docker.yaml +++ b/.github/workflows/e2e_docker.yaml @@ -40,6 +40,8 @@ defaults: jobs: test-docker: runs-on: ubuntu-22.04 + # TODO: remove this when the job gets stable + continue-on-error: true steps: - name: Checkout Code uses: actions/checkout@v4 diff --git a/.github/workflows/e2e_run_all.yaml b/.github/workflows/e2e_run_all.yaml index 90b9de475..5f6523d37 100644 --- a/.github/workflows/e2e_run_all.yaml +++ b/.github/workflows/e2e_run_all.yaml @@ -121,7 +121,7 @@ jobs: outputs: matrix: ${{ steps.matrix.outputs.matrix }} env: - PROVIDERS: "libvirt" + PROVIDERS: "docker libvirt" steps: - name: Checkout Code uses: actions/checkout@v4 @@ -283,3 +283,29 @@ jobs: git_ref: ${{ inputs.git_ref }} 
oras: true secrets: inherit + + # Run docker e2e tests if pull request labeled 'test_e2e_docker' + docker: + name: docker + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + contains(github.event.pull_request.labels.*.name, 'test_e2e_docker') + needs: [podvm_mkosi_amd64, caa_image_amd64] + strategy: + fail-fast: false + matrix: + os: + - fedora + provider: + - docker + arch: + - amd64 + uses: ./.github/workflows/e2e_docker.yaml + with: + caa_image: ${{ inputs.registry }}/cloud-api-adaptor + caa_image_tag: ${{ inputs.caa_image_tag }} + podvm_image: ${{ needs.podvm_mkosi_amd64.outputs.docker_oci_image }} + install_directory_artifact: install_directory + git_ref: ${{ inputs.git_ref }} + secrets: inherit From 409464048f21b008a90c441265fe595805c9d9c9 Mon Sep 17 00:00:00 2001 From: stevenhorsman Date: Fri, 6 Dec 2024 09:09:21 +0000 Subject: [PATCH 4/5] workflows/docker: Remove installation directory As discussed in #2171, the CAA_IMAGE envs are not working in the e2e code and combined with the installation directory, it seems to add confusion when we need different CAA images for decoupling of different architectures, so switch docker to use the same approach as libvirt for consistency. 
Signed-off-by: stevenhorsman --- .github/workflows/e2e_docker.yaml | 28 +++++++++++++++------------- .github/workflows/e2e_run_all.yaml | 4 ++-- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/workflows/e2e_docker.yaml b/.github/workflows/e2e_docker.yaml index 9ccff221b..b6b4e3fd9 100644 --- a/.github/workflows/e2e_docker.yaml +++ b/.github/workflows/e2e_docker.yaml @@ -11,11 +11,7 @@ on: required: true type: string caa_image: - required: true - type: string - caa_image_tag: - required: false - default: "latest" + description: The cloud-api-adaptor OCI image (including tag) to test type: string install_directory_artifact: description: The archive name of the install directory @@ -83,18 +79,24 @@ jobs: with: go-version: ${{ env.GO_VERSION }} - - name: Get the install directory - if: ${{ inputs.install_directory_artifact != '' }} - uses: actions/download-artifact@v4 - with: - name: ${{ inputs.install_directory_artifact }} - path: src/cloud-api-adaptor/install + - name: Install kustomize + run: | + command -v kustomize >/dev/null || \ + curl -s "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | \ + sudo bash -s /usr/local/bin + + - name: Update kustomization configuration + run: | + cd "install/overlays/docker" + kustomize edit set image "cloud-api-adaptor=${{ inputs.caa_image }}" + # Print for debugging + echo "::group::docker kustomization" + cat kustomization.yaml + echo "::endgroup::" - name: Config docker run: | cat <<- EOF > docker.properties - CAA_IMAGE="${{ inputs.caa_image }}" - CAA_IMAGE_TAG="${{ inputs.caa_image_tag }}" DOCKER_PODVM_IMAGE="${{ inputs.podvm_image }}" DOCKER_HOST="unix:///var/run/docker.sock" DOCKER_NETWORK_NAME="kind" diff --git a/.github/workflows/e2e_run_all.yaml b/.github/workflows/e2e_run_all.yaml index 5f6523d37..93bbed265 100644 --- a/.github/workflows/e2e_run_all.yaml +++ b/.github/workflows/e2e_run_all.yaml @@ -116,6 +116,7 @@ jobs: # IMPORTANT: If you are enabling 
e2e tests for a given provider, # then please update the PROVIDERS list (space-separated names, e.g., # "aws libvirt"). + # TODO - when the packer approach is removed this can go as well prep_install: runs-on: ubuntu-24.04 outputs: @@ -303,8 +304,7 @@ jobs: - amd64 uses: ./.github/workflows/e2e_docker.yaml with: - caa_image: ${{ inputs.registry }}/cloud-api-adaptor - caa_image_tag: ${{ inputs.caa_image_tag }} + caa_image: ${{ inputs.registry }}/cloud-api-adaptor:${{ inputs.caa_image_tag }}-amd64-dev podvm_image: ${{ needs.podvm_mkosi_amd64.outputs.docker_oci_image }} install_directory_artifact: install_directory git_ref: ${{ inputs.git_ref }} From 483a246e63c5aedc68370cc5133636a6c6115401 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 16 Jan 2025 14:14:31 -0300 Subject: [PATCH 5/5] workflows/docker: switch to CRI-O The e2e tests for docker on Kind and Containerd have failed due to the nydus-snapshotter bug, the well-known problem of image layers not being found on the host. That issue doesn't affect CRI-O, so let's switch to that container runtime for running the tests. Signed-off-by: Wainer dos Santos Moschetta Signed-off-by: stevenhorsman --- .github/workflows/e2e_docker.yaml | 7 +++++++ .github/workflows/e2e_run_all.yaml | 3 +++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/e2e_docker.yaml b/.github/workflows/e2e_docker.yaml index b6b4e3fd9..d99000b7b 100644 --- a/.github/workflows/e2e_docker.yaml +++ b/.github/workflows/e2e_docker.yaml @@ -23,6 +23,11 @@ on: description: Git ref to checkout the cloud-api-adaptor repository. Defaults to main. required: false type: string + container_runtime: + default: 'containerd' + description: Name of the container runtime. Either containerd or crio. 
+ required: false + type: string env: CLOUD_PROVIDER: docker @@ -100,6 +105,7 @@ jobs: DOCKER_PODVM_IMAGE="${{ inputs.podvm_image }}" DOCKER_HOST="unix:///var/run/docker.sock" DOCKER_NETWORK_NAME="kind" + CONTAINER_RUNTIME="${{ inputs.container_runtime }}" EOF # For debugging cat docker.properties @@ -108,6 +114,7 @@ jobs: id: runTests run: | export CLOUD_PROVIDER=docker + export CONTAINER_RUNTIME="${{ inputs.container_runtime }}" export DEPLOY_KBS=false export TEST_PROVISION=yes export TEST_TEARDOWN=no diff --git a/.github/workflows/e2e_run_all.yaml b/.github/workflows/e2e_run_all.yaml index 93bbed265..fd06a0a73 100644 --- a/.github/workflows/e2e_run_all.yaml +++ b/.github/workflows/e2e_run_all.yaml @@ -296,6 +296,8 @@ jobs: strategy: fail-fast: false matrix: + container_runtime: + - crio os: - fedora provider: @@ -305,6 +307,7 @@ jobs: uses: ./.github/workflows/e2e_docker.yaml with: caa_image: ${{ inputs.registry }}/cloud-api-adaptor:${{ inputs.caa_image_tag }}-amd64-dev + container_runtime: ${{ matrix.container_runtime }} podvm_image: ${{ needs.podvm_mkosi_amd64.outputs.docker_oci_image }} install_directory_artifact: install_directory git_ref: ${{ inputs.git_ref }}