diff --git a/.github/workflows/ccruntime_e2e.yaml b/.github/workflows/ccruntime_e2e.yaml
index 788eb2f0..674480ee 100644
--- a/.github/workflows/ccruntime_e2e.yaml
+++ b/.github/workflows/ccruntime_e2e.yaml
@@ -62,7 +62,7 @@ jobs:
           if [ $RUNNING_INSTANCE = "s390x" ]; then
             args=""
           fi
-          ./run-local.sh -r "${{ matrix.runtimeclass }}" "${args}"
+          ./run-local.sh -t -r "${{ matrix.runtimeclass }}" "${args}"
         env:
           RUNNING_INSTANCE: ${{ matrix.instance }}
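The operator.sh changes below switch the plain ERROR:/DEBUG: prefixes to GitHub Actions workflow commands, and rework the uninstall wait loops so that a transient kubectl failure no longer satisfies the negated `! kubectl ... | grep` check and ends the wait prematurely. A minimal sketch of the workflow commands in use, assuming the script runs inside a GitHub Actions step (note that `::debug::` lines only appear when the ACTIONS_STEP_DEBUG secret is enabled, and `::info::` is not an official workflow command, so those lines simply print verbatim):

    echo "::debug:: shown only when ACTIONS_STEP_DEBUG=true"
    echo "::warning:: rendered as a warning annotation on the run"
    echo "::error:: rendered as an error annotation on the run"
    echo "::group::Collapsible log section"
    kubectl get pods -A
    echo "::endgroup::"
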
diff --git a/tests/e2e/operator.sh b/tests/e2e/operator.sh
index 3503ecd7..601c808d 100755
--- a/tests/e2e/operator.sh
+++ b/tests/e2e/operator.sh
@@ -32,10 +32,10 @@ build_operator () {
     # so it's better to check it before adding the target repo.
     local sd="$(git config --global --get safe.directory ${project_dir} || true)"
     if [ "${sd}" == "" ]; then
-        echo "Add repo ${project_dir} to git's safe.directory"
+        echo "::debug:: Add repo ${project_dir} to git's safe.directory"
         git config --global --add safe.directory "${project_dir}"
     else
-        echo "Repo ${project_dir} already in git's safe.directory"
+        echo "::debug:: Repo ${project_dir} already in git's safe.directory"
     fi
 
     pushd "$project_dir" >/dev/null
@@ -67,9 +67,9 @@ build_pre_install_img() {
 handle_older_containerd() {
     command -v containerd >/dev/null || return
     local version=$(containerd -v | awk '{ print $3 }' | sed 's/^v//')
-    echo "system's containerd version: $version"
+    echo "::debug:: system's containerd version: $version"
     if [[ "$version" =~ ^1.6 || "$version" =~ ^1.5 ]]; then
-        echo "Old system's containerd ($version). Configuring the operator to install a newer one"
+        echo "::warning:: Old system's containerd ($version). Configuring the operator to install a newer one"
         pushd "$project_dir" >/dev/null
         for kfile in $(find config/ -name "kustomization.yaml" \
             -exec grep -l INSTALL_OFFICIAL_CONTAINERD {} \;);do
@@ -104,10 +104,10 @@ install_operator() {
     local cmd="kubectl get pods -n "$op_ns" --no-headers |"
     cmd+="egrep -q ${controller_pod}.*'\<Running\>'"
     if ! wait_for_process 120 10 "$cmd"; then
-        echo "ERROR: ${controller_pod} pod is not running"
+        echo "::error:: ${controller_pod} pod is not running"
 
         local pod_id="$(get_pods_regex $controller_pod $op_ns)"
-        echo "DEBUG: Pod $pod_id"
+        echo "::debug:: Pod $pod_id"
         debug_pod "$pod_id" "$op_ns"
 
         return 1
@@ -135,10 +135,10 @@ install_ccruntime() {
     cmd="kubectl get pods -n "$op_ns" --no-headers |"
     cmd+="egrep -q ${pod}.*'\<Running\>'"
     if ! wait_for_process 600 30 "$cmd"; then
-        echo "ERROR: $pod pod is not running"
+        echo "::error:: $pod pod is not running"
 
         local pod_id="$(get_pods_regex $pod $op_ns)"
-        echo "DEBUG: Pod $pod_id"
+        echo "::debug:: Pod $pod_id"
         debug_pod "$pod_id" "$op_ns"
 
         return 1
@@ -149,7 +149,7 @@ install_ccruntime() {
     # There could be a case where it is not even if the pods above are running.
     cmd="kubectl get runtimeclass | grep -q ${runtimeclass}"
     if ! wait_for_process 300 30 "$cmd"; then
-        echo "ERROR: runtimeclass ${runtimeclass} is not up"
+        echo "::error:: runtimeclass ${runtimeclass} is not up"
         return 1
     fi
     # To keep operator running, we should resume registry stopped during containerd restart.
@@ -164,12 +164,12 @@ uninstall_ccruntime() {
     popd >/dev/null
 
     # Wait and ensure ccruntime pods are gone
-    #
-    local cmd="! sudo -E kubectl get pods -n $op_ns |"
-    cmd+="grep -q -e cc-operator-daemon-install"
+    # (ensure failing kubectl keeps iterating)
+    local cmd="_OUT=\$(sudo -E kubectl get pods -n '$op_ns')"
+    cmd+=" && ! echo \$_OUT | grep -q -e cc-operator-daemon-install"
     cmd+=" -e cc-operator-pre-install-daemon"
     if ! wait_for_process 720 30 "$cmd"; then
-        echo "ERROR: there are ccruntime pods still running"
+        echo "::error:: there are ccruntime pods still running"
         echo "::group::Describe pods from $op_ns namespace"
         kubectl -n "$op_ns" describe pods || true
         echo "::endgroup::"
@@ -183,7 +183,7 @@ uninstall_ccruntime() {
     # Labels should be gone
     if kubectl get nodes "$SAFE_HOST_NAME" -o jsonpath='{.metadata.labels}' | \
         grep -q -e cc-preinstall -e katacontainers.io; then
-        echo "ERROR: there are labels left behind"
+        echo "::error:: there are labels left behind"
         kubectl get nodes "$SAFE_HOST_NAME" -o jsonpath='{.metadata.labels}'
 
         return 1
@@ -207,7 +207,7 @@ kustomization_set_image() {
     # and this can introduce false-positive on the tests. So let's check the old image really
     # exist.
     if ! grep -q "name: ${old}$" ./kustomization.yaml; then
-        echo "ERROR: expected image ${old} in ${overlay_dir}/kustomization.yaml"
+        echo "::error:: expected image ${old} in ${overlay_dir}/kustomization.yaml"
         return 1
     fi
 
@@ -242,15 +242,15 @@ uninstall_operator() {
     popd >/dev/null
 
     # Wait and ensure the controller pod is gone
-    #
+    # (ensure failing kubectl keeps iterating)
     local pod="cc-operator-controller-manager"
-    local cmd="! kubectl get pods -n $op_ns |"
-    cmd+="grep -q $pod"
+    local cmd="_OUT=\$(sudo -E kubectl get pods -n '$op_ns')"
+    cmd+=" && ! echo \$_OUT | grep -q -e $pod"
     if ! wait_for_process 180 30 "$cmd"; then
-        echo "ERROR: the controller manager is still running"
+        echo "::error:: the controller manager is still running"
         local pod_id="$(get_pods_regex $pod $op_ns)"
-        echo "DEBUG: Pod $pod_id"
+        echo "::debug:: Pod $pod_id"
         debug_pod "$pod_id" "$op_ns"
 
         return 1
@@ -269,7 +269,7 @@ wait_for_stabilization() {
 
         while read -r pod container restart_count; do
             if [ "${restart_counts[$pod-$container]--1}" != "$restart_count" ]; then
-                echo "DEBUG: Pod: $pod, Container: $container, Restart count: $restart_count"
+                echo "::debug:: Pod: $pod, Container: $container, Restart count: $restart_count"
                 restart_counts["$pod-$container"]=$restart_count
                 change=1
             fi
@@ -278,10 +278,10 @@ wait_for_stabilization() {
         [ $change -eq 0 ] && ((iteration+=1))
 
         if [ $iteration -gt 3 ]; then
-            echo "INFO: No new restarts in 3x21s, proceeding..."
+            echo "::info:: No new restarts in 3x21s, proceeding..."
             break
         elif [ $count -gt 20 ]; then
-            echo "ERROR: Pods are still restarting after 20x21s, bailing out!"
+            echo "::error:: Pods are still restarting after 20x21s, bailing out!"
             return 1
         fi
 
@@ -335,7 +335,7 @@ main() {
             wait_for_stabilization
             ;;
         *)
-            echo "Unknown command '$1'"
+            echo "::error:: Unknown command '$1'"
             usage && exit 1
         esac
     fi
diff --git a/tests/e2e/operator_tests.bats b/tests/e2e/operator_tests.bats
index bea771c0..0cc7449c 100644
--- a/tests/e2e/operator_tests.bats
+++ b/tests/e2e/operator_tests.bats
@@ -8,6 +8,9 @@ load "${BATS_TEST_DIRNAME}/lib.sh"
 
 test_tag="[cc][operator]"
 
+# Set 10m timeout for each test
+export BATS_TEST_TIMEOUT=600
+
 is_operator_installed() {
     [ "$(kubectl get deployment -n "$ns" --no-headers 2>/dev/null | wc -l)" \
         -gt 0 ]
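BATS_TEST_TIMEOUT is supported since bats-core 1.7.0: a test exceeding the limit is killed and reported as failed instead of hanging the job. A minimal, self-contained sketch of the behavior (hypothetical test file, 5s limit instead of the 600s set above):

    #!/usr/bin/env bats

    # Abort any test in this file that runs longer than 5 seconds.
    export BATS_TEST_TIMEOUT=5

    @test "long operation is aborted by the timeout" {
        sleep 10    # killed after 5s; the test is reported as failed
    }
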
diff --git a/tests/e2e/run-local.sh b/tests/e2e/run-local.sh
index bfbb780e..38bde695 100755
--- a/tests/e2e/run-local.sh
+++ b/tests/e2e/run-local.sh
@@ -15,6 +15,7 @@ step_start_cluster=0
 step_install_operator=0
 runtimeclass=""
 undo="false"
+timeout="false"
 
 usage() {
     cat <<-EOF
@@ -29,44 +30,58 @@ usage() {
     the tests. Defaults to "kata-qemu".
     -u: undo the installation and configuration before exiting. Useful for
         baremetal machines where it needs to do cleanup for the next tests.
+    -t: enable default timeout for each operation (useful for CI)
     EOF
 }
 
 parse_args() {
-    while getopts "hr:u" opt; do
+    while getopts "hr:ut" opt; do
         case $opt in
             h) usage && exit 0;;
             r) runtimeclass="$OPTARG";;
             u) undo="true";;
+            t) timeout="true";;
             *) usage && exit 1;;
         esac
     done
 }
 
+run() {
+    duration=$1; shift
+    if [ "$timeout" == "true" ]; then
+        timeout $duration "$@"
+    else
+        "$@"
+    fi
+}
+
 undo_changes() {
     pushd "$script_dir" >/dev/null
     # Do not try to undo steps that did not execute.
     if [ $step_install_operator -eq 1 ]; then
-        echo "INFO: Uninstall the operator"
-        sudo -E PATH="$PATH" bash -c './operator.sh uninstall' || true
+        echo "::info:: Uninstall the operator"
+        run 10m sudo -E PATH="$PATH" bash -c './operator.sh uninstall' || true
     fi
 
     if [ $step_start_cluster -eq 1 ]; then
-        echo "INFO: Shutdown the cluster"
-        sudo -E PATH="$PATH" bash -c './cluster/down.sh' || true
+        echo "::info:: Shutdown the cluster"
+        run 5m sudo -E PATH="$PATH" bash -c './cluster/down.sh' || true
     fi
 
     if [ $step_bootstrap_env -eq 1 ]; then
-        echo "INFO: Undo the bootstrap"
-        ansible-playbook -i localhost, -c local --tags undo ansible/main.yml || true
+        echo "::info:: Undo the bootstrap"
+        run 5m ansible-playbook -i localhost, -c local --tags undo ansible/main.yml || true
     fi
     popd >/dev/null
 }
 
 on_exit() {
+    RET="$?"
     if [ "$undo" == "true" ]; then
+        [ "$RET" -ne 0 ] && echo && echo "::error:: Testing failed with $RET, starting the cleanup..."
         undo_changes
     fi
+    [ "$RET" -ne 0 ] && echo && echo "::error:: Testing failed with $RET" || echo "::info:: Testing passed"
 }
 
 trap on_exit EXIT
@@ -78,28 +93,28 @@ main() {
 
     # Check Ansible is installed.
     if ! command -v ansible-playbook >/dev/null; then
-        echo "ERROR: ansible-playbook is required to run this script."
+        echo "::error:: ansible-playbook is required to run this script."
         exit 1
     fi
 
     export "PATH=$PATH:/usr/local/bin"
 
     pushd "$script_dir" >/dev/null
-    echo "INFO: Bootstrap the local machine"
+    echo "::info:: Bootstrap the local machine"
     step_bootstrap_env=1
-    ansible-playbook -i localhost, -c local --tags untagged ansible/main.yml
+    run 10m ansible-playbook -i localhost, -c local --tags untagged ansible/main.yml
 
-    echo "INFO: Bring up the test cluster"
+    echo "::info:: Bring up the test cluster"
     step_start_cluster=1
-    sudo -E PATH="$PATH" bash -c './cluster/up.sh'
+    run 10m sudo -E PATH="$PATH" bash -c './cluster/up.sh'
     export KUBECONFIG=/etc/kubernetes/admin.conf
 
-    echo "INFO: Build and install the operator"
+    echo "::info:: Build and install the operator"
     step_install_operator=1
-    sudo -E PATH="$PATH" bash -c './operator.sh'
+    run 20m sudo -E PATH="$PATH" bash -c './operator.sh'
 
-    echo "INFO: Run tests"
-    cmd="sudo -E PATH=\"$PATH\" bash -c "
+    echo "::info:: Run tests"
+    cmd="run 20m sudo -E PATH=\"$PATH\" bash -c "
     if [ -z "$runtimeclass" ]; then
         cmd+="'./tests_runner.sh'"
     else
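A quick sanity check of the `run` wrapper added above, assuming GNU coreutils `timeout` (which sends SIGTERM and returns exit status 124 when the duration expires), pasted into an interactive shell:

    timeout="true"
    run() {
        duration=$1; shift
        if [ "$timeout" == "true" ]; then
            timeout $duration "$@"
        else
            "$@"
        fi
    }

    run 2s sleep 10; echo "rc=$?"   # prints rc=124 after ~2 seconds
    timeout="false"
    run 2s sleep 3; echo "rc=$?"    # no limit applied; prints rc=0 after 3 seconds

The nonzero 124 propagates through the script's error handling, so a hung step now fails the job (and triggers the `on_exit` cleanup when -u is set) instead of stalling the CI runner until the workflow-level timeout.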