# Patch summary (these '#' lines sit before the first `diff --git` header and are not part of any hunk).
#
# 1. .github/workflows/canary-integration-test.yml
#    The single `run RGW multisite test` step (uses: ./.github/workflows/rgw-multisite-test) is
#    replaced by that action's steps written inline in the job.
# 2. .github/workflows/rgw-multisite-test/action.yml
#    The composite action is deleted.
# 3. tests/scripts/github-action-helper.sh
#    - wait_for_rgw is removed. It ran two `for _ in {1..120}` loops (running rgw pods, then the rgw
#      deployment's readyReplicas), each with a 5s sleep, and had no failure path after the loops.
#    - wait_for is added with arguments: kind (required), name (required), namespace (default
#      rook-ceph), timeout in seconds (default 120), status (default Ready). It compares
#      `kubectl get <kind> <name> -o jsonpath={..status.phase}` against the wanted status every 5s,
#      returns 0 on a match, and on timeout prints to stderr and calls `exit 1`.
#    - In write_object_read_from_replica_cluster the background timer that sends SIGUSR1 goes from
#      `sleep 60` to `sleep 300`.
#
# Differences between the inlined steps and the deleted action:
#    - Object store readiness is checked with `wait_for cephobjectstore <store> <namespace> 480`
#      instead of `wait_for_rgw <namespace>`.
#    - `wait for ceph cluster 1 to be ready` (validate_cluster.sh osd 1 + kubectl get pods) becomes
#      `wait for both ceph clusters to be ready` (wait_for cephcluster my-cluster in rook-ceph and in
#      rook-ceph-secondary, using the default 120s timeout).
#    - All other steps are carried over unchanged.
#
# NOTE(review): this copy has its newlines collapsed; the first hunk header declares 75 new lines
#   (`+1528,75`), so the original line breaks must be restored before the patch can be applied.
# NOTE(review): the second-reconcile check pairs a message ending in "for CephObjectStore" with zone
#   identifiers ("rook-ceph/zone-a", "rook-ceph-secondary/zone-b"); this is unchanged from the deleted
#   action - confirm it matches the operator's actual log line.
diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml index a00973870d72..898fdbd5d16d 100644 --- a/.github/workflows/canary-integration-test.yml +++ b/.github/workflows/canary-integration-test.yml @@ -1528,8 +1528,75 @@ jobs: - name: set Ceph version in CephCluster manifest run: tests/scripts/github-action-helper.sh replace_ceph_image "deploy/examples/cluster-test.yaml" "${{ github.event.inputs.ceph-image }}" - - name: run RGW multisite test - uses: ./.github/workflows/rgw-multisite-test + - name: setup cluster resources + uses: ./.github/workflows/canary-test-config + + - name: install additional deps for object testing + shell: bash --noprofile --norc -eo pipefail -x {0} + run: | + sudo apt-get install -y s3cmd + + - name: use local disk into two partitions + shell: bash --noprofile --norc -eo pipefail -x {0} + run: | + export BLOCK="/dev/$(tests/scripts/github-action-helper.sh find_extra_block_dev)" + tests/scripts/github-action-helper.sh use_local_disk + tests/scripts/create-bluestore-partitions.sh --disk "$BLOCK" --osd-count 2 + sudo lsblk + + - name: deploy first cluster rook + shell: bash --noprofile --norc -eo pipefail -x {0} + run: | + tests/scripts/github-action-helper.sh deploy_first_rook_cluster + kubectl create -f deploy/examples/object-multisite-test.yaml + # wait for multisite-store to be created + tests/scripts/github-action-helper.sh wait_for cephobjectstore multisite-store rook-ceph 480 + + - name: prep second cluster pull realm config + shell: bash --noprofile --norc -eo pipefail -x {0} + run: | + cd deploy/examples/ + IP_ADDR=$(kubectl -n rook-ceph get svc rook-ceph-rgw-multisite-store -o jsonpath="{.spec.clusterIP}") + yq w -i -d1 object-multisite-pull-realm-test.yaml spec.pull.endpoint http://${IP_ADDR}:80 + BASE64_ACCESS_KEY=$(kubectl -n rook-ceph get secrets realm-a-keys -o jsonpath="{.data.access-key}") + BASE64_SECRET_KEY=$(kubectl -n rook-ceph get secrets realm-a-keys -o 
jsonpath="{.data.secret-key}") + sed -i 's/VzFjNFltMVdWRTFJWWxZelZWQT0=/'"$BASE64_ACCESS_KEY"'/g' object-multisite-pull-realm-test.yaml + sed -i 's/WVY1MFIxeExkbG84U3pKdlRseEZXVGR3T3k1U1dUSS9KaTFoUVE9PQ==/'"$BASE64_SECRET_KEY"'/g' object-multisite-pull-realm-test.yaml + + - name: deploy second cluster rook + shell: bash --noprofile --norc -eo pipefail -x {0} + run: | + tests/scripts/github-action-helper.sh deploy_second_rook_cluster + kubectl create -f deploy/examples/object-multisite-pull-realm-test.yaml + # wait for realms to be pulled and zone-b-multisite-store to be created + tests/scripts/github-action-helper.sh wait_for cephobjectstore zone-b-multisite-store rook-ceph-secondary 480 + + - name: wait for both ceph clusters to be ready + shell: bash --noprofile --norc -eo pipefail -x {0} + run: | + tests/scripts/github-action-helper.sh wait_for cephcluster my-cluster rook-ceph + tests/scripts/github-action-helper.sh wait_for cephcluster my-cluster rook-ceph-secondary + + - name: write an object to one cluster, read from the other + shell: bash --noprofile --norc -eo pipefail -x {0} + run: | + tests/scripts/github-action-helper.sh test_multisite_object_replication + + # if this test fails, it could mean the RGW `period get` or `period update` output has changed + - name: RGW configuration period should be committed on first reconcile and not be committed on second reconcile + shell: bash --noprofile --norc -eo pipefail -x {0} + run: | + ns_name_primary_object_store='"rook-ceph/multisite-store"' # double quotes intended + ns_name_secondary_object_store='"rook-ceph-secondary/zone-b-multisite-store"' # double quotes intended + committed_msg="committing changes to RGW configuration period for CephObjectStore" + tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_primary_object_store}" + tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_secondary_object_store}" + 
ns_name_primary_object_zone='"rook-ceph/zone-a"' # double quotes intended + ns_name_secondary_object_zone='"rook-ceph-secondary/zone-b"' # double quotes intended + tests/scripts/github-action-helper.sh restart_operator + not_committed_msg="there are no changes to commit for RGW configuration period for CephObjectStore" + tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_primary_object_zone}" 600 + tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_secondary_object_zone}" 600 - name: collect common logs if: always() diff --git a/.github/workflows/rgw-multisite-test/action.yml b/.github/workflows/rgw-multisite-test/action.yml deleted file mode 100644 index 5fa7c81767f0..000000000000 --- a/.github/workflows/rgw-multisite-test/action.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: RGW Multisite Test -description: Reusable workflow to test RGW multisite integration - -runs: - using: "composite" - steps: - - name: setup cluster resources - uses: ./.github/workflows/canary-test-config - - - name: install additional deps for object testing - shell: bash --noprofile --norc -eo pipefail -x {0} - run: | - sudo apt-get install -y s3cmd - - - name: use local disk into two partitions - shell: bash --noprofile --norc -eo pipefail -x {0} - run: | - export BLOCK="/dev/$(tests/scripts/github-action-helper.sh find_extra_block_dev)" - tests/scripts/github-action-helper.sh use_local_disk - tests/scripts/create-bluestore-partitions.sh --disk "$BLOCK" --osd-count 2 - sudo lsblk - - - name: deploy first cluster rook - shell: bash --noprofile --norc -eo pipefail -x {0} - run: | - tests/scripts/github-action-helper.sh deploy_first_rook_cluster - kubectl create -f deploy/examples/object-multisite-test.yaml - # wait for multisite store to be created - tests/scripts/github-action-helper.sh wait_for_rgw rook-ceph - - - name: prep second cluster pull realm config - shell: bash --noprofile --norc -eo 
pipefail -x {0} - run: | - cd deploy/examples/ - IP_ADDR=$(kubectl -n rook-ceph get svc rook-ceph-rgw-multisite-store -o jsonpath="{.spec.clusterIP}") - yq w -i -d1 object-multisite-pull-realm-test.yaml spec.pull.endpoint http://${IP_ADDR}:80 - BASE64_ACCESS_KEY=$(kubectl -n rook-ceph get secrets realm-a-keys -o jsonpath="{.data.access-key}") - BASE64_SECRET_KEY=$(kubectl -n rook-ceph get secrets realm-a-keys -o jsonpath="{.data.secret-key}") - sed -i 's/VzFjNFltMVdWRTFJWWxZelZWQT0=/'"$BASE64_ACCESS_KEY"'/g' object-multisite-pull-realm-test.yaml - sed -i 's/WVY1MFIxeExkbG84U3pKdlRseEZXVGR3T3k1U1dUSS9KaTFoUVE9PQ==/'"$BASE64_SECRET_KEY"'/g' object-multisite-pull-realm-test.yaml - - - name: deploy second cluster rook - shell: bash --noprofile --norc -eo pipefail -x {0} - run: | - tests/scripts/github-action-helper.sh deploy_second_rook_cluster - kubectl create -f deploy/examples/object-multisite-pull-realm-test.yaml - # wait for realms to be pulled and zone-b-multisite-store to be created - tests/scripts/github-action-helper.sh wait_for_rgw rook-ceph-secondary - - - name: wait for ceph cluster 1 to be ready - shell: bash --noprofile --norc -eo pipefail -x {0} - run: | - tests/scripts/validate_cluster.sh osd 1 - kubectl -n rook-ceph get pods - - - name: write an object to one cluster, read from the other - shell: bash --noprofile --norc -eo pipefail -x {0} - run: | - tests/scripts/github-action-helper.sh test_multisite_object_replication - - # if this test fails, it could mean the RGW `period get` or `period update` output has changed - - name: RGW configuration period should be committed on first reconcile and not be committed on second reconcile - shell: bash --noprofile --norc -eo pipefail -x {0} - run: | - ns_name_primary_object_store='"rook-ceph/multisite-store"' # double quotes intended - ns_name_secondary_object_store='"rook-ceph-secondary/zone-b-multisite-store"' # double quotes intended - committed_msg="committing changes to RGW configuration period for 
CephObjectStore" - tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_primary_object_store}" - tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_secondary_object_store}" - ns_name_primary_object_zone='"rook-ceph/zone-a"' # double quotes intended - ns_name_secondary_object_zone='"rook-ceph-secondary/zone-b"' # double quotes intended - tests/scripts/github-action-helper.sh restart_operator - not_committed_msg="there are no changes to commit for RGW configuration period for CephObjectStore" - tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_primary_object_zone}" 600 - tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_secondary_object_zone}" 600 diff --git a/tests/scripts/github-action-helper.sh b/tests/scripts/github-action-helper.sh index 26a2b59e0646..fac62a6fa78d 100755 --- a/tests/scripts/github-action-helper.sh +++ b/tests/scripts/github-action-helper.sh @@ -476,23 +476,28 @@ function deploy_second_rook_cluster() { deploy_toolbox } -function wait_for_rgw() { - for _ in {1..120}; do - if [ "$(kubectl -n "$1" get pod -l app=rook-ceph-rgw --no-headers --field-selector=status.phase=Running | wc -l)" -ge 1 ]; then - echo "rgw pod is found" - break +function wait_for() { + local kind=${1?kind is required} + local name=${2?resource name is required} + local ns=${3:-rook-ceph} + local timeout=${4:-120} + local status=${5:-Ready} + + local start_time="${SECONDS}" + local elapsed_time=0 + while [[ $elapsed_time -lt $timeout ]]; do + if [[ "$(kubectl -n "$ns" get "$kind" "$name" -o 'jsonpath={..status.phase}')" == "$status" ]]; then + echo "${kind}/${name} in ${ns} is ${status} - elapsed time ${elapsed_time}s" + return 0 fi - echo "waiting for rgw pods" - sleep 5 - done - for _ in {1..120}; do - if [ "$(kubectl -n "$1" get deployment -l app=rook-ceph-rgw -o yaml | yq read - 
'items[0].status.readyReplicas')" -ge 1 ]; then - echo "rgw is ready" - break - fi - echo "waiting for rgw becomes ready" + + elapsed_time=$((SECONDS - start_time)) + echo "waiting for ${kind}/${name} in ${ns} to be ${status} - elapsed time ${elapsed_time}s" sleep 5 done + + echo "timed out waiting for ${kind}/${name} in ${ns} to be ${status} - elapsed time ${elapsed_time}s " >&2 + exit 1 } function verify_operator_log_message() { @@ -561,7 +566,7 @@ function write_object_read_from_replica_cluster() { # a direct sub-shell. S3CMD_ERROR=0 ( - sleep 60 + sleep 300 kill -s SIGUSR1 $$ ) 2>/dev/null & trap "{ S3CMD_ERROR=1; break; }" SIGUSR1