From 6eb2d1121ab0b4cf3792c7cefe2cdbb786a6bc61 Mon Sep 17 00:00:00 2001 From: Peter Razumovsky Date: Wed, 9 Oct 2024 17:52:52 +0400 Subject: [PATCH 1/3] osd: import keyring file on activate to ceph auth if not imported yet If the host path keyring file exists and was created during osd-prepare but was not imported to ceph auth, then the rook-ceph-osd activate init container will create a new keyring in ceph auth with the ceph auth get-or-create command. To avoid a mismatch between the keyring in the host path and the one in ceph auth, import the key from the host path keyring file if no keyring is found in ceph auth. Resolves: #14825 Signed-off-by: Peter Razumovsky --- pkg/operator/ceph/cluster/osd/spec.go | 39 +++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/pkg/operator/ceph/cluster/osd/spec.go b/pkg/operator/ceph/cluster/osd/spec.go index e67449f8874c..6642802172ef 100644 --- a/pkg/operator/ceph/cluster/osd/spec.go +++ b/pkg/operator/ceph/cluster/osd/spec.go @@ -107,11 +107,46 @@ OSD_ID="$ROOK_OSD_ID" OSD_UUID=%s OSD_STORE_FLAG="%s" OSD_DATA_DIR=/var/lib/ceph/osd/ceph-"$OSD_ID" +KEYRING_FILE="$OSD_DATA_DIR"/keyring CV_MODE=%s DEVICE="$%s" -# create new keyring -ceph -n client.admin auth get-or-create osd."$OSD_ID" mon 'allow profile osd' mgr 'allow profile osd' osd 'allow *' -k /etc/ceph/admin-keyring-store/keyring +# In rare cases the keyring file is created by osd-prepare but is not +# stored in the ceph auth system, therefore we need to import it +# from the keyring file instead of creating a new one +if ! 
ceph -n client.admin auth get osd."$OSD_ID" -k /etc/ceph/admin-keyring-store/keyring; then + if [ -f "$KEYRING_FILE" ]; then + # import keyring from existing file + TMP_DIR=$(mktemp -d) + + python3 -c " +import configparser + +config = configparser.ConfigParser() +config.read('$KEYRING_FILE') + +if not config.has_section('osd.$OSD_ID'): + exit() + +config['osd.$OSD_ID'] = {'key': config['osd.$OSD_ID']['key'], 'caps mon': '\"allow profile osd\"', 'caps mgr': '\"allow profile osd\"', 'caps osd': '\"allow *\"'} + +with open('$TMP_DIR/keyring', 'w') as configfile: + config.write(configfile) +" + if [ -f "$TMP_DIR"/keyring ]; then + cat "$TMP_DIR"/keyring + ceph -n client.admin auth import -i "$TMP_DIR"/keyring -k /etc/ceph/admin-keyring-store/keyring + else + # create new keyring if no keyring file found + ceph -n client.admin auth get-or-create osd."$OSD_ID" mon 'allow profile osd' mgr 'allow profile osd' osd 'allow *' -k /etc/ceph/admin-keyring-store/keyring + fi + + rm --recursive --force "$TMP_DIR" + else + # create new keyring if no keyring file found + ceph -n client.admin auth get-or-create osd."$OSD_ID" mon 'allow profile osd' mgr 'allow profile osd' osd 'allow *' -k /etc/ceph/admin-keyring-store/keyring + fi +fi # active the osd with ceph-volume if [[ "$CV_MODE" == "lvm" ]]; then From 62ffcd4cbe391373aacffa215ad85a1bc61ab44e Mon Sep 17 00:00:00 2001 From: Peter Razumovsky Date: Tue, 29 Oct 2024 15:30:35 +0400 Subject: [PATCH 2/3] osd: remove excess if-else for keyring file import in activate script Remove unnecessary if-else statement inside "if keyring file exists" condition of osd activate script. 
Resolves: #14825 Signed-off-by: Peter Razumovsky --- pkg/operator/ceph/cluster/osd/spec.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pkg/operator/ceph/cluster/osd/spec.go b/pkg/operator/ceph/cluster/osd/spec.go index 6642802172ef..79c066bf290e 100644 --- a/pkg/operator/ceph/cluster/osd/spec.go +++ b/pkg/operator/ceph/cluster/osd/spec.go @@ -133,13 +133,9 @@ config['osd.$OSD_ID'] = {'key': config['osd.$OSD_ID']['key'], 'caps mon': '\"all with open('$TMP_DIR/keyring', 'w') as configfile: config.write(configfile) " - if [ -f "$TMP_DIR"/keyring ]; then - cat "$TMP_DIR"/keyring - ceph -n client.admin auth import -i "$TMP_DIR"/keyring -k /etc/ceph/admin-keyring-store/keyring - else - # create new keyring if no keyring file found - ceph -n client.admin auth get-or-create osd."$OSD_ID" mon 'allow profile osd' mgr 'allow profile osd' osd 'allow *' -k /etc/ceph/admin-keyring-store/keyring - fi + + cat "$TMP_DIR"/keyring + ceph -n client.admin auth import -i "$TMP_DIR"/keyring -k /etc/ceph/admin-keyring-store/keyring rm --recursive --force "$TMP_DIR" else From 12613bc2fa80a5792272e78b7152e5c1546824eb Mon Sep 17 00:00:00 2001 From: Peter Razumovsky Date: Tue, 12 Nov 2024 21:46:20 +0400 Subject: [PATCH 3/3] osd: add osd auth recovery from file canary job test Add test to canary-integration-test scenarios which removes osd auth and then recovers it on osd activation from keyring file. 
Resolves: #14825 Signed-off-by: Peter Razumovsky --- .github/workflows/canary-integration-test.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml index ad5e75125fef..42a7d1207dab 100644 --- a/.github/workflows/canary-integration-test.yml +++ b/.github/workflows/canary-integration-test.yml @@ -70,6 +70,21 @@ jobs: mgr_raw=$(kubectl -n rook-ceph exec $toolbox -- ceph mgr dump -f json|jq --raw-output .active_addr) timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- curl --silent --show-error ${mgr_raw%%:*}:9283; do echo 'waiting for mgr prometheus exporter to be ready' && sleep 1; done" + - name: test osd.0 auth recovery from keyring file + run: | + toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}') + osd_id=0 + osd_pod=$(kubectl get pod -l app=rook-ceph-osd,osd=$osd_id -n rook-ceph -o jsonpath='{.items[*].metadata.name}') + if [ $osd_pod ]; then + timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- ceph auth del osd.$osd_id ; do sleep 1 && echo 'waiting for osd auth to be deleted'; done"; + kubectl -n rook-ceph delete pod $osd_pod; + timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- ceph auth get osd.$osd_id ; do sleep 1 && echo 'waiting for osd auth to be recovered'; done"; + osd_pod=$(kubectl get pod -l app=rook-ceph-osd,osd=$osd_id -n rook-ceph -o jsonpath='{.items[*].metadata.name}'); + kubectl -n rook-ceph wait --for=condition=Ready pod/$osd_pod --timeout=120s; + else + echo "osd $osd_id not found, skipping test"; + fi + - name: test external script create-external-cluster-resources.py run: | toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')