From 307a0223a341ca9d0e2bd364076be06a0de84cee Mon Sep 17 00:00:00 2001 From: Shubham Pampattiwar Date: Fri, 28 Jun 2024 12:14:35 -0700 Subject: [PATCH] Skip PV patch step in Restore workflow for WaitForFirstConsumer VolumeBindingMode Pending state PVCs Signed-off-by: Shubham Pampattiwar add changelog file Signed-off-by: Shubham Pampattiwar change log level and add more detailed comments Signed-off-by: Shubham Pampattiwar make update Signed-off-by: Shubham Pampattiwar --- .../unreleased/7953-shubham-pampattiwar | 1 + .../restore_finalizer_controller.go | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 changelogs/unreleased/7953-shubham-pampattiwar diff --git a/changelogs/unreleased/7953-shubham-pampattiwar b/changelogs/unreleased/7953-shubham-pampattiwar new file mode 100644 index 00000000000..b7a42add788 --- /dev/null +++ b/changelogs/unreleased/7953-shubham-pampattiwar @@ -0,0 +1 @@ +Skip PV patch step in Restore workflow for WaitForFirstConsumer VolumeBindingMode Pending state PVCs \ No newline at end of file diff --git a/pkg/controller/restore_finalizer_controller.go b/pkg/controller/restore_finalizer_controller.go index d9aaaa30a62..236009a8f84 100644 --- a/pkg/controller/restore_finalizer_controller.go +++ b/pkg/controller/restore_finalizer_controller.go @@ -22,6 +22,8 @@ import ( "sync" "time" + storagev1api "k8s.io/api/storage/v1" + "github.com/pkg/errors" "github.com/sirupsen/logrus" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -304,6 +306,26 @@ func (ctx *finalizerContext) patchDynamicPVWithVolumeInfo() (errs results.Result return false, err } + // We are handling a common but specific scenario where a PVC is in a pending state and uses a storage class with + // VolumeBindingMode set to WaitForFirstConsumer. In this case, the PV patch step is skipped to avoid + // failures due to the PVC not being bound, which could cause a timeout and result in a failed restore. 
+ if pvc != nil && pvc.Status.Phase == v1.ClaimPending { + // check if storage class used has VolumeBindingMode as WaitForFirstConsumer + scName := *pvc.Spec.StorageClassName + sc := &storagev1api.StorageClass{} + err = ctx.crClient.Get(context.Background(), client.ObjectKey{Name: scName}, sc) + + if err != nil { + errs.Add(restoredNamespace, err) + } + // skip PV patch step for this scenario + // because pvc would not be bound and the PV patch step would fail due to timeout thus failing the restore + if *sc.VolumeBindingMode == storagev1api.VolumeBindingWaitForFirstConsumer { + log.Warnf("skipping PV patch to restore custom reclaim policy, if any: StorageClass %s used by PVC %s has VolumeBindingMode set to WaitForFirstConsumer, and the PVC is also in a pending state", scName, pvc.Name) + return true, nil + } + } + if pvc.Status.Phase != v1.ClaimBound || pvc.Spec.VolumeName == "" { log.Debugf("PVC: %s not ready", pvc.Name) return false, nil