From 7d05146d8736db3d7c17fbf1869a90f4f87babf3 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Tue, 10 Dec 2024 17:51:07 +0800 Subject: [PATCH] The backup cleaner can't clean the CR at the `schedule` phase before task start (#5967) (#5971) Co-authored-by: RidRisR <79858083+RidRisR@users.noreply.github.com> --- pkg/backup/backup/backup_cleaner.go | 6 ++- tests/e2e/br/br.go | 83 +++++++++++++++++++++++++++++ tests/e2e/br/framework/br/wait.go | 27 ++++++++++ 3 files changed, 115 insertions(+), 1 deletion(-) diff --git a/pkg/backup/backup/backup_cleaner.go b/pkg/backup/backup/backup_cleaner.go index d80967dd2c..8f859f6fe6 100644 --- a/pkg/backup/backup/backup_cleaner.go +++ b/pkg/backup/backup/backup_cleaner.go @@ -73,7 +73,11 @@ func (bc *backupCleaner) StopLogBackup(backup *v1alpha1.Backup) error { return fmt.Errorf("backup %s/%s spec.BR shouldn't be nil", backup.GetNamespace(), backup.GetName()) } if !v1alpha1.IsLogBackupAlreadyStart(backup) { - return nil + return bc.statusUpdater.Update(backup, &v1alpha1.BackupCondition{ + Command: v1alpha1.LogStopCommand, + Type: v1alpha1.BackupComplete, + Status: corev1.ConditionTrue, + }, nil) } if v1alpha1.IsLogBackupAlreadyStop(backup) { return nil diff --git a/tests/e2e/br/br.go b/tests/e2e/br/br.go index 8308f11bd5..cab5c902e6 100644 --- a/tests/e2e/br/br.go +++ b/tests/e2e/br/br.go @@ -667,6 +667,63 @@ var _ = ginkgo.Describe("Backup and Restore", func() { framework.ExpectEqual(cleaned, true, "storage should be cleaned") }) + ginkgo.It("test stop the log backup on schedule phase", func() { + backupClusterName := "log-backup" + backupVersion := utilimage.TiDBLatest + enableTLS := false + skipCA := false + backupName := backupClusterName + typ := strings.ToLower(typeBR) + + ns := f.Namespace.Name + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ginkgo.By("Create log-backup.enable TiDB cluster for log backup") + err := createLogBackupEnableTidbCluster(f, backupClusterName, backupVersion, 
enableTLS, skipCA) + framework.ExpectNoError(err) + + ginkgo.By("Wait for backup TiDB cluster ready") + err = utiltidbcluster.WaitForTCConditionReady(f.ExtClient, ns, backupClusterName, tidbReadyTimeout, 0) + framework.ExpectNoError(err) + + ginkgo.By("Create RBAC for log backup") + err = createRBAC(f) + framework.ExpectNoError(err) + + ginkgo.By("Start log backup") + backup, err := createBackupAndWaitForSchedule(f, backupName, backupClusterName, typ, func(backup *v1alpha1.Backup) { + // A tricky way to keep the log backup from starting. + backup.Spec.ToolImage = "null:invalid" + backup.Spec.CleanPolicy = v1alpha1.CleanPolicyTypeRetain + backup.Spec.Mode = v1alpha1.BackupModeLog + }) + framework.ExpectNoError(err) + framework.ExpectEqual(backup.Status.CommitTs, "") + + ginkgo.By("Delete backup") + err = deleteBackup(f, backupName) + framework.ExpectNoError(err) + + // To make sure the task is deleted. + ginkgo.By("Start log backup the second time") + backup, err = createBackupAndWaitForComplete(f, backupName, backupClusterName, typ, func(backup *v1alpha1.Backup) { + backup.Spec.CleanPolicy = v1alpha1.CleanPolicyTypeDelete + backup.Spec.Mode = v1alpha1.BackupModeLog + }) + framework.ExpectNoError(err) + framework.ExpectNotEqual(backup.Status.CommitTs, "") + + ginkgo.By("Delete backup") + err = deleteBackup(f, backupName) + framework.ExpectNoError(err) + + ginkgo.By("Check if all backup files in storage is deleted") + cleaned, err := f.Storage.IsDataCleaned(ctx, ns, backup.Spec.S3.Prefix) // now we only use s3 + framework.ExpectNoError(err) + framework.ExpectEqual(cleaned, true, "storage should be cleaned") + }) + // TODO: tikv error:[ERROR] [mod.rs:747] ["Status server error: TLS handshake error"], will open this test when this is fixed. 
// ginkgo.It("Log backup progress track with tls cluster", func() { // backupVersion := utilimage.TiDBLatest @@ -1382,6 +1439,32 @@ func createBackupAndWaitForRunning(f *e2eframework.Framework, name, tcName, typ return f.ExtClient.PingcapV1alpha1().Backups(ns).Get(context.TODO(), name, metav1.GetOptions{}) } +func createBackupAndWaitForSchedule(f *e2eframework.Framework, name, tcName, typ string, configure func(*v1alpha1.Backup)) (*v1alpha1.Backup, error) { + ns := f.Namespace.Name + // secret to visit tidb cluster + s := brutil.GetSecret(ns, name, "") + if _, err := f.ClientSet.CoreV1().Secrets(ns).Create(context.TODO(), s, metav1.CreateOptions{}); err != nil { + return nil, err + } + + backupFolder := time.Now().Format(time.RFC3339) + cfg := f.Storage.Config(ns, backupFolder) + backup := brutil.GetBackup(ns, name, tcName, typ, cfg) + + if configure != nil { + configure(backup) + } + + if _, err := f.ExtClient.PingcapV1alpha1().Backups(ns).Create(context.TODO(), backup, metav1.CreateOptions{}); err != nil { + return nil, err + } + + if err := brutil.WaitForBackupOnScheduled(f.ExtClient, ns, name, backupCompleteTimeout); err != nil { + return backup, err + } + return f.ExtClient.PingcapV1alpha1().Backups(ns).Get(context.TODO(), name, metav1.GetOptions{}) +} + func getBackoffRetryNum(f *e2eframework.Framework, backup *v1alpha1.Backup) (int, error) { ns := f.Namespace.Name name := backup.Name diff --git a/tests/e2e/br/framework/br/wait.go b/tests/e2e/br/framework/br/wait.go index 72e510c62b..7d90ed7335 100644 --- a/tests/e2e/br/framework/br/wait.go +++ b/tests/e2e/br/framework/br/wait.go @@ -121,6 +121,33 @@ func WaitForBackupOnRunning(c versioned.Interface, ns, name string, timeout time return nil } +// WaitForBackupOnScheduled will poll and wait until timeout or backup phase is scheduled +func WaitForBackupOnScheduled(c versioned.Interface, ns, name string, timeout time.Duration) error { + if err := wait.PollImmediate(poll, timeout, func() (bool, error) { + b, err := 
c.PingcapV1alpha1().Backups(ns).Get(context.TODO(), name, metav1.GetOptions{}) + if err != nil { + return false, err + } + if b.Status.Phase == v1alpha1.BackupScheduled { + return true, nil + } + + for _, cond := range b.Status.Conditions { + switch cond.Type { + case v1alpha1.BackupFailed, v1alpha1.BackupInvalid: + if cond.Status == corev1.ConditionTrue { + return false, fmt.Errorf("backup is failed, reason: %s, message: %s", cond.Reason, cond.Message) + } + default: // do nothing + } + } + return false, nil + }); err != nil { + return fmt.Errorf("can't wait for backup scheduled: %v", err) + } + return nil +} + // WaitForBackupFailed will poll and wait until timeout or backup failed condition is true func WaitForBackupFailed(c versioned.Interface, ns, name string, timeout time.Duration) error { if err := wait.PollImmediate(poll, timeout, func() (bool, error) {