Skip to content

Commit

Permalink
The backup cleaner can't clean the CR at the schedule phase before …
Browse files Browse the repository at this point in the history
…task start (#5967) (#5971)

Co-authored-by: RidRisR <[email protected]>
  • Loading branch information
ti-chi-bot and RidRisR authored Dec 10, 2024
1 parent dff5f61 commit 7d05146
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 1 deletion.
6 changes: 5 additions & 1 deletion pkg/backup/backup/backup_cleaner.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ func (bc *backupCleaner) StopLogBackup(backup *v1alpha1.Backup) error {
return fmt.Errorf("backup %s/%s spec.BR shouldn't be nil", backup.GetNamespace(), backup.GetName())
}
if !v1alpha1.IsLogBackupAlreadyStart(backup) {
return nil
return bc.statusUpdater.Update(backup, &v1alpha1.BackupCondition{
Command: v1alpha1.LogStopCommand,
Type: v1alpha1.BackupComplete,
Status: corev1.ConditionTrue,
}, nil)
}
if v1alpha1.IsLogBackupAlreadyStop(backup) {
return nil
Expand Down
83 changes: 83 additions & 0 deletions tests/e2e/br/br.go
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,63 @@ var _ = ginkgo.Describe("Backup and Restore", func() {
framework.ExpectEqual(cleaned, true, "storage should be cleaned")
})

// Scenario: a log backup CR that is still in the scheduled phase is deleted, and a
// second log backup with the same name is then created, completed and deleted.
// The first backup is given an invalid tool image so it stays in the scheduled phase;
// the final check verifies that the storage prefix of the second backup is cleaned.
ginkgo.It("test stop the log backup on schedule phase", func() {
backupClusterName := "log-backup"
backupVersion := utilimage.TiDBLatest
enableTLS := false
skipCA := false
// The Backup CR reuses the cluster name; both backups below share this name.
backupName := backupClusterName
typ := strings.ToLower(typeBR)

ns := f.Namespace.Name
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

ginkgo.By("Create log-backup.enable TiDB cluster for log backup")
err := createLogBackupEnableTidbCluster(f, backupClusterName, backupVersion, enableTLS, skipCA)
framework.ExpectNoError(err)

ginkgo.By("Wait for backup TiDB cluster ready")
err = utiltidbcluster.WaitForTCConditionReady(f.ExtClient, ns, backupClusterName, tidbReadyTimeout, 0)
framework.ExpectNoError(err)

ginkgo.By("Create RBAC for log backup")
err = createRBAC(f)
framework.ExpectNoError(err)

ginkgo.By("Start log backup")
backup, err := createBackupAndWaitForSchedule(f, backupName, backupClusterName, typ, func(backup *v1alpha1.Backup) {
// A tricky way to keep the log backup from starting: the tool image is invalid.
backup.Spec.ToolImage = "null:invalid"
backup.Spec.CleanPolicy = v1alpha1.CleanPolicyTypeRetain
backup.Spec.Mode = v1alpha1.BackupModeLog
})
framework.ExpectNoError(err)
// The backup is only scheduled at this point, so no commit ts is expected in its status.
framework.ExpectEqual(backup.Status.CommitTs, "")

ginkgo.By("Delete backup")
err = deleteBackup(f, backupName)
framework.ExpectNoError(err)

// To make sure the task is deleted: create a backup with the same name again,
// this time with the default tool image, and wait for it to complete.
ginkgo.By("Start log backup the second time")
backup, err = createBackupAndWaitForComplete(f, backupName, backupClusterName, typ, func(backup *v1alpha1.Backup) {
backup.Spec.CleanPolicy = v1alpha1.CleanPolicyTypeDelete
backup.Spec.Mode = v1alpha1.BackupModeLog
})
framework.ExpectNoError(err)
// Unlike the first attempt, the completed backup must have a commit ts recorded.
framework.ExpectNotEqual(backup.Status.CommitTs, "")

ginkgo.By("Delete backup")
err = deleteBackup(f, backupName)
framework.ExpectNoError(err)

ginkgo.By("Check if all backup files in storage is deleted")
// `backup` here is the second backup, created with CleanPolicyTypeDelete.
cleaned, err := f.Storage.IsDataCleaned(ctx, ns, backup.Spec.S3.Prefix) // now we only use s3
framework.ExpectNoError(err)
framework.ExpectEqual(cleaned, true, "storage should be cleaned")
})

// TODO: TiKV reports [ERROR] [mod.rs:747] ["Status server error: TLS handshake error"]; re-enable this test once that is fixed.
// ginkgo.It("Log backup progress track with tls cluster", func() {
// backupVersion := utilimage.TiDBLatest
Expand Down Expand Up @@ -1382,6 +1439,32 @@ func createBackupAndWaitForRunning(f *e2eframework.Framework, name, tcName, typ
return f.ExtClient.PingcapV1alpha1().Backups(ns).Get(context.TODO(), name, metav1.GetOptions{})
}

// createBackupAndWaitForSchedule creates the access secret and a Backup CR called
// name in the framework namespace, then waits until the backup reaches the
// scheduled phase.
//
// configure, when non-nil, is applied to the generated Backup before it is
// submitted. On success the Backup freshly read from the API server is returned.
// If waiting fails, the locally built Backup is returned together with the error.
func createBackupAndWaitForSchedule(f *e2eframework.Framework, name, tcName, typ string, configure func(*v1alpha1.Backup)) (*v1alpha1.Backup, error) {
	ns := f.Namespace.Name
	ctx := context.TODO()

	// The secret is what the backup uses to visit the TiDB cluster.
	secret := brutil.GetSecret(ns, name, "")
	_, err := f.ClientSet.CoreV1().Secrets(ns).Create(ctx, secret, metav1.CreateOptions{})
	if err != nil {
		return nil, err
	}

	// The storage folder is named after the current time in RFC 3339 format.
	storageCfg := f.Storage.Config(ns, time.Now().Format(time.RFC3339))
	backup := brutil.GetBackup(ns, name, tcName, typ, storageCfg)
	if configure != nil {
		configure(backup)
	}

	backups := f.ExtClient.PingcapV1alpha1().Backups(ns)
	if _, err = backups.Create(ctx, backup, metav1.CreateOptions{}); err != nil {
		return nil, err
	}

	err = brutil.WaitForBackupOnScheduled(f.ExtClient, ns, name, backupCompleteTimeout)
	if err != nil {
		return backup, err
	}
	return backups.Get(ctx, name, metav1.GetOptions{})
}

func getBackoffRetryNum(f *e2eframework.Framework, backup *v1alpha1.Backup) (int, error) {
ns := f.Namespace.Name
name := backup.Name
Expand Down
27 changes: 27 additions & 0 deletions tests/e2e/br/framework/br/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,33 @@ func WaitForBackupOnRunning(c versioned.Interface, ns, name string, timeout time
return nil
}

// WaitForBackupOnScheduled polls the backup ns/name until its phase is
// BackupScheduled or the timeout expires.
//
// It returns nil once the scheduled phase is observed. It returns an error if
// the backup cannot be fetched, if a BackupFailed or BackupInvalid condition is
// true while waiting, or if polling ends without the phase being observed.
func WaitForBackupOnScheduled(c versioned.Interface, ns, name string, timeout time.Duration) error {
	if err := wait.PollImmediate(poll, timeout, func() (bool, error) {
		b, err := c.PingcapV1alpha1().Backups(ns).Get(context.TODO(), name, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		if b.Status.Phase == v1alpha1.BackupScheduled {
			return true, nil
		}

		// Abort the poll early when the backup has already failed or been
		// rejected as invalid instead of waiting for the full timeout.
		for _, cond := range b.Status.Conditions {
			switch cond.Type {
			case v1alpha1.BackupFailed, v1alpha1.BackupInvalid:
				if cond.Status == corev1.ConditionTrue {
					return false, fmt.Errorf("backup is failed, reason: %s, message: %s", cond.Reason, cond.Message)
				}
			}
		}
		return false, nil
	}); err != nil {
		return fmt.Errorf("can't wait for backup scheduled: %v", err)
	}
	return nil
}

// WaitForBackupFailed will poll and wait until timeout or backup failed condition is true
func WaitForBackupFailed(c versioned.Interface, ns, name string, timeout time.Duration) error {
if err := wait.PollImmediate(poll, timeout, func() (bool, error) {
Expand Down

0 comments on commit 7d05146

Please sign in to comment.