diff --git a/pkg/applicationmanager/controllers/applicationrestore.go b/pkg/applicationmanager/controllers/applicationrestore.go index bcec1bdbed..c532f31a69 100644 --- a/pkg/applicationmanager/controllers/applicationrestore.go +++ b/pkg/applicationmanager/controllers/applicationrestore.go @@ -287,6 +287,8 @@ func (a *ApplicationRestoreController) Reconcile(ctx context.Context, request re } // This channel listens on two values if the CR's time stamp to be updated or to quit the go routine updateCr := make(chan int, utils.RestoreCrChannelBufferSize) + // This go routine updates the CR's timestamp every 15 minutes based on the input + go a.restoreCrTimestampUpdate(restore, updateCr) if err = a.handle(context.TODO(), restore, updateCr); err != nil && err != errResourceBusy { logrus.Errorf("%s: %s/%s: %s", reflect.TypeOf(a), restore.Namespace, restore.Name, err) // The restore CR is done with the go-routine, lets purge it @@ -506,6 +508,23 @@ func (a *ApplicationRestoreController) updateRestoreCRInVolumeStage( func (a *ApplicationRestoreController) restoreVolumes(restore *storkapi.ApplicationRestore, updateCr chan int) error { restore.Status.Stage = storkapi.ApplicationRestoreStageVolumes + var err error + namespacedName := types.NamespacedName{} + namespacedName.Namespace = restore.Namespace + namespacedName.Name = restore.Name + // In case of kdmp driver we may delete resources first before operating on volumes + // Hence we need to update the status. + // This is to prevent px-backup to fail the restore declaring timeout. + restore, err = a.updateRestoreCRInVolumeStage( + namespacedName, + storkapi.ApplicationRestoreStatusInProgress, + storkapi.ApplicationRestoreStageVolumes, + "Volume or Resource(kdmp) restores are in progress", + nil, + ) + if err != nil { + return err + } backup, err := storkops.Instance().GetApplicationBackup(restore.Spec.BackupName, restore.Namespace) if err != nil { return fmt.Errorf("error getting backup spec for restore: %v", err) @@ -565,9 +584,6 @@ func (a *ApplicationRestoreController) restoreVolumes(restore *storkapi.Applicat if restore.Status.Volumes == nil { restore.Status.Volumes = make([]*storkapi.ApplicationRestoreVolumeInfo, 0) } - namespacedName := types.NamespacedName{} - namespacedName.Namespace = restore.Namespace - namespacedName.Name = restore.Name if len(restore.Status.Volumes) != pvcCount { for driverName, vInfos := range backupVolumeInfoMappings { backupVolInfos := vInfos @@ -643,7 +659,7 @@ func (a *ApplicationRestoreController) restoreVolumes(restore *storkapi.Applicat } err = a.resourceCollector.DeleteResources( a.dynamicInterface, - tempObjects, nil) + tempObjects, updateCr) if err != nil { return err } @@ -1342,8 +1358,6 @@ func (a *ApplicationRestoreController) applyResources( // This channel listens on two values if the CR's time stamp to be updated or to quit the go routine startTime := time.Now() - // this go routine updates the CR's timestamp every 15 minutes - go a.restoreCrTimestampUpdate(restore, updateCr) for _, o := range objects { elapsedTime := time.Since(startTime) if elapsedTime > utils.TimeoutUpdateRestoreCrTimestamp { @@ -1395,7 +1409,8 @@ func (a *ApplicationRestoreController) applyResources( if elapsedTime > utils.TimeoutUpdateRestoreCrProgress { restore, err = a.updateRestoreCR(restore, namespacedName, len(tempResourceList), - string(storkapi.ApplicationRestoreResourceApplying)) + string(storkapi.ApplicationRestoreResourceApplying), + restore.Status.ResourceCount) if err != nil { // no need to return error lets wait for next turn to write timestamp. log.ApplicationRestoreLog(restore).Errorf("%v: failed to update timestamp and restored resource count", fn) @@ -1481,6 +1496,7 @@ func (a *ApplicationRestoreController) updateRestoreCR( namespacedName types.NamespacedName, restoredCount int, resourceRestoreStage string, + totalResourceCount int, ) (*storkapi.ApplicationRestore, error) { fn := "updateRestoreCR" //restore := &storkapi.ApplicationRestore{} @@ -1488,6 +1504,7 @@ func (a *ApplicationRestoreController) updateRestoreCR( for i := 0; i < maxRetry; i++ { restore.Status.RestoredResourceCount = restoredCount restore.Status.ResourceRestoreState = storkapi.ApplicationRestoreResourceStateType(resourceRestoreStage) + restore.Status.ResourceCount = totalResourceCount restore.Status.LastUpdateTimestamp = metav1.Now() err = a.client.Update(context.TODO(), restore) if err != nil { diff --git a/pkg/resourcecollector/resourcecollector.go b/pkg/resourcecollector/resourcecollector.go index 52f115e59e..6e7e9048d7 100644 --- a/pkg/resourcecollector/resourcecollector.go +++ b/pkg/resourcecollector/resourcecollector.go @@ -1045,11 +1045,14 @@ func (r *ResourceCollector) DeleteResources( deleteStart := metav1.Now() startTime := time.Now() for _, object := range objects { - elapsedTime := time.Since(startTime) - if elapsedTime > utils.TimeoutUpdateRestoreCrTimestamp { - updateTimestamp <- utils.UpdateRestoreCrTimestampInDeleteResourcePath - startTime = time.Now() + if updateTimestamp != nil { + elapsedTime := time.Since(startTime) + if elapsedTime > utils.TimeoutUpdateRestoreCrTimestamp { + updateTimestamp <- utils.UpdateRestoreCrTimestampInDeleteResourcePath + startTime = time.Now() + } } + // Don't delete objects that support merging if r.mergeSupportedForResource(object) { continue @@ -1075,10 +1078,12 @@ func (r *ResourceCollector) DeleteResources( // Then wait for them to actually be deleted for _, object := range objects { - elapsedTime := time.Since(startTime) - if elapsedTime > utils.TimeoutUpdateRestoreCrTimestamp { - updateTimestamp <- utils.UpdateRestoreCrTimestampInDeleteResourcePath - startTime = time.Now() + if updateTimestamp != nil { + elapsedTime := time.Since(startTime) + if elapsedTime > utils.TimeoutUpdateRestoreCrTimestamp { + updateTimestamp <- utils.UpdateRestoreCrTimestampInDeleteResourcePath + startTime = time.Now() + } } // Objects that support merging aren't deleted if r.mergeSupportedForResource(object) {