Skip to content

Commit

Permalink
Fix inconsistancy behaviour of Backup and Restore hook execution
Browse files Browse the repository at this point in the history
Signed-off-by: allenxu404 <[email protected]>
  • Loading branch information
allenxu404 committed Oct 25, 2023
1 parent fd8350f commit dfc29fd
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 26 deletions.
9 changes: 4 additions & 5 deletions internal/hook/wait_exec_hook_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
if hook.Hook.WaitTimeout.Duration != 0 && time.Since(waitStart) > hook.Hook.WaitTimeout.Duration {
err := fmt.Errorf("hook %s in container %s expired before executing", hook.HookName, hook.Hook.Container)
hookLog.Error(err)
errors = append(errors, err)
if hook.Hook.OnError == velerov1api.HookErrorModeFail {
errors = append(errors, err)
cancel()
return
}
Expand All @@ -172,8 +172,9 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
}
if err := e.PodCommandExecutor.ExecutePodCommand(hookLog, podMap, pod.Namespace, pod.Name, hook.HookName, eh); err != nil {
hookLog.WithError(err).Error("Error executing hook")
err = fmt.Errorf("hook %s failed to execute, error: %s", hook.HookName, err)
errors = append(errors, err)
if hook.Hook.OnError == velerov1api.HookErrorModeFail {
errors = append(errors, err)
cancel()
return
}
Expand Down Expand Up @@ -222,9 +223,7 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
},
)
hookLog.Error(err)
if hook.Hook.OnError == velerov1api.HookErrorModeFail {
errors = append(errors, err)
}
errors = append(errors, err)
}
}

Expand Down
14 changes: 7 additions & 7 deletions internal/hook/wait_exec_hook_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,10 @@ func TestWaitExecHandleHooks(t *testing.T) {
Result(),
},
},
expectedErrors: []error{errors.New("pod hook error")},
expectedErrors: []error{errors.New("hook <from-annotation> failed to execute, error: pod hook error")},
},
{
name: "should return no error when hook from annotation fails with on error mode continue",
name: "should return error when hook from annotation fails with on error mode continue",
initialPod: builder.ForPod("default", "my-pod").
ObjectMeta(builder.WithAnnotations(
podRestoreHookCommandAnnotationKey, "/usr/bin/foo",
Expand Down Expand Up @@ -278,7 +278,7 @@ func TestWaitExecHandleHooks(t *testing.T) {
Result(),
},
},
expectedErrors: nil,
expectedErrors: []error{errors.New("hook <from-annotation> failed to execute, error: pod hook error")},
},
{
name: "should return no error when hook from annotation executes after 10ms wait for container to start",
Expand Down Expand Up @@ -422,7 +422,7 @@ func TestWaitExecHandleHooks(t *testing.T) {
},
},
{
name: "should return no error when spec hook with wait timeout expires with OnError mode Continue",
name: "should return error when spec hook with wait timeout expires with OnError mode Continue",
groupResource: "pods",
initialPod: builder.ForPod("default", "my-pod").
Containers(&v1.Container{
Expand All @@ -435,7 +435,7 @@ func TestWaitExecHandleHooks(t *testing.T) {
},
}).
Result(),
expectedErrors: nil,
expectedErrors: []error{errors.New("hook my-hook-1 in container container1 in pod default/my-pod not executed: context deadline exceeded")},
byContainer: map[string][]PodExecRestoreHook{
"container1": {
{
Expand Down Expand Up @@ -515,8 +515,8 @@ func TestWaitExecHandleHooks(t *testing.T) {
sharedHooksContextTimeout: time.Millisecond,
},
{
name: "should return no error when shared hooks context is canceled before spec hook with OnError mode Continue executes",
expectedErrors: nil,
name: "should return error when shared hooks context is canceled before spec hook with OnError mode Continue executes",
expectedErrors: []error{errors.New("hook my-hook-1 in container container1 in pod default/my-pod not executed: context deadline exceeded")},
groupResource: "pods",
initialPod: builder.ForPod("default", "my-pod").
Containers(&v1.Container{
Expand Down
24 changes: 10 additions & 14 deletions pkg/restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,6 @@ func (kr *kubernetesRestorer) RestoreWithResolvers(
discoveryHelper: kr.discoveryHelper,
resourcePriorities: kr.resourcePriorities,
resourceRestoreHooks: resourceRestoreHooks,
hooksErrs: make(chan error),
waitExecHookHandler: waitExecHookHandler,
hooksContext: hooksCtx,
hooksCancelFunc: hooksCancelFunc,
Expand Down Expand Up @@ -359,7 +358,6 @@ type restoreContext struct {
discoveryHelper discovery.Helper
resourcePriorities Priorities
hooksWaitGroup sync.WaitGroup
hooksErrs chan error
resourceRestoreHooks []hook.ResourceRestoreHook
waitExecHookHandler hook.WaitExecHookHandler
hooksContext go_context.Context
Expand Down Expand Up @@ -652,11 +650,7 @@ func (ctx *restoreContext) execute() (results.Result, results.Result) {
ctx.log.Info("Waiting for all post-restore-exec hooks to complete")

ctx.hooksWaitGroup.Wait()
close(ctx.hooksErrs)
}()
for err := range ctx.hooksErrs {
errs.Velero = append(errs.Velero, err.Error())
}
ctx.log.Info("Done waiting for all post-restore exec hooks to complete")

return warnings, errs
Expand Down Expand Up @@ -1716,7 +1710,7 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso
}

if groupResource == kuberesource.Pods {
ctx.waitExec(createdObj)
ctx.waitExec(createdObj, &errs)
}

// Wait for a CRD to be available for instantiating resources
Expand Down Expand Up @@ -1884,17 +1878,18 @@ func restorePodVolumeBackups(ctx *restoreContext, createdObj *unstructured.Unstr
}

// waitExec executes hooks in a restored pod's containers when they become ready.
func (ctx *restoreContext) waitExec(createdObj *unstructured.Unstructured) {
func (ctx *restoreContext) waitExec(createdObj *unstructured.Unstructured, errs *results.Result) {
ctx.hooksWaitGroup.Add(1)
go func() {
// Done() will only be called after all errors have been successfully sent
// on the ctx.podVolumeErrs channel.
defer ctx.hooksWaitGroup.Done()

ns := createdObj.GetNamespace()
pod := new(v1.Pod)
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(createdObj.UnstructuredContent(), &pod); err != nil {
ctx.log.WithError(err).Error("error converting unstructured pod")
ctx.hooksErrs <- err
errs.Add(ns, err)
return
}
execHooksByContainer, err := hook.GroupRestoreExecHooks(
Expand All @@ -1904,17 +1899,18 @@ func (ctx *restoreContext) waitExec(createdObj *unstructured.Unstructured) {
)
if err != nil {
ctx.log.WithError(err).Errorf("error getting exec hooks for pod %s/%s", pod.Namespace, pod.Name)
ctx.hooksErrs <- err
errs.Add(ns, err)
return
}

if errs := ctx.waitExecHookHandler.HandleHooks(ctx.hooksContext, ctx.log, pod, execHooksByContainer); len(errs) > 0 {
ctx.log.WithError(kubeerrs.NewAggregate(errs)).Error("unable to successfully execute post-restore hooks")
if errors := ctx.waitExecHookHandler.HandleHooks(ctx.hooksContext, ctx.log, pod, execHooksByContainer); len(errors) > 0 {
ctx.log.WithError(kubeerrs.NewAggregate(errors)).Error("unable to successfully execute post-restore hooks")
ctx.hooksCancelFunc()

for _, err := range errs {
for _, err := range errors {
// Errors are already logged in the HandleHooks method.
ctx.hooksErrs <- err
errs.Add(ns, err)

}
}
}()
Expand Down

0 comments on commit dfc29fd

Please sign in to comment.