Skip to content

Commit

Permalink
fix: add retry for kill pod
Browse files Browse the repository at this point in the history
Signed-off-by: henrywangx <[email protected]>
  • Loading branch information
henrywangx committed Dec 22, 2021
1 parent abf2dd5 commit 8de0afa
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
20 changes: 19 additions & 1 deletion cmd/argoexec/commands/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ import (
"github.com/argoproj/pkg/stats"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"k8s.io/apimachinery/pkg/util/wait"

errorsutil "github.com/argoproj/argo-workflows/v3/util/errors"
"github.com/argoproj/argo-workflows/v3/workflow/executor"
)

func NewWaitCommand() *cobra.Command {
Expand All @@ -31,7 +35,21 @@ func waitContainer(ctx context.Context) error {
stats.StartStatsTicker(5 * time.Minute)

// Deferred cleanup: kill the sidecar containers, retrying via
// wait.ExponentialBackoff, and record any final failure on wfExecutor.
// NOTE(review): this is a diff view rendered without +/- markers. The
// `if err := wfExecutor.KillSidecars(ctx)` line directly below appears to be
// the single pre-change line that the retry loop replaces (the file header
// reports "19 additions & 1 deletion") — confirm against the commit.
defer func() {
if err := wfExecutor.KillSidecars(ctx); err != nil {
// Killing sidecar containers
// retryCnt counts failed transient attempts; it is only used as a log field.
retryCnt := 0
// The condition func's return values drive the backoff: (true, nil) stops
// with success, (false, nil) requests another attempt, and (false, err)
// aborts and surfaces err (per the wait.ExponentialBackoff contract).
// executor.ExecutorRetry is presumably a wait.Backoff — TODO confirm.
err := wait.ExponentialBackoff(executor.ExecutorRetry, func() (bool, error) {
err := wfExecutor.KillSidecars(ctx)
if err == nil {
return true, nil
}
// Only errors classified as transient are retried; each one is logged
// as a warning together with the attempt counter.
if errorsutil.IsTransientErr(err) {
log.WithError(err).WithField("retryCnt", retryCnt).Warn("fail to kill sidecar")
retryCnt++
return false, nil
}
// Non-transient error: stop retrying and propagate it.
return false, err
})
// Any error left after the backoff is attached to the executor rather than
// returned, since this runs inside a defer.
// NOTE(review): if every attempt failed transiently, err here is presumably
// the backoff's timeout error, and the underlying cause is only in the
// warnings logged above — verify.
if err != nil {
wfExecutor.AddError(err)
}
}()
Expand Down
17 changes: 12 additions & 5 deletions workflow/executor/k8sapi/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
errorsutil "github.com/argoproj/argo-workflows/v3/util/errors"
waitutil "github.com/argoproj/argo-workflows/v3/util/wait"
"github.com/argoproj/argo-workflows/v3/workflow/common"
"github.com/argoproj/argo-workflows/v3/workflow/executor"
execcommon "github.com/argoproj/argo-workflows/v3/workflow/executor/common"
)

Expand Down Expand Up @@ -110,11 +111,17 @@ func (c *k8sAPIClient) GetContainerStatuses(ctx context.Context) (*corev1.Pod, [

// KillContainer asks the given container to deliver signal sig to PID 1 by
// running `/bin/sh -c "kill -<sig> 1"` through common.ExecPodContainer, using
// the client's config, namespace and pod name and the container's name.
// It returns nil on success, or the error produced by the exec/backoff path.
// The pod argument is not referenced in the lines shown here.
//
// NOTE(review): this is a diff view rendered without +/- markers. The first
// ExecPodContainer/GetExecutorOutput sequence below appears to be the
// pre-change implementation (5 deleted lines) and the wait.ExponentialBackoff
// block after it the replacement — confirm against the commit.
func (c *k8sAPIClient) KillContainer(pod *corev1.Pod, container *corev1.ContainerStatus, sig syscall.Signal) error {
// The signal number is formatted with %d, so the shell receives e.g. `kill -15 1`.
command := []string{"/bin/sh", "-c", fmt.Sprintf("kill -%d 1", sig)}
// Pre-change version (presumably removed): a single attempt, with any
// ExecPodContainer error returned immediately.
exec, err := common.ExecPodContainer(c.config, c.namespace, c.podName, container.Name, true, true, command...)
if err != nil {
return err
}
_, _, err = common.GetExecutorOutput(exec)
// Post-change version: the same two steps wrapped in a backoff loop.
// The condition func returns (true, nil) to stop with success, (false, nil)
// to request another attempt, and (false, err) to abort with err
// (per the wait.ExponentialBackoff contract).
err := wait.ExponentialBackoff(executor.ExecutorRetry, func() (bool, error) {
exec, err := common.ExecPodContainer(c.config, c.namespace, c.podName, container.Name, true, true, command...)
// NOTE(review): the error is discarded here and every failure is retried,
// transient or not. If all attempts fail, the caller presumably sees only
// the backoff's timeout error rather than the underlying cause — consider
// logging err or checking errorsutil.IsTransientErr.
if err != nil {
return false, nil
}
// stdout/stderr of the kill command are ignored; only the error matters.
_, _, err = common.GetExecutorOutput(exec)
// NOTE(review): a failure here aborts the retry loop immediately, even if
// the error is transient. That is the opposite policy from the exec step
// above; confirm it is intentional.
if err != nil {
return false, err
}
return true, nil
})
return err
}

Expand Down

0 comments on commit 8de0afa

Please sign in to comment.