Skip to content

Commit

Permalink
fix: Skip execution control for exit nodes. Fixes #13060
Browse files Browse the repository at this point in the history
Signed-off-by: oninowang <[email protected]>
  • Loading branch information
oninowang committed Jun 25, 2024
1 parent 56b8628 commit a5c011f
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 12 deletions.
4 changes: 2 additions & 2 deletions pkg/apis/workflow/v1alpha1/workflow_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2379,8 +2379,8 @@ func (n NodeStatus) IsDaemoned() bool {
}

// IsExitNode returns whether or not node run as exit handler.
func (ws NodeStatus) IsExitNode() bool {
return strings.HasSuffix(ws.DisplayName, ".onExit")
func (n NodeStatus) IsExitNode() bool {
return strings.HasSuffix(n.DisplayName, ".onExit")
}

func (n NodeStatus) Succeeded() bool {
Expand Down
2 changes: 1 addition & 1 deletion workflow/controller/exec_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func (woc *wfOperationCtx) handleExecutionControlError(nodeID string, wfNodesLoc
// if node is a pod created from ContainerSet template
// then need to fail child nodes so they will not hang in Pending after pod deletion
for _, child := range children {
if !child.IsExitNode() && !child.Fulfilled() {
if !child.Fulfilled() {
woc.markNodePhase(child.Name, wfv1.NodeFailed, errorMsg)
}
}
Expand Down
16 changes: 16 additions & 0 deletions workflow/controller/exit_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,19 @@ func (woc *wfOperationCtx) resolveExitTmplArgument(args wfv1.Arguments, prefix s
}
return newArgs, nil
}

// IsPartOfExitHandler returns whether node is part of exit handler.
func (woc *wfOperationCtx) IsPartOfExitHandler(node wfv1.NodeStatus) bool {
currentNode := &node
for !currentNode.IsExitNode() {
if currentNode.BoundaryID == "" {
return false
}
boundaryNode, err := woc.wf.Status.Nodes.Get(currentNode.BoundaryID)
if err != nil {
woc.log.Panicf("was unable to obtain node for %s", currentNode.BoundaryID)
}
currentNode = boundaryNode
}
return true
}
2 changes: 0 additions & 2 deletions workflow/controller/exit_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -828,8 +828,6 @@ status:
type: Steps
hello-world-647r7-1045616760:
boundaryID: hello-world-647r7-206029318
children:
- hello-world-647r7-370991976
displayName: '[0]'
finishedAt: null
id: hello-world-647r7-1045616760
Expand Down
17 changes: 10 additions & 7 deletions workflow/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1271,16 +1271,19 @@ func (woc *wfOperationCtx) failNodesWithoutCreatedPodsAfterDeadlineOrShutdown()
if node.Fulfilled() {
continue
}
// fail suspended nodes or taskset nodes when shutting down
if woc.GetShutdownStrategy().Enabled() && (node.IsActiveSuspendNode() || node.IsTaskSetNode()) {
message := fmt.Sprintf("Stopped with strategy '%s'", woc.GetShutdownStrategy())
woc.markNodePhase(node.Name, wfv1.NodeFailed, message)
continue
// Only fail nodes that are not part of exit handler if we are "Stopping" or all pods if we are "Terminating"
if woc.GetShutdownStrategy().Enabled() && !woc.GetShutdownStrategy().ShouldExecute(woc.IsPartOfExitHandler(node)) {
// fail suspended nodes or taskset nodes when shutting down
if node.IsActiveSuspendNode() || node.IsTaskSetNode() {
message := fmt.Sprintf("Stopped with strategy '%s'", woc.GetShutdownStrategy())
woc.markNodePhase(node.Name, wfv1.NodeFailed, message)
continue
}
}

// fail all pending and suspended nodes when exceeding deadline
// fail pending and suspended nodes that are not part of exit handler when exceeding deadline
deadlineExceeded := woc.workflowDeadline != nil && time.Now().UTC().After(*woc.workflowDeadline)
if deadlineExceeded && (node.Phase == wfv1.NodePending || node.IsActiveSuspendNode()) {
if deadlineExceeded && !woc.IsPartOfExitHandler(node) && (node.Phase == wfv1.NodePending || node.IsActiveSuspendNode()) {
message := "Step exceeded its deadline"
woc.markNodePhase(node.Name, wfv1.NodeFailed, message)
continue
Expand Down

0 comments on commit a5c011f

Please sign in to comment.