From 508662a42aa3ab6f08074f74253c433b2a11792a Mon Sep 17 00:00:00 2001 From: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Date: Sat, 19 Oct 2024 15:43:42 +1100 Subject: [PATCH 1/8] Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> --- workflow/controller/operator.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go index 94b177236027..a8c4349171a0 100644 --- a/workflow/controller/operator.go +++ b/workflow/controller/operator.go @@ -1406,7 +1406,7 @@ func (woc *wfOperationCtx) assessNodeStatus(ctx context.Context, pod *apiv1.Pod, woc.markNodePhase(ctrNodeName, wfv1.NodeRunning) case c.State.Terminated != nil: exitCode := int(c.State.Terminated.ExitCode) - message := fmt.Sprintf("%s (exit code %d): %s", c.State.Terminated.Reason, exitCode, c.State.Terminated.Message) + message := fmt.Sprintf("%s (exit code %d): %s in %s", c.State.Terminated.Reason, exitCode, c.State.Terminated.Message, c.Name) switch exitCode { case 0: woc.markNodePhase(ctrNodeName, wfv1.NodeSucceeded) @@ -1629,6 +1629,7 @@ func (woc *wfOperationCtx) inferFailedReason(pod *apiv1.Pod, tmpl *wfv1.Template if t.Message != "" { msg = fmt.Sprintf("%s: %s", msg, t.Message) } + msg = fmt.Sprintf("%s in %s", msg, ctr.Name) switch { case ctr.Name == common.InitContainerName: From 1bfd8b21d75b6686164af6243d0c9def6764cafb Mon Sep 17 00:00:00 2001 From: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Date: Sat, 19 Oct 2024 16:12:42 +1100 Subject: [PATCH 2/8] Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> --- test/e2e/cli_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/cli_test.go b/test/e2e/cli_test.go index 067a85af4d14..66963a537736 100644 --- a/test/e2e/cli_test.go +++ b/test/e2e/cli_test.go @@ -894,7 +894,7 @@ func (s *CLISuite) TestWorkflowRetryWithRecreatedPVC() { assert.Equal(t, wfv1.NodeFailed, status.Nodes.FindByDisplayName("print").Phase) // This step is failed intentionally to allow retry. The error message is not related to PVC that is deleted // previously since it is re-created during retry. - assert.Equal(t, "Error (exit code 1)", status.Nodes.FindByDisplayName("print").Message) + assert.Equal(t, "Error (exit code 1) in main", status.Nodes.FindByDisplayName("print").Message) }) } From 81c0afd725f64acfae9746c4b9fa589f9e5622d1 Mon Sep 17 00:00:00 2001 From: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Date: Sun, 27 Oct 2024 19:37:56 +1100 Subject: [PATCH 3/8] Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> --- workflow/controller/operator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go index a8c4349171a0..ff568f2f2c6c 100644 --- a/workflow/controller/operator.go +++ b/workflow/controller/operator.go @@ -1629,7 +1629,7 @@ func (woc *wfOperationCtx) inferFailedReason(pod *apiv1.Pod, tmpl *wfv1.Template if t.Message != "" { msg = fmt.Sprintf("%s: %s", msg, t.Message) } - msg = fmt.Sprintf("%s in %s", msg, ctr.Name) + msg = fmt.Sprintf("%s: %s", ctr.Name, msg) switch { case ctr.Name == common.InitContainerName: From bfc2dac11dd01c857b5d2404808ace8dd814965a Mon Sep 17 00:00:00 2001 From: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Date: Sun, 27 Oct 2024 19:38:10 +1100 Subject: [PATCH 4/8] Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> --- workflow/controller/operator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go index ff568f2f2c6c..27376fd2efd2 100644 --- a/workflow/controller/operator.go +++ b/workflow/controller/operator.go @@ -1406,7 +1406,7 @@ func (woc *wfOperationCtx) assessNodeStatus(ctx context.Context, pod *apiv1.Pod, woc.markNodePhase(ctrNodeName, wfv1.NodeRunning) case c.State.Terminated != nil: exitCode := int(c.State.Terminated.ExitCode) - message := fmt.Sprintf("%s (exit code %d): %s in %s", c.State.Terminated.Reason, exitCode, c.State.Terminated.Message, c.Name) + message := fmt.Sprintf("%s: %s (exit code %d): %s", c.Name, c.State.Terminated.Reason, exitCode, c.State.Terminated.Message) switch exitCode { case 0: woc.markNodePhase(ctrNodeName, wfv1.NodeSucceeded) From 5637d0949fad368732f6630332bdc9695f952c18 Mon Sep 17 00:00:00 2001 From: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Date: Sun, 27 Oct 2024 19:38:19 +1100 Subject: [PATCH 5/8] Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> --- test/e2e/cli_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/cli_test.go b/test/e2e/cli_test.go index 66963a537736..7ec42fc16508 100644 --- a/test/e2e/cli_test.go +++ b/test/e2e/cli_test.go @@ -894,7 +894,7 @@ func (s *CLISuite) TestWorkflowRetryWithRecreatedPVC() { assert.Equal(t, wfv1.NodeFailed, status.Nodes.FindByDisplayName("print").Phase) // This step is failed intentionally to allow retry. The error message is not related to PVC that is deleted // previously since it is re-created during retry. - assert.Equal(t, "Error (exit code 1) in main", status.Nodes.FindByDisplayName("print").Message) + assert.Equal(t, "main: Error (exit code 1)", status.Nodes.FindByDisplayName("print").Message) }) } From c920299956f80f84b1b8a21e64111cca753cfd85 Mon Sep 17 00:00:00 2001 From: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Date: Tue, 29 Oct 2024 06:57:02 +1100 Subject: [PATCH 6/8] Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> --- docs/upgrading.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/upgrading.md b/docs/upgrading.md index 74ec313eb94f..c8cadc19f0ec 100644 --- a/docs/upgrading.md +++ b/docs/upgrading.md @@ -93,6 +93,10 @@ To disable this set `metricsConfig.secure` to `false`. When returning a map or array in an expression, you would get a Golang representation. This now returns plain JSON. +### Container name in error messages + +Error messages are prefixed with container name, you may need to adjust your `lastRetry.message` expressions or `TRANSIENT_ERROR_PATTERN` variable. See [Conditional retries](retries.md) + ## Upgrading to v3.5 There are no known breaking changes in this release. From c79b57b36dfd0ce02c779383604ecb183392e43f Mon Sep 17 00:00:00 2001 From: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Date: Tue, 29 Oct 2024 07:12:10 +1100 Subject: [PATCH 7/8] Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> --- docs/upgrading.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/upgrading.md b/docs/upgrading.md index 001cd2cec471..0da654e03992 100644 --- a/docs/upgrading.md +++ b/docs/upgrading.md @@ -104,7 +104,7 @@ To disable this set `metricsConfig.secure` to `false`. When returning a map or array in an expression, you would get a Golang representation. This now returns plain JSON. -### Container name in error messages +### Added container name to workflow node error messages Error messages are prefixed with container name, you may need to adjust your `lastRetry.message` expressions or `TRANSIENT_ERROR_PATTERN` variable. See [Conditional retries](retries.md) From 225773a30f679b932fff270b3f743a01069377b9 Mon Sep 17 00:00:00 2001 From: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Date: Tue, 29 Oct 2024 07:12:23 +1100 Subject: [PATCH 8/8] Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: tooptoop4 <33283496+tooptoop4@users.noreply.github.com> --- docs/upgrading.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/upgrading.md b/docs/upgrading.md index 0da654e03992..7ee530eff963 100644 --- a/docs/upgrading.md +++ b/docs/upgrading.md @@ -106,7 +106,8 @@ This now returns plain JSON. ### Added container name to workflow node error messages -Error messages are prefixed with container name, you may need to adjust your `lastRetry.message` expressions or `TRANSIENT_ERROR_PATTERN` variable. See [Conditional retries](retries.md) +Workflow node error messages are now prefixed with the container name. +If you are using [Conditional Retries](retries.md#conditional-retries), you may need to adjust your usage of `lastRetry.message` expressions or the `TRANSIENT_ERROR_PATTERN` environment variable. ### `ARGO_TEMPLATE` removed from main container