From 0a7fe4c4c26c1f6e1d1dba213f5bc1043eda018a Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 19 Mar 2024 23:55:08 +0800 Subject: [PATCH 01/63] fix: test doc Signed-off-by: shuangkun --- docs/container-set-template.md | 8 +++ test/e2e/cli_test.go | 56 +++++++++++++++++++ .../workflow-template-with-containerset.yaml | 32 +++++++++++ 3 files changed, 96 insertions(+) create mode 100644 test/e2e/testdata/workflow-template-with-containerset.yaml diff --git a/docs/container-set-template.md b/docs/container-set-template.md index d27cb27df95b..8d17eee2b5ec 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -116,3 +116,11 @@ Example B: Lopsided requests, e.g. `a -> b` where `a` is cheap and `b` is expens Can you see the problem here? `a` only has small requests, but the container set will use the total of all requests. So it's as if you're using all that GPU for 10h. This will be expensive. Solution: do not use container set when you have lopsided requests. + +## Container Set Retries + +Container Set Retry policies describes how to retry a container nodes in the container set if it fails. + +Number of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. + +The shell command should be normal otherwise it will not be retried. diff --git a/test/e2e/cli_test.go b/test/e2e/cli_test.go index 0f67010cebed..f34dd7ae3b98 100644 --- a/test/e2e/cli_test.go +++ b/test/e2e/cli_test.go @@ -1733,6 +1733,62 @@ func (s *CLISuite) TestPluginStruct() { }) } +func (s *CLISuite) TestWorkflowTemplateWithRetryStrategyInContainerSet() { + var name string + s.Given(). + WorkflowTemplate("@testdata/workflow-template-with-containerset.yaml"). + Workflow(` +metadata: + generateName: workflow-template-containerset- +spec: + workflowTemplateRef: + name: containerset-with-retrystrategy +`). + When(). + CreateWorkflowTemplates(). + SubmitWorkflow(). + WaitForWorkflow(fixtures.ToBeFailed). + Then(). 
+ ExpectWorkflow(func(t *testing.T, metadata *metav1.ObjectMeta, status *wfv1.WorkflowStatus) { + assert.Equal(t, status.Phase, wfv1.WorkflowFailed) + name = metadata.Name + }) + // Success, no need retry + s.Run("ContainerLogs", func() { + s.Given(). + RunCli([]string{"logs", name, name, "-c", "c1"}, func(t *testing.T, output string, err error) { + if assert.NoError(t, err) { + count := strings.Count(output, "capturing logs") + assert.Equal(t, 1, count) + assert.Contains(t, output, "hi") + } + }) + }) + // Command err. No retry logic is entered. + s.Run("ContainerLogs", func() { + s.Given(). + RunCli([]string{"logs", name, name, "-c", "c2"}, func(t *testing.T, output string, err error) { + if assert.NoError(t, err) { + count := strings.Count(output, "capturing logs") + assert.Equal(t, 0, count) + assert.Contains(t, output, "executable file not found in $PATH") + } + }) + }) + // Retry when err. + s.Run("ContainerLogs", func() { + s.Given(). + RunCli([]string{"logs", name, name, "-c", "c3"}, func(t *testing.T, output string, err error) { + if assert.NoError(t, err) { + count := strings.Count(output, "capturing logs") + assert.Equal(t, 2, count) + countFailureInfo := strings.Count(output, "intentional failure") + assert.Equal(t, 2, countFailureInfo) + } + }) + }) +} + func TestCLISuite(t *testing.T) { suite.Run(t, new(CLISuite)) } diff --git a/test/e2e/testdata/workflow-template-with-containerset.yaml b/test/e2e/testdata/workflow-template-with-containerset.yaml new file mode 100644 index 000000000000..463c2a3e901f --- /dev/null +++ b/test/e2e/testdata/workflow-template-with-containerset.yaml @@ -0,0 +1,32 @@ +apiVersion: argoproj.io/v1alpha1 +kind: WorkflowTemplate +metadata: + name: containerset-with-retrystrategy + annotations: + workflows.argoproj.io/description: | + This workflow template is used to create a workflow with containerset. 
+spec: + entrypoint: test + templates: + - name: test + containerSet: + retryStrategy: + retries: "2" + containers: + - name: c1 + image: python:alpine3.6 + command: + - python + - -c + args: + - | + print("hi") + - name: c2 + image: python:alpine3.6 + command: + - invalid + - command + - name: c3 + image: alpine:latest + command: [ sh, -c ] + args: [ "echo intentional failure; exit 1" ] \ No newline at end of file From e57439fd9f4128ba2f53a4f73b90a41d7049b130 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 20 Mar 2024 01:04:39 +0800 Subject: [PATCH 02/63] fix: test Signed-off-by: shuangkun --- pkg/apis/workflow/v1alpha1/container_set_template_types.go | 1 + pkg/apis/workflow/v1alpha1/generated.proto | 1 + 2 files changed, 2 insertions(+) diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index fb685a3e81f5..e0f6b5d5a64c 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -14,6 +14,7 @@ type ContainerSetTemplate struct { VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty" protobuf:"bytes,3,rep,name=volumeMounts"` // RetryStrategy describes how to retry a container nodes in the container set if it fails. // Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. + // The shell command should work fine otherwise it won't be retried. RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } diff --git a/pkg/apis/workflow/v1alpha1/generated.proto b/pkg/apis/workflow/v1alpha1/generated.proto index c06d42d0e9c2..3dd1cd80b1ad 100644 --- a/pkg/apis/workflow/v1alpha1/generated.proto +++ b/pkg/apis/workflow/v1alpha1/generated.proto @@ -421,6 +421,7 @@ message ContainerSetTemplate { // RetryStrategy describes how to retry a container nodes in the container set if it fails. 
// Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. + // The shell command should work fine otherwise it won't be retried. optional ContainerSetRetryStrategy retryStrategy = 5; } From 6bdf3fe40bc286a139aadfed41990c3c3ded5285 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 20 Mar 2024 01:15:04 +0800 Subject: [PATCH 03/63] fix: test Signed-off-by: shuangkun --- api/jsonschema/schema.json | 2 +- api/openapi-spec/swagger.json | 2 +- docs/fields.md | 2 +- pkg/apis/workflow/v1alpha1/openapi_generated.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/jsonschema/schema.json b/api/jsonschema/schema.json index 982416175d60..f7c753fcfeff 100644 --- a/api/jsonschema/schema.json +++ b/api/jsonschema/schema.json @@ -4359,7 +4359,7 @@ }, "retryStrategy": { "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy", - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set." + "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried." }, "volumeMounts": { "items": { diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index 46f6d2d730b1..dc8312c52359 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -8311,7 +8311,7 @@ } }, "retryStrategy": { - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set.", + "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. 
Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.", "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy" }, "volumeMounts": { diff --git a/docs/fields.md b/docs/fields.md index 604d0b5de42c..d6a3a3deb4a2 100644 --- a/docs/fields.md +++ b/docs/fields.md @@ -2411,7 +2411,7 @@ _No description available_ | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`containers`|`Array<`[`ContainerNode`](#containernode)`>`|_No description available_| -|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set.| +|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.| |`volumeMounts`|`Array<`[`VolumeMount`](#volumemount)`>`|_No description available_| ## DAGTemplate diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 10aaa7b910e8..fb877ad3d038 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -1988,7 +1988,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetTemplate(ref common.Reference }, "retryStrategy": { SchemaProps: spec.SchemaProps{ - Description: "RetryStrategy describes how to retry a container nodes in the container set if it fails. 
Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set.", + Description: "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.", Ref: ref("github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1.ContainerSetRetryStrategy"), }, }, From 609b5650612fdc8a9988ce478dc4420a2224f68c Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 20 Mar 2024 11:46:38 +0800 Subject: [PATCH 04/63] fix: docs Signed-off-by: shuangkun --- api/jsonschema/schema.json | 2 +- api/openapi-spec/swagger.json | 2 +- docs/container-set-template.md | 18 +++++++++++++++++- docs/fields.md | 2 +- .../v1alpha1/container_set_template_types.go | 2 +- pkg/apis/workflow/v1alpha1/generated.proto | 2 +- .../workflow/v1alpha1/openapi_generated.go | 2 +- 7 files changed, 23 insertions(+), 7 deletions(-) diff --git a/api/jsonschema/schema.json b/api/jsonschema/schema.json index f7c753fcfeff..e94a50f48d59 100644 --- a/api/jsonschema/schema.json +++ b/api/jsonschema/schema.json @@ -4359,7 +4359,7 @@ }, "retryStrategy": { "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy", - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried." + "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command." 
}, "volumeMounts": { "items": { diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index dc8312c52359..658cff920db7 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -8311,7 +8311,7 @@ } }, "retryStrategy": { - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.", + "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command.", "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy" }, "volumeMounts": { diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 8d17eee2b5ec..ad1ee5a69408 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -119,8 +119,24 @@ Solution: do not use container set when you have lopsided requests. ## Container Set Retries +> v3.3 and after + Container Set Retry policies describes how to retry a container nodes in the container set if it fails. Number of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. -The shell command should be normal otherwise it will not be retried. +The container won't retry if it's unable to locate the command. 
+ +Here is an example of a Container Set Template with `retryStrategy`: + +```yaml + containerSet: + containers: + - name: retry-containerset + image: alpine:latest + retryStrategy: + limit: "3" + command: [ sh, -c ] + args: [ "echo intentional failure; exit 1" ] +``` + diff --git a/docs/fields.md b/docs/fields.md index d6a3a3deb4a2..c006fe53f6bf 100644 --- a/docs/fields.md +++ b/docs/fields.md @@ -2411,7 +2411,7 @@ _No description available_ | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`containers`|`Array<`[`ContainerNode`](#containernode)`>`|_No description available_| -|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.| +|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command.| |`volumeMounts`|`Array<`[`VolumeMount`](#volumemount)`>`|_No description available_| ## DAGTemplate diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index e0f6b5d5a64c..eb73cc744559 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -14,7 +14,7 @@ type ContainerSetTemplate struct { VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty" protobuf:"bytes,3,rep,name=volumeMounts"` // RetryStrategy describes how to retry a container nodes in the container set if it fails. 
// Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. - // The shell command should work fine otherwise it won't be retried. + // The container won't retry if it's unable to locate the command. RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } diff --git a/pkg/apis/workflow/v1alpha1/generated.proto b/pkg/apis/workflow/v1alpha1/generated.proto index 3dd1cd80b1ad..1a16384f2548 100644 --- a/pkg/apis/workflow/v1alpha1/generated.proto +++ b/pkg/apis/workflow/v1alpha1/generated.proto @@ -421,7 +421,7 @@ message ContainerSetTemplate { // RetryStrategy describes how to retry a container nodes in the container set if it fails. // Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. - // The shell command should work fine otherwise it won't be retried. + // The container won't retry if it's unable to locate the command. optional ContainerSetRetryStrategy retryStrategy = 5; } diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index fb877ad3d038..7f5b8b8df115 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -1988,7 +1988,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetTemplate(ref common.Reference }, "retryStrategy": { SchemaProps: spec.SchemaProps{ - Description: "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.", + Description: "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. 
The container won't retry if it's unable to locate the command.", Ref: ref("github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1.ContainerSetRetryStrategy"), }, }, From cb0689342513692ae72f61184a0d498275428bc8 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 20 Mar 2024 11:49:37 +0800 Subject: [PATCH 05/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index ad1ee5a69408..29b9074b3f08 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -139,4 +139,3 @@ Here is an example of a Container Set Template with `retryStrategy`: command: [ sh, -c ] args: [ "echo intentional failure; exit 1" ] ``` - From 0ffacb22361b68f14c7857545d10506d87527924 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 20 Mar 2024 23:06:53 +0800 Subject: [PATCH 06/63] fix: move clisute to retry suit. Signed-off-by: shuangkun --- test/e2e/cli_test.go | 56 -------------------------------- test/e2e/retry_test.go | 74 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 56 deletions(-) diff --git a/test/e2e/cli_test.go b/test/e2e/cli_test.go index f34dd7ae3b98..0f67010cebed 100644 --- a/test/e2e/cli_test.go +++ b/test/e2e/cli_test.go @@ -1733,62 +1733,6 @@ func (s *CLISuite) TestPluginStruct() { }) } -func (s *CLISuite) TestWorkflowTemplateWithRetryStrategyInContainerSet() { - var name string - s.Given(). - WorkflowTemplate("@testdata/workflow-template-with-containerset.yaml"). - Workflow(` -metadata: - generateName: workflow-template-containerset- -spec: - workflowTemplateRef: - name: containerset-with-retrystrategy -`). - When(). - CreateWorkflowTemplates(). - SubmitWorkflow(). - WaitForWorkflow(fixtures.ToBeFailed). - Then(). 
- ExpectWorkflow(func(t *testing.T, metadata *metav1.ObjectMeta, status *wfv1.WorkflowStatus) { - assert.Equal(t, status.Phase, wfv1.WorkflowFailed) - name = metadata.Name - }) - // Success, no need retry - s.Run("ContainerLogs", func() { - s.Given(). - RunCli([]string{"logs", name, name, "-c", "c1"}, func(t *testing.T, output string, err error) { - if assert.NoError(t, err) { - count := strings.Count(output, "capturing logs") - assert.Equal(t, 1, count) - assert.Contains(t, output, "hi") - } - }) - }) - // Command err. No retry logic is entered. - s.Run("ContainerLogs", func() { - s.Given(). - RunCli([]string{"logs", name, name, "-c", "c2"}, func(t *testing.T, output string, err error) { - if assert.NoError(t, err) { - count := strings.Count(output, "capturing logs") - assert.Equal(t, 0, count) - assert.Contains(t, output, "executable file not found in $PATH") - } - }) - }) - // Retry when err. - s.Run("ContainerLogs", func() { - s.Given(). - RunCli([]string{"logs", name, name, "-c", "c3"}, func(t *testing.T, output string, err error) { - if assert.NoError(t, err) { - count := strings.Count(output, "capturing logs") - assert.Equal(t, 2, count) - countFailureInfo := strings.Count(output, "intentional failure") - assert.Equal(t, 2, countFailureInfo) - } - }) - }) -} - func TestCLISuite(t *testing.T) { suite.Run(t, new(CLISuite)) } diff --git a/test/e2e/retry_test.go b/test/e2e/retry_test.go index 740ef42d1967..1025eaccc690 100644 --- a/test/e2e/retry_test.go +++ b/test/e2e/retry_test.go @@ -4,6 +4,9 @@ package e2e import ( + "context" + "io" + "strings" "testing" "time" @@ -120,6 +123,77 @@ spec: }) } +func (s *RetryTestSuite) TestWorkflowTemplateWithRetryStrategyInContainerSet() { + var name string + var ns string + s.Given(). + WorkflowTemplate("@testdata/workflow-template-with-containerset.yaml"). + Workflow(` +metadata: + name: workflow-template-containerset +spec: + workflowTemplateRef: + name: containerset-with-retrystrategy +`). + When(). 
+ CreateWorkflowTemplates(). + SubmitWorkflow(). + WaitForWorkflow(fixtures.ToBeFailed). + Then(). + ExpectWorkflow(func(t *testing.T, metadata *metav1.ObjectMeta, status *wfv1.WorkflowStatus) { + assert.Equal(t, status.Phase, wfv1.WorkflowFailed) + }). + ExpectWorkflowNode(func(status v1alpha1.NodeStatus) bool { + return status.Name == "workflow-template-containerset" + }, func(t *testing.T, status *v1alpha1.NodeStatus, pod *apiv1.Pod) { + name = pod.GetName() + ns = pod.GetNamespace() + }) + // Success, no need retry + s.Run("ContainerLogs", func() { + ctx := context.Background() + podLogOptions := &apiv1.PodLogOptions{Container: "c1"} + stream, err := s.KubeClient.CoreV1().Pods(ns).GetLogs(name, podLogOptions).Stream(ctx) + assert.Nil(s.T(), err) + defer stream.Close() + logBytes, err := io.ReadAll(stream) + assert.Nil(s.T(), err) + output := string(logBytes) + count := strings.Count(output, "capturing logs") + assert.Equal(s.T(), 1, count) + assert.Contains(s.T(), output, "hi") + }) + // Command err. No retry logic is entered. + s.Run("ContainerLogs", func() { + ctx := context.Background() + podLogOptions := &apiv1.PodLogOptions{Container: "c2"} + stream, err := s.KubeClient.CoreV1().Pods(ns).GetLogs(name, podLogOptions).Stream(ctx) + assert.Nil(s.T(), err) + defer stream.Close() + logBytes, err := io.ReadAll(stream) + assert.Nil(s.T(), err) + output := string(logBytes) + count := strings.Count(output, "capturing logs") + assert.Equal(s.T(), 0, count) + assert.Contains(s.T(), output, "executable file not found in $PATH") + }) + // Retry when err. 
+ s.Run("ContainerLogs", func() { + ctx := context.Background() + podLogOptions := &apiv1.PodLogOptions{Container: "c3"} + stream, err := s.KubeClient.CoreV1().Pods(ns).GetLogs(name, podLogOptions).Stream(ctx) + assert.Nil(s.T(), err) + defer stream.Close() + logBytes, err := io.ReadAll(stream) + assert.Nil(s.T(), err) + output := string(logBytes) + count := strings.Count(output, "capturing logs") + assert.Equal(s.T(), 2, count) + countFailureInfo := strings.Count(output, "intentional failure") + assert.Equal(s.T(), 1, countFailureInfo) + }) +} + func TestRetrySuite(t *testing.T) { suite.Run(t, new(RetryTestSuite)) } From dec8483b2b101453329e0cce03640add09b066cc Mon Sep 17 00:00:00 2001 From: shuangkun Date: Thu, 21 Mar 2024 04:38:26 +0800 Subject: [PATCH 07/63] fix: test Signed-off-by: shuangkun --- test/e2e/retry_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/retry_test.go b/test/e2e/retry_test.go index 1025eaccc690..bc1ad53e928e 100644 --- a/test/e2e/retry_test.go +++ b/test/e2e/retry_test.go @@ -190,7 +190,7 @@ spec: count := strings.Count(output, "capturing logs") assert.Equal(s.T(), 2, count) countFailureInfo := strings.Count(output, "intentional failure") - assert.Equal(s.T(), 1, countFailureInfo) + assert.Equal(s.T(), 2, countFailureInfo) }) } From 9b684d6f4789d61f07e512cde74982fe9c40f7b1 Mon Sep 17 00:00:00 2001 From: shuangkun tian <72060326+shuangkun@users.noreply.github.com> Date: Tue, 26 Mar 2024 21:44:48 +0800 Subject: [PATCH 08/63] Update docs/container-set-template.md Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: shuangkun tian <72060326+shuangkun@users.noreply.github.com> --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 29b9074b3f08..726a351e6ce5 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -117,7 +117,7 @@ Can 
you see the problem here? `a` only has small requests, but the container set Solution: do not use container set when you have lopsided requests. -## Container Set Retries +## `retryStrategy` usage > v3.3 and after From f6dd09187c1200690d3a620e938a817abc352919 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 22:13:08 +0800 Subject: [PATCH 09/63] fix: comments Signed-off-by: shuangkun --- docs/container-set-template.md | 17 ++++++++++------- .../v1alpha1/container_set_template_types.go | 8 ++++---- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 726a351e6ce5..4ee30f75d0ca 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -121,21 +121,24 @@ Solution: do not use container set when you have lopsided requests. > v3.3 and after -Container Set Retry policies describes how to retry a container nodes in the container set if it fails. +You can set a `retryStrategy` to apply to all containers of a container set. -Number of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. +This currently works differently from [retries](retries.md) for other template types. +You can only set the `duration` between each retry and the total number of `retries`. -The container won't retry if it's unable to locate the command. - -Here is an example of a Container Set Template with `retryStrategy`: +See an example below: ```yaml containerSet: + retryStrategy: + retries: "2" containers: - name: retry-containerset image: alpine:latest - retryStrategy: - limit: "3" command: [ sh, -c ] args: [ "echo intentional failure; exit 1" ] ``` + +!!! Note + A container set will not be retried if a container's `command` cannot be located. + As it will fail each time, the retry logic is short-circuited. 
\ No newline at end of file diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index eb73cc744559..fc4c177f1417 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -12,17 +12,17 @@ import ( type ContainerSetTemplate struct { Containers []ContainerNode `json:"containers" protobuf:"bytes,4,rep,name=containers"` VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty" protobuf:"bytes,3,rep,name=volumeMounts"` - // RetryStrategy describes how to retry a container nodes in the container set if it fails. - // Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. - // The container won't retry if it's unable to locate the command. + // RetryStrategy describes how to retry container nodes if the container set fails. + // Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time. RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } +// ContainerSetRetryStrategy provides controls on how to retry a container set type ContainerSetRetryStrategy struct { // Duration is the time between each retry, examples values are "300ms", "1s" or "5m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Duration string `json:"duration,omitempty" protobuf:"bytes,1,opt,name=duration"` - // Nbr of retries + // Retries is the number of retry attempts when retrying a container. 
Retries *intstr.IntOrString `json:"retries" protobuf:"bytes,2,rep,name=retries"` } From b08c9777f73297366f0483afa3b98a90a8c4fc5a Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 22:23:10 +0800 Subject: [PATCH 10/63] fix: comments Signed-off-by: shuangkun --- docs/container-set-template.md | 26 +++++++++++++++++++++- pkg/apis/workflow/v1alpha1/generated.proto | 8 +++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 4ee30f75d0ca..054082ef7fbb 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -129,14 +129,38 @@ You can only set the `duration` between each retry and the total number of `retr See an example below: ```yaml +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + name: containerset-with-retrystrategy + annotations: + workflows.argoproj.io/description: | + This workflow template is used to create a workflow with containerset with retrystrategy. +spec: + entrypoint: containerset-retrystrategy-example + templates: + - name: containerset-retrystrategy-example containerSet: retryStrategy: retries: "2" containers: - - name: retry-containerset + - name: success + image: python:alpine3.6 + command: + - python + - -c + args: + - | + print("hi") + - name: fail-retry image: alpine:latest command: [ sh, -c ] args: [ "echo intentional failure; exit 1" ] + - name: invalic-command + image: python:alpine3.6 + command: + - invalid + - command ``` !!! 
Note diff --git a/pkg/apis/workflow/v1alpha1/generated.proto b/pkg/apis/workflow/v1alpha1/generated.proto index 1a16384f2548..ed32315d4152 100644 --- a/pkg/apis/workflow/v1alpha1/generated.proto +++ b/pkg/apis/workflow/v1alpha1/generated.proto @@ -405,12 +405,13 @@ message ContainerNode { repeated string dependencies = 2; } +// ContainerSetRetryStrategy provides controls on how to retry a container set message ContainerSetRetryStrategy { // Duration is the time between each retry, examples values are "300ms", "1s" or "5m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". optional string duration = 1; - // Nbr of retries + // Retries is the number of retry attempts when retrying a container. optional k8s.io.apimachinery.pkg.util.intstr.IntOrString retries = 2; } @@ -419,9 +420,8 @@ message ContainerSetTemplate { repeated k8s.io.api.core.v1.VolumeMount volumeMounts = 3; - // RetryStrategy describes how to retry a container nodes in the container set if it fails. - // Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. - // The container won't retry if it's unable to locate the command. + // RetryStrategy describes how to retry container nodes if the container set fails. + // Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time. optional ContainerSetRetryStrategy retryStrategy = 5; } From da0b05ad894fbec47fd7dd70defecb692fca776c Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 22:34:35 +0800 Subject: [PATCH 11/63] fix: codegen. 
Signed-off-by: shuangkun --- api/jsonschema/schema.json | 5 +++-- api/openapi-spec/swagger.json | 5 +++-- docs/container-set-template.md | 3 +-- docs/fields.md | 6 +++--- pkg/apis/workflow/v1alpha1/openapi_generated.go | 7 ++++--- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/api/jsonschema/schema.json b/api/jsonschema/schema.json index e94a50f48d59..feff33c7a4d2 100644 --- a/api/jsonschema/schema.json +++ b/api/jsonschema/schema.json @@ -4334,6 +4334,7 @@ "type": "object" }, "io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy": { + "description": "ContainerSetRetryStrategy provides controls on how to retry a container set", "properties": { "duration": { "description": "Duration is the time between each retry, examples values are \"300ms\", \"1s\" or \"5m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\".", @@ -4341,7 +4342,7 @@ }, "retries": { "$ref": "#/definitions/io.k8s.apimachinery.pkg.util.intstr.IntOrString", - "description": "Nbr of retries" + "description": "Retries is the number of retry attempts when retrying a container." } }, "required": [ @@ -4359,7 +4360,7 @@ }, "retryStrategy": { "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy", - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command." + "description": "RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time." 
}, "volumeMounts": { "items": { diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index 658cff920db7..c69984d3574a 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -8283,6 +8283,7 @@ } }, "io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy": { + "description": "ContainerSetRetryStrategy provides controls on how to retry a container set", "type": "object", "required": [ "retries" @@ -8293,7 +8294,7 @@ "type": "string" }, "retries": { - "description": "Nbr of retries", + "description": "Retries is the number of retry attempts when retrying a container.", "$ref": "#/definitions/io.k8s.apimachinery.pkg.util.intstr.IntOrString" } } @@ -8311,7 +8312,7 @@ } }, "retryStrategy": { - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command.", + "description": "RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time.", "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy" }, "volumeMounts": { diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 054082ef7fbb..ee929c1e04a6 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -164,5 +164,4 @@ spec: ``` !!! Note - A container set will not be retried if a container's `command` cannot be located. - As it will fail each time, the retry logic is short-circuited. \ No newline at end of file + A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. 
\ No newline at end of file diff --git a/docs/fields.md b/docs/fields.md index c006fe53f6bf..046c648fd50d 100644 --- a/docs/fields.md +++ b/docs/fields.md @@ -2411,7 +2411,7 @@ _No description available_ | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`containers`|`Array<`[`ContainerNode`](#containernode)`>`|_No description available_| -|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command.| +|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time.| |`volumeMounts`|`Array<`[`VolumeMount`](#volumemount)`>`|_No description available_| ## DAGTemplate @@ -3748,7 +3748,7 @@ _No description available_ ## ContainerSetRetryStrategy -_No description available_ +ContainerSetRetryStrategy provides controls on how to retry a container set
Examples with this field (click to open) @@ -3780,7 +3780,7 @@ _No description available_ | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`duration`|`string`|Duration is the time between each retry, examples values are "300ms", "1s" or "5m". Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".| -|`retries`|[`IntOrString`](#intorstring)|Nbr of retries| +|`retries`|[`IntOrString`](#intorstring)|Retries is the number of retry attempts when retrying a container.| ## DAGTask diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 7f5b8b8df115..87e6aeae8d0d 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -1930,7 +1930,8 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetRetryStrategy(ref common.Refe return common.OpenAPIDefinition{ Schema: spec.Schema{ SchemaProps: spec.SchemaProps{ - Type: []string{"object"}, + Description: "ContainerSetRetryStrategy provides controls on how to retry a container set", + Type: []string{"object"}, Properties: map[string]spec.Schema{ "duration": { SchemaProps: spec.SchemaProps{ @@ -1941,7 +1942,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetRetryStrategy(ref common.Refe }, "retries": { SchemaProps: spec.SchemaProps{ - Description: "Nbr of retries", + Description: "Retries is the number of retry attempts when retrying a container.", Ref: ref("k8s.io/apimachinery/pkg/util/intstr.IntOrString"), }, }, @@ -1988,7 +1989,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetTemplate(ref common.Reference }, "retryStrategy": { SchemaProps: spec.SchemaProps{ - Description: "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. 
The container won't retry if it's unable to locate the command.", + Description: "RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time.", Ref: ref("github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1.ContainerSetRetryStrategy"), }, }, From 396700d5d096bd4d1acb108c033cd995bccda37b Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 22:51:08 +0800 Subject: [PATCH 12/63] fix: codegen Signed-off-by: shuangkun --- docs/container-set-template.md | 3 ++- docs/executor_swagger.md | 5 ++++- pkg/apis/workflow/v1alpha1/container_set_template_types.go | 2 +- pkg/plugins/executor/swagger.yml | 2 ++ .../IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md | 1 + .../IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md | 1 + 6 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index ee929c1e04a6..dd16eee96052 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -164,4 +164,5 @@ spec: ``` !!! Note - A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. \ No newline at end of file + A container set will not be retried if a container's `command` cannot be located. + As it will fail each time, the retry logic is short-circuited. diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index fb365f905478..a6e2bd6e561e 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1026,7 +1026,10 @@ referred to by services. 
### ContainerSetRetryStrategy - +ContainerSetRetryStrategy provides controls on how to retry a container set + + + diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index fc4c177f1417..1917b177beaa 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -17,7 +17,7 @@ type ContainerSetTemplate struct { RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } -// ContainerSetRetryStrategy provides controls on how to retry a container set +// ContainerSetRetryStrategy provides controls on how to retry a container set. type ContainerSetRetryStrategy struct { // Duration is the time between each retry, examples values are "300ms", "1s" or "5m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". diff --git a/pkg/plugins/executor/swagger.yml b/pkg/plugins/executor/swagger.yml index 4cf099c2e39c..ca683fbc2d37 100644 --- a/pkg/plugins/executor/swagger.yml +++ b/pkg/plugins/executor/swagger.yml @@ -1002,6 +1002,8 @@ definitions: title: ContainerPort represents a network port in a single container. 
type: object ContainerSetRetryStrategy: + description: ContainerSetRetryStrategy provides controls on how to retry a container + set properties: duration: description: |- diff --git a/sdks/java/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md b/sdks/java/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md index 19c11c6bb471..6a059f3b0315 100644 --- a/sdks/java/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md +++ b/sdks/java/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md @@ -2,6 +2,7 @@ # IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy +ContainerSetRetryStrategy provides controls on how to retry a container set ## Properties diff --git a/sdks/python/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md b/sdks/python/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md index 50e09bc46bd8..b7bfad4cd414 100644 --- a/sdks/python/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md +++ b/sdks/python/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md @@ -1,5 +1,6 @@ # IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy +ContainerSetRetryStrategy provides controls on how to retry a container set ## Properties Name | Type | Description | Notes From 136ae106817e7ec0964fbd04a9a0402e866cdec7 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 22:57:28 +0800 Subject: [PATCH 13/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index dd16eee96052..0b2e17e6b30e 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -164,5 +164,6 @@ spec: ``` !!! Note + A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. 
From 46965bc5adfa8e83383e83dcf5342df54b2c1bfb Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 23:10:08 +0800 Subject: [PATCH 14/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 0b2e17e6b30e..02a610516d60 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -163,7 +163,6 @@ spec: - command ``` -!!! Note +!!! NOTE + A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. - A container set will not be retried if a container's `command` cannot be located. - As it will fail each time, the retry logic is short-circuited. From 1703b4c7d372f96d7e411418e9b1029eb2b7fc7a Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 23:12:16 +0800 Subject: [PATCH 15/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 02a610516d60..de42236e8724 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -165,4 +165,3 @@ spec: !!! NOTE A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. 
- From 728138a3c45511f56606841eee3743e1088c1bef Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 23:51:07 +0800 Subject: [PATCH 16/63] fix: test Signed-off-by: shuangkun --- docs/executor_swagger.md | 3 --- pkg/apis/workflow/v1alpha1/container_set_template_types.go | 2 +- pkg/plugins/executor/swagger.yml | 2 ++ 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index a6e2bd6e561e..e50a747e013b 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1026,9 +1026,6 @@ referred to by services. ### ContainerSetRetryStrategy -ContainerSetRetryStrategy provides controls on how to retry a container set - - diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index 1917b177beaa..fc4c177f1417 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -17,7 +17,7 @@ type ContainerSetTemplate struct { RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } -// ContainerSetRetryStrategy provides controls on how to retry a container set. +// ContainerSetRetryStrategy provides controls on how to retry a container set type ContainerSetRetryStrategy struct { // Duration is the time between each retry, examples values are "300ms", "1s" or "5m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". 
diff --git a/pkg/plugins/executor/swagger.yml b/pkg/plugins/executor/swagger.yml index ca683fbc2d37..f42433df8a72 100644 --- a/pkg/plugins/executor/swagger.yml +++ b/pkg/plugins/executor/swagger.yml @@ -1012,6 +1012,8 @@ definitions: type: string retries: $ref: '#/definitions/IntOrString' + title: ContainerSetRetryStrategy provides controls on how to retry a container + set type: object ContainerSetTemplate: properties: From fabf22b2a10804cb6cf38e232ca66341ef47b227 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 23:59:25 +0800 Subject: [PATCH 17/63] fix: test Signed-off-by: shuangkun --- docs/executor_swagger.md | 2 ++ pkg/plugins/executor/swagger.yml | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index e50a747e013b..3947376961a9 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1026,6 +1026,8 @@ referred to by services. ### ContainerSetRetryStrategy +> ContainerSetRetryStrategy provides controls on how to retry a container set + diff --git a/pkg/plugins/executor/swagger.yml b/pkg/plugins/executor/swagger.yml index f42433df8a72..ca683fbc2d37 100644 --- a/pkg/plugins/executor/swagger.yml +++ b/pkg/plugins/executor/swagger.yml @@ -1012,8 +1012,6 @@ definitions: type: string retries: $ref: '#/definitions/IntOrString' - title: ContainerSetRetryStrategy provides controls on how to retry a container - set type: object ContainerSetTemplate: properties: From 41822f1dfaab709f01468a52439d9d0ae2d3f8f9 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 00:00:27 +0800 Subject: [PATCH 18/63] fix: test Signed-off-by: shuangkun --- docs/executor_swagger.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index 3947376961a9..2976c6e8c760 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1032,6 +1032,7 @@ referred to by services. 
+ **Properties** | Name | Type | Go type | Required | Default | Description | Example | From 439c7ab027b0b74bf85407442b1b9663a0dd9d05 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 00:08:06 +0800 Subject: [PATCH 19/63] fix: test Signed-off-by: shuangkun --- docs/executor_swagger.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index 2976c6e8c760..f94fe9767c15 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1027,7 +1027,7 @@ referred to by services. > ContainerSetRetryStrategy provides controls on how to retry a container set - + From ba61ea975fc57d458cb9471b84166903249dfc43 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 00:28:43 +0800 Subject: [PATCH 20/63] fix: test Signed-off-by: shuangkun --- docs/executor_swagger.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index f94fe9767c15..230da5919885 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1027,7 +1027,7 @@ referred to by services. 
> ContainerSetRetryStrategy provides controls on how to retry a container set - + From 257596bf823d6f09da31d326870854e3f1f0c867 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 11:09:46 +0800 Subject: [PATCH 21/63] fix: comments Signed-off-by: shuangkun --- docs/container-set-template.md | 21 +++++++++---------- .../v1alpha1/container_set_template_types.go | 3 ++- .../workflow-template-with-containerset.yaml | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index de42236e8724..38070fa9df93 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -135,15 +135,17 @@ metadata: name: containerset-with-retrystrategy annotations: workflows.argoproj.io/description: | - This workflow template is used to create a workflow with containerset with retrystrategy. + This workflow creates a container set with a retryStrategy. spec: entrypoint: containerset-retrystrategy-example templates: - name: containerset-retrystrategy-example containerSet: retryStrategy: - retries: "2" + retries: "10" # if fails, retry at most ten times + duration: 30s # retry for at most 30s containers: + # this container completes successfully, it won't retried. - name: success image: python:alpine3.6 command: @@ -152,16 +154,13 @@ spec: args: - | print("hi") + # if fails, it will retry at most ten times. - name: fail-retry - image: alpine:latest - command: [ sh, -c ] - args: [ "echo intentional failure; exit 1" ] - - name: invalic-command image: python:alpine3.6 - command: - - invalid - - command + command: ["python", -c] + args: ["import random; import sys; exit_code = random.choice([0, 1, 1]); sys.exit(exit_code)"] ``` -!!! NOTE - A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. +!!! Note + A container set will not be retried if a container's `command` cannot be located. 
+ As it will fail each time, the retry logic is short-circuited. diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index fc4c177f1417..4c935279dc49 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -22,7 +22,8 @@ type ContainerSetRetryStrategy struct { // Duration is the time between each retry, examples values are "300ms", "1s" or "5m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Duration string `json:"duration,omitempty" protobuf:"bytes,1,opt,name=duration"` - // Retries is the number of retry attempts when retrying a container. + // Retries is the maximum number of retry attempts for each container. It does not include the + // first, original attempt; the maximum number of total attempts will be `retries + 1`. Retries *intstr.IntOrString `json:"retries" protobuf:"bytes,2,rep,name=retries"` } diff --git a/test/e2e/testdata/workflow-template-with-containerset.yaml b/test/e2e/testdata/workflow-template-with-containerset.yaml index 463c2a3e901f..b2f4c32a880a 100644 --- a/test/e2e/testdata/workflow-template-with-containerset.yaml +++ b/test/e2e/testdata/workflow-template-with-containerset.yaml @@ -4,7 +4,7 @@ metadata: name: containerset-with-retrystrategy annotations: workflows.argoproj.io/description: | - This workflow template is used to create a workflow with containerset. + This workflow creates a container set with a retryStrategy. 
spec: entrypoint: test templates: From 707fbf31a456af2342e59c9f756d01b83bc20ca5 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 11:29:30 +0800 Subject: [PATCH 22/63] fix: test Signed-off-by: shuangkun --- api/jsonschema/schema.json | 2 +- api/openapi-spec/swagger.json | 2 +- docs/fields.md | 2 +- pkg/apis/workflow/v1alpha1/generated.proto | 3 ++- pkg/apis/workflow/v1alpha1/openapi_generated.go | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/api/jsonschema/schema.json b/api/jsonschema/schema.json index feff33c7a4d2..e69d02f14ab7 100644 --- a/api/jsonschema/schema.json +++ b/api/jsonschema/schema.json @@ -4342,7 +4342,7 @@ }, "retries": { "$ref": "#/definitions/io.k8s.apimachinery.pkg.util.intstr.IntOrString", - "description": "Retries is the number of retry attempts when retrying a container." + "description": "Retries is the maximum number of retry attempts for each container. It does not include the first, original attempt; the maximum number of total attempts will be `retries + 1`." } }, "required": [ diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index c69984d3574a..3532f5b34905 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -8294,7 +8294,7 @@ "type": "string" }, "retries": { - "description": "Retries is the number of retry attempts when retrying a container.", + "description": "Retries is the maximum number of retry attempts for each container. 
It does not include the first, original attempt; the maximum number of total attempts will be `retries + 1`.", "$ref": "#/definitions/io.k8s.apimachinery.pkg.util.intstr.IntOrString" } } diff --git a/docs/fields.md b/docs/fields.md index 046c648fd50d..26bb626e4c6e 100644 --- a/docs/fields.md +++ b/docs/fields.md @@ -3780,7 +3780,7 @@ ContainerSetRetryStrategy provides controls on how to retry a container set | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`duration`|`string`|Duration is the time between each retry, examples values are "300ms", "1s" or "5m". Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".| -|`retries`|[`IntOrString`](#intorstring)|Retries is the number of retry attempts when retrying a container.| +|`retries`|[`IntOrString`](#intorstring)|Retries is the maximum number of retry attempts for each container. It does not include the first, original attempt; the maximum number of total attempts will be `retries + 1`.| ## DAGTask diff --git a/pkg/apis/workflow/v1alpha1/generated.proto b/pkg/apis/workflow/v1alpha1/generated.proto index ed32315d4152..f589c9164999 100644 --- a/pkg/apis/workflow/v1alpha1/generated.proto +++ b/pkg/apis/workflow/v1alpha1/generated.proto @@ -411,7 +411,8 @@ message ContainerSetRetryStrategy { // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". optional string duration = 1; - // Retries is the number of retry attempts when retrying a container. + // Retries is the maximum number of retry attempts for each container. It does not include the + // first, original attempt; the maximum number of total attempts will be `retries + 1`. 
optional k8s.io.apimachinery.pkg.util.intstr.IntOrString retries = 2; } diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 87e6aeae8d0d..260ca6fd4f96 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -1942,7 +1942,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetRetryStrategy(ref common.Refe }, "retries": { SchemaProps: spec.SchemaProps{ - Description: "Retries is the number of retry attempts when retrying a container.", + Description: "Retries is the maximum number of retry attempts for each container. It does not include the first, original attempt; the maximum number of total attempts will be `retries + 1`.", Ref: ref("k8s.io/apimachinery/pkg/util/intstr.IntOrString"), }, }, From 76cee1ab536b64243b4b61ced0fc3dcf89cf67ce Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 11:52:06 +0800 Subject: [PATCH 23/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 38070fa9df93..0b66522952a9 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -162,5 +162,5 @@ spec: ``` !!! Note - A container set will not be retried if a container's `command` cannot be located. + A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. 
From 0387c4edfb2ccb5b6a045fac43a0d38d0b3d0faa Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 19 Mar 2024 23:55:08 +0800 Subject: [PATCH 24/63] fix: test doc Signed-off-by: shuangkun --- docs/container-set-template.md | 8 +++ test/e2e/cli_test.go | 56 +++++++++++++++++++ .../workflow-template-with-containerset.yaml | 32 +++++++++++ 3 files changed, 96 insertions(+) create mode 100644 test/e2e/testdata/workflow-template-with-containerset.yaml diff --git a/docs/container-set-template.md b/docs/container-set-template.md index d27cb27df95b..8d17eee2b5ec 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -116,3 +116,11 @@ Example B: Lopsided requests, e.g. `a -> b` where `a` is cheap and `b` is expens Can you see the problem here? `a` only has small requests, but the container set will use the total of all requests. So it's as if you're using all that GPU for 10h. This will be expensive. Solution: do not use container set when you have lopsided requests. + +## Container Set Retries + +Container Set Retry policies describes how to retry a container nodes in the container set if it fails. + +Number of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. + +The shell command should be normal otherwise it will not be retried. diff --git a/test/e2e/cli_test.go b/test/e2e/cli_test.go index 394db640d073..652c207f29e0 100644 --- a/test/e2e/cli_test.go +++ b/test/e2e/cli_test.go @@ -1842,6 +1842,62 @@ func (s *CLISuite) TestPluginStruct() { }) } +func (s *CLISuite) TestWorkflowTemplateWithRetryStrategyInContainerSet() { + var name string + s.Given(). + WorkflowTemplate("@testdata/workflow-template-with-containerset.yaml"). + Workflow(` +metadata: + generateName: workflow-template-containerset- +spec: + workflowTemplateRef: + name: containerset-with-retrystrategy +`). + When(). + CreateWorkflowTemplates(). + SubmitWorkflow(). + WaitForWorkflow(fixtures.ToBeFailed). + Then(). 
+ ExpectWorkflow(func(t *testing.T, metadata *metav1.ObjectMeta, status *wfv1.WorkflowStatus) { + assert.Equal(t, status.Phase, wfv1.WorkflowFailed) + name = metadata.Name + }) + // Success, no need retry + s.Run("ContainerLogs", func() { + s.Given(). + RunCli([]string{"logs", name, name, "-c", "c1"}, func(t *testing.T, output string, err error) { + if assert.NoError(t, err) { + count := strings.Count(output, "capturing logs") + assert.Equal(t, 1, count) + assert.Contains(t, output, "hi") + } + }) + }) + // Command err. No retry logic is entered. + s.Run("ContainerLogs", func() { + s.Given(). + RunCli([]string{"logs", name, name, "-c", "c2"}, func(t *testing.T, output string, err error) { + if assert.NoError(t, err) { + count := strings.Count(output, "capturing logs") + assert.Equal(t, 0, count) + assert.Contains(t, output, "executable file not found in $PATH") + } + }) + }) + // Retry when err. + s.Run("ContainerLogs", func() { + s.Given(). + RunCli([]string{"logs", name, name, "-c", "c3"}, func(t *testing.T, output string, err error) { + if assert.NoError(t, err) { + count := strings.Count(output, "capturing logs") + assert.Equal(t, 2, count) + countFailureInfo := strings.Count(output, "intentional failure") + assert.Equal(t, 2, countFailureInfo) + } + }) + }) +} + func TestCLISuite(t *testing.T) { suite.Run(t, new(CLISuite)) } diff --git a/test/e2e/testdata/workflow-template-with-containerset.yaml b/test/e2e/testdata/workflow-template-with-containerset.yaml new file mode 100644 index 000000000000..463c2a3e901f --- /dev/null +++ b/test/e2e/testdata/workflow-template-with-containerset.yaml @@ -0,0 +1,32 @@ +apiVersion: argoproj.io/v1alpha1 +kind: WorkflowTemplate +metadata: + name: containerset-with-retrystrategy + annotations: + workflows.argoproj.io/description: | + This workflow template is used to create a workflow with containerset. 
+spec: + entrypoint: test + templates: + - name: test + containerSet: + retryStrategy: + retries: "2" + containers: + - name: c1 + image: python:alpine3.6 + command: + - python + - -c + args: + - | + print("hi") + - name: c2 + image: python:alpine3.6 + command: + - invalid + - command + - name: c3 + image: alpine:latest + command: [ sh, -c ] + args: [ "echo intentional failure; exit 1" ] \ No newline at end of file From cb6202f5a9407777748ee6262e74b79f54e18cf8 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 20 Mar 2024 01:04:39 +0800 Subject: [PATCH 25/63] fix: test Signed-off-by: shuangkun --- pkg/apis/workflow/v1alpha1/container_set_template_types.go | 1 + pkg/apis/workflow/v1alpha1/generated.proto | 1 + 2 files changed, 2 insertions(+) diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index fb685a3e81f5..e0f6b5d5a64c 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -14,6 +14,7 @@ type ContainerSetTemplate struct { VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty" protobuf:"bytes,3,rep,name=volumeMounts"` // RetryStrategy describes how to retry a container nodes in the container set if it fails. // Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. + // The shell command should work fine otherwise it won't be retried. RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } diff --git a/pkg/apis/workflow/v1alpha1/generated.proto b/pkg/apis/workflow/v1alpha1/generated.proto index 221f9f64d9c9..653f6e1d0ee8 100644 --- a/pkg/apis/workflow/v1alpha1/generated.proto +++ b/pkg/apis/workflow/v1alpha1/generated.proto @@ -421,6 +421,7 @@ message ContainerSetTemplate { // RetryStrategy describes how to retry a container nodes in the container set if it fails. 
// Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. + // The shell command should work fine otherwise it won't be retried. optional ContainerSetRetryStrategy retryStrategy = 5; } From 6888008908f15ad92369722e68c580d13b4269e8 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 20 Mar 2024 01:15:04 +0800 Subject: [PATCH 26/63] fix: test Signed-off-by: shuangkun --- api/jsonschema/schema.json | 2 +- api/openapi-spec/swagger.json | 2 +- docs/fields.md | 2 +- pkg/apis/workflow/v1alpha1/openapi_generated.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/jsonschema/schema.json b/api/jsonschema/schema.json index 14d8c347afa4..830c9055f8d3 100644 --- a/api/jsonschema/schema.json +++ b/api/jsonschema/schema.json @@ -4359,7 +4359,7 @@ }, "retryStrategy": { "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy", - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set." + "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried." }, "volumeMounts": { "items": { diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index 4fb45809bcc6..a63402029a69 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -8311,7 +8311,7 @@ } }, "retryStrategy": { - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set.", + "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. 
Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.", "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy" }, "volumeMounts": { diff --git a/docs/fields.md b/docs/fields.md index 2eda10c6e4e8..0c2a8d422ee6 100644 --- a/docs/fields.md +++ b/docs/fields.md @@ -2411,7 +2411,7 @@ _No description available_ | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`containers`|`Array<`[`ContainerNode`](#containernode)`>`|_No description available_| -|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set.| +|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.| |`volumeMounts`|`Array<`[`VolumeMount`](#volumemount)`>`|_No description available_| ## DAGTemplate diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 8f025f9528fc..6f0be776af61 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -1988,7 +1988,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetTemplate(ref common.Reference }, "retryStrategy": { SchemaProps: spec.SchemaProps{ - Description: "RetryStrategy describes how to retry a container nodes in the container set if it fails. 
Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set.", + Description: "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.", Ref: ref("github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1.ContainerSetRetryStrategy"), }, }, From 41f582365173050867d461d23ca5514329f99892 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 20 Mar 2024 11:46:38 +0800 Subject: [PATCH 27/63] fix: docs Signed-off-by: shuangkun --- api/jsonschema/schema.json | 2 +- api/openapi-spec/swagger.json | 2 +- docs/container-set-template.md | 18 +++++++++++++++++- docs/fields.md | 2 +- .../v1alpha1/container_set_template_types.go | 2 +- pkg/apis/workflow/v1alpha1/generated.proto | 2 +- .../workflow/v1alpha1/openapi_generated.go | 2 +- 7 files changed, 23 insertions(+), 7 deletions(-) diff --git a/api/jsonschema/schema.json b/api/jsonschema/schema.json index 830c9055f8d3..a87cd1db55bd 100644 --- a/api/jsonschema/schema.json +++ b/api/jsonschema/schema.json @@ -4359,7 +4359,7 @@ }, "retryStrategy": { "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy", - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried." + "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command." 
}, "volumeMounts": { "items": { diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index a63402029a69..659b915a6440 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -8311,7 +8311,7 @@ } }, "retryStrategy": { - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.", + "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command.", "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy" }, "volumeMounts": { diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 8d17eee2b5ec..ad1ee5a69408 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -119,8 +119,24 @@ Solution: do not use container set when you have lopsided requests. ## Container Set Retries +> v3.3 and after + Container Set Retry policies describes how to retry a container nodes in the container set if it fails. Number of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. -The shell command should be normal otherwise it will not be retried. +The container won't retry if it's unable to locate the command. 
+ +Here is an example of a Container Set Template with `retryStrategy`: + +```yaml + containerSet: + containers: + - name: retry-containerset + image: alpine:latest + retryStrategy: + limit: "3" + command: [ sh, -c ] + args: [ "echo intentional failure; exit 1" ] +``` + diff --git a/docs/fields.md b/docs/fields.md index 0c2a8d422ee6..41077a60cfdc 100644 --- a/docs/fields.md +++ b/docs/fields.md @@ -2411,7 +2411,7 @@ _No description available_ | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`containers`|`Array<`[`ContainerNode`](#containernode)`>`|_No description available_| -|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.| +|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command.| |`volumeMounts`|`Array<`[`VolumeMount`](#volumemount)`>`|_No description available_| ## DAGTemplate diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index e0f6b5d5a64c..eb73cc744559 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -14,7 +14,7 @@ type ContainerSetTemplate struct { VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty" protobuf:"bytes,3,rep,name=volumeMounts"` // RetryStrategy describes how to retry a container nodes in the container set if it fails. 
// Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. - // The shell command should work fine otherwise it won't be retried. + // The container won't retry if it's unable to locate the command. RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } diff --git a/pkg/apis/workflow/v1alpha1/generated.proto b/pkg/apis/workflow/v1alpha1/generated.proto index 653f6e1d0ee8..544d44c3a8d6 100644 --- a/pkg/apis/workflow/v1alpha1/generated.proto +++ b/pkg/apis/workflow/v1alpha1/generated.proto @@ -421,7 +421,7 @@ message ContainerSetTemplate { // RetryStrategy describes how to retry a container nodes in the container set if it fails. // Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. - // The shell command should work fine otherwise it won't be retried. + // The container won't retry if it's unable to locate the command. optional ContainerSetRetryStrategy retryStrategy = 5; } diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 6f0be776af61..7ca32a73add5 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -1988,7 +1988,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetTemplate(ref common.Reference }, "retryStrategy": { SchemaProps: spec.SchemaProps{ - Description: "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The shell command should work fine otherwise it won't be retried.", + Description: "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. 
The container won't retry if it's unable to locate the command.", Ref: ref("github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1.ContainerSetRetryStrategy"), }, }, From 5bb915c497baff48b0fc01da38bfc8090b7f30d0 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 20 Mar 2024 11:49:37 +0800 Subject: [PATCH 28/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index ad1ee5a69408..29b9074b3f08 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -139,4 +139,3 @@ Here is an example of a Container Set Template with `retryStrategy`: command: [ sh, -c ] args: [ "echo intentional failure; exit 1" ] ``` - From 311ce1f0e95b74dac38ab42648cfccdb3c96fbdb Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 20 Mar 2024 23:06:53 +0800 Subject: [PATCH 29/63] fix: move clisute to retry suit. Signed-off-by: shuangkun --- test/e2e/cli_test.go | 56 -------------------------------- test/e2e/retry_test.go | 74 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 56 deletions(-) diff --git a/test/e2e/cli_test.go b/test/e2e/cli_test.go index 652c207f29e0..394db640d073 100644 --- a/test/e2e/cli_test.go +++ b/test/e2e/cli_test.go @@ -1842,62 +1842,6 @@ func (s *CLISuite) TestPluginStruct() { }) } -func (s *CLISuite) TestWorkflowTemplateWithRetryStrategyInContainerSet() { - var name string - s.Given(). - WorkflowTemplate("@testdata/workflow-template-with-containerset.yaml"). - Workflow(` -metadata: - generateName: workflow-template-containerset- -spec: - workflowTemplateRef: - name: containerset-with-retrystrategy -`). - When(). - CreateWorkflowTemplates(). - SubmitWorkflow(). - WaitForWorkflow(fixtures.ToBeFailed). - Then(). 
- ExpectWorkflow(func(t *testing.T, metadata *metav1.ObjectMeta, status *wfv1.WorkflowStatus) { - assert.Equal(t, status.Phase, wfv1.WorkflowFailed) - name = metadata.Name - }) - // Success, no need retry - s.Run("ContainerLogs", func() { - s.Given(). - RunCli([]string{"logs", name, name, "-c", "c1"}, func(t *testing.T, output string, err error) { - if assert.NoError(t, err) { - count := strings.Count(output, "capturing logs") - assert.Equal(t, 1, count) - assert.Contains(t, output, "hi") - } - }) - }) - // Command err. No retry logic is entered. - s.Run("ContainerLogs", func() { - s.Given(). - RunCli([]string{"logs", name, name, "-c", "c2"}, func(t *testing.T, output string, err error) { - if assert.NoError(t, err) { - count := strings.Count(output, "capturing logs") - assert.Equal(t, 0, count) - assert.Contains(t, output, "executable file not found in $PATH") - } - }) - }) - // Retry when err. - s.Run("ContainerLogs", func() { - s.Given(). - RunCli([]string{"logs", name, name, "-c", "c3"}, func(t *testing.T, output string, err error) { - if assert.NoError(t, err) { - count := strings.Count(output, "capturing logs") - assert.Equal(t, 2, count) - countFailureInfo := strings.Count(output, "intentional failure") - assert.Equal(t, 2, countFailureInfo) - } - }) - }) -} - func TestCLISuite(t *testing.T) { suite.Run(t, new(CLISuite)) } diff --git a/test/e2e/retry_test.go b/test/e2e/retry_test.go index 740ef42d1967..1025eaccc690 100644 --- a/test/e2e/retry_test.go +++ b/test/e2e/retry_test.go @@ -4,6 +4,9 @@ package e2e import ( + "context" + "io" + "strings" "testing" "time" @@ -120,6 +123,77 @@ spec: }) } +func (s *RetryTestSuite) TestWorkflowTemplateWithRetryStrategyInContainerSet() { + var name string + var ns string + s.Given(). + WorkflowTemplate("@testdata/workflow-template-with-containerset.yaml"). + Workflow(` +metadata: + name: workflow-template-containerset +spec: + workflowTemplateRef: + name: containerset-with-retrystrategy +`). + When(). 
+ CreateWorkflowTemplates(). + SubmitWorkflow(). + WaitForWorkflow(fixtures.ToBeFailed). + Then(). + ExpectWorkflow(func(t *testing.T, metadata *metav1.ObjectMeta, status *wfv1.WorkflowStatus) { + assert.Equal(t, status.Phase, wfv1.WorkflowFailed) + }). + ExpectWorkflowNode(func(status v1alpha1.NodeStatus) bool { + return status.Name == "workflow-template-containerset" + }, func(t *testing.T, status *v1alpha1.NodeStatus, pod *apiv1.Pod) { + name = pod.GetName() + ns = pod.GetNamespace() + }) + // Success, no need retry + s.Run("ContainerLogs", func() { + ctx := context.Background() + podLogOptions := &apiv1.PodLogOptions{Container: "c1"} + stream, err := s.KubeClient.CoreV1().Pods(ns).GetLogs(name, podLogOptions).Stream(ctx) + assert.Nil(s.T(), err) + defer stream.Close() + logBytes, err := io.ReadAll(stream) + assert.Nil(s.T(), err) + output := string(logBytes) + count := strings.Count(output, "capturing logs") + assert.Equal(s.T(), 1, count) + assert.Contains(s.T(), output, "hi") + }) + // Command err. No retry logic is entered. + s.Run("ContainerLogs", func() { + ctx := context.Background() + podLogOptions := &apiv1.PodLogOptions{Container: "c2"} + stream, err := s.KubeClient.CoreV1().Pods(ns).GetLogs(name, podLogOptions).Stream(ctx) + assert.Nil(s.T(), err) + defer stream.Close() + logBytes, err := io.ReadAll(stream) + assert.Nil(s.T(), err) + output := string(logBytes) + count := strings.Count(output, "capturing logs") + assert.Equal(s.T(), 0, count) + assert.Contains(s.T(), output, "executable file not found in $PATH") + }) + // Retry when err. 
+ s.Run("ContainerLogs", func() { + ctx := context.Background() + podLogOptions := &apiv1.PodLogOptions{Container: "c3"} + stream, err := s.KubeClient.CoreV1().Pods(ns).GetLogs(name, podLogOptions).Stream(ctx) + assert.Nil(s.T(), err) + defer stream.Close() + logBytes, err := io.ReadAll(stream) + assert.Nil(s.T(), err) + output := string(logBytes) + count := strings.Count(output, "capturing logs") + assert.Equal(s.T(), 2, count) + countFailureInfo := strings.Count(output, "intentional failure") + assert.Equal(s.T(), 1, countFailureInfo) + }) +} + func TestRetrySuite(t *testing.T) { suite.Run(t, new(RetryTestSuite)) } From d08e66bc9ee2128fc71c2e85ef62287f5dfb7157 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Thu, 21 Mar 2024 04:38:26 +0800 Subject: [PATCH 30/63] fix: test Signed-off-by: shuangkun --- test/e2e/retry_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/retry_test.go b/test/e2e/retry_test.go index 1025eaccc690..bc1ad53e928e 100644 --- a/test/e2e/retry_test.go +++ b/test/e2e/retry_test.go @@ -190,7 +190,7 @@ spec: count := strings.Count(output, "capturing logs") assert.Equal(s.T(), 2, count) countFailureInfo := strings.Count(output, "intentional failure") - assert.Equal(s.T(), 1, countFailureInfo) + assert.Equal(s.T(), 2, countFailureInfo) }) } From 2f31d714d7c4d6c0433aea9e02300def5137b671 Mon Sep 17 00:00:00 2001 From: shuangkun tian <72060326+shuangkun@users.noreply.github.com> Date: Tue, 26 Mar 2024 21:44:48 +0800 Subject: [PATCH 31/63] Update docs/container-set-template.md Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: shuangkun tian <72060326+shuangkun@users.noreply.github.com> --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 29b9074b3f08..726a351e6ce5 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -117,7 +117,7 @@ Can 
you see the problem here? `a` only has small requests, but the container set Solution: do not use container set when you have lopsided requests. -## Container Set Retries +## `retryStrategy` usage > v3.3 and after From e87bfda12043cb95999eb24b9424af1452454384 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 22:13:08 +0800 Subject: [PATCH 32/63] fix: comments Signed-off-by: shuangkun --- docs/container-set-template.md | 17 ++++++++++------- .../v1alpha1/container_set_template_types.go | 8 ++++---- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 726a351e6ce5..4ee30f75d0ca 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -121,21 +121,24 @@ Solution: do not use container set when you have lopsided requests. > v3.3 and after -Container Set Retry policies describes how to retry a container nodes in the container set if it fails. +You can set a `retryStrategy` to apply to all containers of a container set. -Number of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. +This currently works differently from [retries](retries.md) for other template types. +You can only set the `duration` between each retry and the total number of `retries`. -The container won't retry if it's unable to locate the command. - -Here is an example of a Container Set Template with `retryStrategy`: +See an example below: ```yaml containerSet: + retryStrategy: + retries: "2" containers: - name: retry-containerset image: alpine:latest - retryStrategy: - limit: "3" command: [ sh, -c ] args: [ "echo intentional failure; exit 1" ] ``` + +!!! Note + A container set will not be retried if a container's `command` cannot be located. + As it will fail each time, the retry logic is short-circuited. 
\ No newline at end of file diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index eb73cc744559..fc4c177f1417 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -12,17 +12,17 @@ import ( type ContainerSetTemplate struct { Containers []ContainerNode `json:"containers" protobuf:"bytes,4,rep,name=containers"` VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty" protobuf:"bytes,3,rep,name=volumeMounts"` - // RetryStrategy describes how to retry a container nodes in the container set if it fails. - // Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. - // The container won't retry if it's unable to locate the command. + // RetryStrategy describes how to retry container nodes if the container set fails. + // Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time. RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } +// ContainerSetRetryStrategy provides controls on how to retry a container set type ContainerSetRetryStrategy struct { // Duration is the time between each retry, examples values are "300ms", "1s" or "5m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Duration string `json:"duration,omitempty" protobuf:"bytes,1,opt,name=duration"` - // Nbr of retries + // Retries is the number of retry attempts when retrying a container. 
Retries *intstr.IntOrString `json:"retries" protobuf:"bytes,2,rep,name=retries"` } From 395f521b0e048d6bee955cf8b22138cba3db9fb6 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 22:23:10 +0800 Subject: [PATCH 33/63] fix: comments Signed-off-by: shuangkun --- docs/container-set-template.md | 26 +++++++++++++++++++++- pkg/apis/workflow/v1alpha1/generated.proto | 8 +++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 4ee30f75d0ca..054082ef7fbb 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -129,14 +129,38 @@ You can only set the `duration` between each retry and the total number of `retr See an example below: ```yaml +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + name: containerset-with-retrystrategy + annotations: + workflows.argoproj.io/description: | + This workflow template is used to create a workflow with containerset with retrystrategy. +spec: + entrypoint: containerset-retrystrategy-example + templates: + - name: containerset-retrystrategy-example containerSet: retryStrategy: retries: "2" containers: - - name: retry-containerset + - name: success + image: python:alpine3.6 + command: + - python + - -c + args: + - | + print("hi") + - name: fail-retry image: alpine:latest command: [ sh, -c ] args: [ "echo intentional failure; exit 1" ] + - name: invalic-command + image: python:alpine3.6 + command: + - invalid + - command ``` !!! 
Note diff --git a/pkg/apis/workflow/v1alpha1/generated.proto b/pkg/apis/workflow/v1alpha1/generated.proto index 544d44c3a8d6..8f2c7b3ed053 100644 --- a/pkg/apis/workflow/v1alpha1/generated.proto +++ b/pkg/apis/workflow/v1alpha1/generated.proto @@ -405,12 +405,13 @@ message ContainerNode { repeated string dependencies = 2; } +// ContainerSetRetryStrategy provides controls on how to retry a container set message ContainerSetRetryStrategy { // Duration is the time between each retry, examples values are "300ms", "1s" or "5m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". optional string duration = 1; - // Nbr of retries + // Retries is the number of retry attempts when retrying a container. optional k8s.io.apimachinery.pkg.util.intstr.IntOrString retries = 2; } @@ -419,9 +420,8 @@ message ContainerSetTemplate { repeated k8s.io.api.core.v1.VolumeMount volumeMounts = 3; - // RetryStrategy describes how to retry a container nodes in the container set if it fails. - // Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. - // The container won't retry if it's unable to locate the command. + // RetryStrategy describes how to retry container nodes if the container set fails. + // Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time. optional ContainerSetRetryStrategy retryStrategy = 5; } From 2eed44373a68662e59d71beb893361a426b03e5f Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 22:34:35 +0800 Subject: [PATCH 34/63] fix: codegen. 
Signed-off-by: shuangkun --- api/jsonschema/schema.json | 5 +++-- api/openapi-spec/swagger.json | 5 +++-- docs/container-set-template.md | 3 +-- docs/fields.md | 6 +++--- pkg/apis/workflow/v1alpha1/openapi_generated.go | 7 ++++--- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/api/jsonschema/schema.json b/api/jsonschema/schema.json index a87cd1db55bd..63bd6811bd1e 100644 --- a/api/jsonschema/schema.json +++ b/api/jsonschema/schema.json @@ -4334,6 +4334,7 @@ "type": "object" }, "io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy": { + "description": "ContainerSetRetryStrategy provides controls on how to retry a container set", "properties": { "duration": { "description": "Duration is the time between each retry, examples values are \"300ms\", \"1s\" or \"5m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\".", @@ -4341,7 +4342,7 @@ }, "retries": { "$ref": "#/definitions/io.k8s.apimachinery.pkg.util.intstr.IntOrString", - "description": "Nbr of retries" + "description": "Retries is the number of retry attempts when retrying a container." } }, "required": [ @@ -4359,7 +4360,7 @@ }, "retryStrategy": { "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy", - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command." + "description": "RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time." 
}, "volumeMounts": { "items": { diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index 659b915a6440..843665ed4961 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -8283,6 +8283,7 @@ } }, "io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy": { + "description": "ContainerSetRetryStrategy provides controls on how to retry a container set", "type": "object", "required": [ "retries" @@ -8293,7 +8294,7 @@ "type": "string" }, "retries": { - "description": "Nbr of retries", + "description": "Retries is the number of retry attempts when retrying a container.", "$ref": "#/definitions/io.k8s.apimachinery.pkg.util.intstr.IntOrString" } } @@ -8311,7 +8312,7 @@ } }, "retryStrategy": { - "description": "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command.", + "description": "RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time.", "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy" }, "volumeMounts": { diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 054082ef7fbb..ee929c1e04a6 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -164,5 +164,4 @@ spec: ``` !!! Note - A container set will not be retried if a container's `command` cannot be located. - As it will fail each time, the retry logic is short-circuited. \ No newline at end of file + A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. 
\ No newline at end of file diff --git a/docs/fields.md b/docs/fields.md index 41077a60cfdc..3b9c2cd1327c 100644 --- a/docs/fields.md +++ b/docs/fields.md @@ -2411,7 +2411,7 @@ _No description available_ | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`containers`|`Array<`[`ContainerNode`](#containernode)`>`|_No description available_| -|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. The container won't retry if it's unable to locate the command.| +|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time.| |`volumeMounts`|`Array<`[`VolumeMount`](#volumemount)`>`|_No description available_| ## DAGTemplate @@ -3748,7 +3748,7 @@ _No description available_ ## ContainerSetRetryStrategy -_No description available_ +ContainerSetRetryStrategy provides controls on how to retry a container set
Examples with this field (click to open) @@ -3780,7 +3780,7 @@ _No description available_ | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`duration`|`string`|Duration is the time between each retry, examples values are "300ms", "1s" or "5m". Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".| -|`retries`|[`IntOrString`](#intorstring)|Nbr of retries| +|`retries`|[`IntOrString`](#intorstring)|Retries is the number of retry attempts when retrying a container.| ## DAGTask diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 7ca32a73add5..02358eba219b 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -1930,7 +1930,8 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetRetryStrategy(ref common.Refe return common.OpenAPIDefinition{ Schema: spec.Schema{ SchemaProps: spec.SchemaProps{ - Type: []string{"object"}, + Description: "ContainerSetRetryStrategy provides controls on how to retry a container set", + Type: []string{"object"}, Properties: map[string]spec.Schema{ "duration": { SchemaProps: spec.SchemaProps{ @@ -1941,7 +1942,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetRetryStrategy(ref common.Refe }, "retries": { SchemaProps: spec.SchemaProps{ - Description: "Nbr of retries", + Description: "Retries is the number of retry attempts when retrying a container.", Ref: ref("k8s.io/apimachinery/pkg/util/intstr.IntOrString"), }, }, @@ -1988,7 +1989,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetTemplate(ref common.Reference }, "retryStrategy": { SchemaProps: spec.SchemaProps{ - Description: "RetryStrategy describes how to retry a container nodes in the container set if it fails. Nbr of retries(default 0) and sleep duration between retries(default 0s, instant retry) can be set. 
The container won't retry if it's unable to locate the command.", + Description: "RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time.", Ref: ref("github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1.ContainerSetRetryStrategy"), }, }, From 44594fef8eb55a3d6b56bdc492dcaa07eac17b24 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 22:51:08 +0800 Subject: [PATCH 35/63] fix: codegen Signed-off-by: shuangkun --- docs/container-set-template.md | 3 ++- docs/executor_swagger.md | 5 ++++- pkg/apis/workflow/v1alpha1/container_set_template_types.go | 2 +- pkg/plugins/executor/swagger.yml | 2 ++ .../IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md | 1 + .../IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md | 1 + 6 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index ee929c1e04a6..dd16eee96052 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -164,4 +164,5 @@ spec: ``` !!! Note - A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. \ No newline at end of file + A container set will not be retried if a container's `command` cannot be located. + As it will fail each time, the retry logic is short-circuited. diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index e159c9f26106..c95ba5f1eac7 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1026,7 +1026,10 @@ referred to by services. 
### ContainerSetRetryStrategy - +ContainerSetRetryStrategy provides controls on how to retry a container set + + + diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index fc4c177f1417..1917b177beaa 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -17,7 +17,7 @@ type ContainerSetTemplate struct { RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } -// ContainerSetRetryStrategy provides controls on how to retry a container set +// ContainerSetRetryStrategy provides controls on how to retry a container set. type ContainerSetRetryStrategy struct { // Duration is the time between each retry, examples values are "300ms", "1s" or "5m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". diff --git a/pkg/plugins/executor/swagger.yml b/pkg/plugins/executor/swagger.yml index ef4de7072265..5e44fc6844fe 100644 --- a/pkg/plugins/executor/swagger.yml +++ b/pkg/plugins/executor/swagger.yml @@ -1002,6 +1002,8 @@ definitions: title: ContainerPort represents a network port in a single container. 
type: object ContainerSetRetryStrategy: + description: ContainerSetRetryStrategy provides controls on how to retry a container + set properties: duration: description: |- diff --git a/sdks/java/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md b/sdks/java/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md index 19c11c6bb471..6a059f3b0315 100644 --- a/sdks/java/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md +++ b/sdks/java/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md @@ -2,6 +2,7 @@ # IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy +ContainerSetRetryStrategy provides controls on how to retry a container set ## Properties diff --git a/sdks/python/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md b/sdks/python/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md index 50e09bc46bd8..b7bfad4cd414 100644 --- a/sdks/python/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md +++ b/sdks/python/client/docs/IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy.md @@ -1,5 +1,6 @@ # IoArgoprojWorkflowV1alpha1ContainerSetRetryStrategy +ContainerSetRetryStrategy provides controls on how to retry a container set ## Properties Name | Type | Description | Notes From 996201aa1eab4fd30db7967160734a0bdcefffe1 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 22:57:28 +0800 Subject: [PATCH 36/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index dd16eee96052..0b2e17e6b30e 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -164,5 +164,6 @@ spec: ``` !!! Note + A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. 
From 91cb2032d4dbd667e952891d67481f62524f972e Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 23:10:08 +0800 Subject: [PATCH 37/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 0b2e17e6b30e..02a610516d60 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -163,7 +163,6 @@ spec: - command ``` -!!! Note +!!! NOTE + A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. - A container set will not be retried if a container's `command` cannot be located. - As it will fail each time, the retry logic is short-circuited. From 65c499b13c4c7b93592a74ef5a31d256d8a3b22e Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 23:12:16 +0800 Subject: [PATCH 38/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 02a610516d60..de42236e8724 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -165,4 +165,3 @@ spec: !!! NOTE A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. 
- From e079cccd0934e5e1cf9443afbe161490555d2f9c Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 23:51:07 +0800 Subject: [PATCH 39/63] fix: test Signed-off-by: shuangkun --- docs/executor_swagger.md | 3 --- pkg/apis/workflow/v1alpha1/container_set_template_types.go | 2 +- pkg/plugins/executor/swagger.yml | 2 ++ 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index c95ba5f1eac7..810480b33b53 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1026,9 +1026,6 @@ referred to by services. ### ContainerSetRetryStrategy -ContainerSetRetryStrategy provides controls on how to retry a container set - - diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index 1917b177beaa..fc4c177f1417 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -17,7 +17,7 @@ type ContainerSetTemplate struct { RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } -// ContainerSetRetryStrategy provides controls on how to retry a container set. +// ContainerSetRetryStrategy provides controls on how to retry a container set type ContainerSetRetryStrategy struct { // Duration is the time between each retry, examples values are "300ms", "1s" or "5m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". 
diff --git a/pkg/plugins/executor/swagger.yml b/pkg/plugins/executor/swagger.yml index 5e44fc6844fe..bbd1d6dd66cc 100644 --- a/pkg/plugins/executor/swagger.yml +++ b/pkg/plugins/executor/swagger.yml @@ -1012,6 +1012,8 @@ definitions: type: string retries: $ref: '#/definitions/IntOrString' + title: ContainerSetRetryStrategy provides controls on how to retry a container + set type: object ContainerSetTemplate: properties: From 6248d06f3e4862fb8bff899a32095703cdc7a4fe Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 26 Mar 2024 23:59:25 +0800 Subject: [PATCH 40/63] fix: test Signed-off-by: shuangkun --- docs/executor_swagger.md | 2 ++ pkg/plugins/executor/swagger.yml | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index 810480b33b53..c79e70697048 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1026,6 +1026,8 @@ referred to by services. ### ContainerSetRetryStrategy +> ContainerSetRetryStrategy provides controls on how to retry a container set + diff --git a/pkg/plugins/executor/swagger.yml b/pkg/plugins/executor/swagger.yml index bbd1d6dd66cc..5e44fc6844fe 100644 --- a/pkg/plugins/executor/swagger.yml +++ b/pkg/plugins/executor/swagger.yml @@ -1012,8 +1012,6 @@ definitions: type: string retries: $ref: '#/definitions/IntOrString' - title: ContainerSetRetryStrategy provides controls on how to retry a container - set type: object ContainerSetTemplate: properties: From d3d36f4dab0db73e2b0465f6508f4bd2dde1b281 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 00:00:27 +0800 Subject: [PATCH 41/63] fix: test Signed-off-by: shuangkun --- docs/executor_swagger.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index c79e70697048..ec71d9f1bae0 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1032,6 +1032,7 @@ referred to by services. 
+ **Properties** | Name | Type | Go type | Required | Default | Description | Example | From b593f8b1e918093a90206b47ba8568b74a8445fa Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 00:08:06 +0800 Subject: [PATCH 42/63] fix: test Signed-off-by: shuangkun --- docs/executor_swagger.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index ec71d9f1bae0..78f932649638 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1027,7 +1027,7 @@ referred to by services. > ContainerSetRetryStrategy provides controls on how to retry a container set - + From f483b1ea043cea0e7bc770dc533bc40177cbbca5 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 00:28:43 +0800 Subject: [PATCH 43/63] fix: test Signed-off-by: shuangkun --- docs/executor_swagger.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/executor_swagger.md b/docs/executor_swagger.md index 78f932649638..e78cbb50df6b 100644 --- a/docs/executor_swagger.md +++ b/docs/executor_swagger.md @@ -1027,7 +1027,7 @@ referred to by services. 
> ContainerSetRetryStrategy provides controls on how to retry a container set - + From 9b8ec1a86028a9f40bac2de7a6fd6f6a8a6f7012 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 11:09:46 +0800 Subject: [PATCH 44/63] fix: comments Signed-off-by: shuangkun --- docs/container-set-template.md | 21 +++++++++---------- .../v1alpha1/container_set_template_types.go | 3 ++- .../workflow-template-with-containerset.yaml | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index de42236e8724..38070fa9df93 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -135,15 +135,17 @@ metadata: name: containerset-with-retrystrategy annotations: workflows.argoproj.io/description: | - This workflow template is used to create a workflow with containerset with retrystrategy. + This workflow creates a container set with a retryStrategy. spec: entrypoint: containerset-retrystrategy-example templates: - name: containerset-retrystrategy-example containerSet: retryStrategy: - retries: "2" + retries: "10" # if fails, retry at most ten times + duration: 30s # retry for at most 30s containers: + # this container completes successfully, it won't retried. - name: success image: python:alpine3.6 command: @@ -152,16 +154,13 @@ spec: args: - | print("hi") + # if fails, it will retry at most ten times. - name: fail-retry - image: alpine:latest - command: [ sh, -c ] - args: [ "echo intentional failure; exit 1" ] - - name: invalic-command image: python:alpine3.6 - command: - - invalid - - command + command: ["python", -c] + args: ["import random; import sys; exit_code = random.choice([0, 1, 1]); sys.exit(exit_code)"] ``` -!!! NOTE - A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. +!!! Note + A container set will not be retried if a container's `command` cannot be located. 
+ As it will fail each time, the retry logic is short-circuited. diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index fc4c177f1417..4c935279dc49 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -22,7 +22,8 @@ type ContainerSetRetryStrategy struct { // Duration is the time between each retry, examples values are "300ms", "1s" or "5m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Duration string `json:"duration,omitempty" protobuf:"bytes,1,opt,name=duration"` - // Retries is the number of retry attempts when retrying a container. + // Retries is the maximum number of retry attempts for each container. It does not include the + // first, original attempt; the maximum number of total attempts will be `retries + 1`. Retries *intstr.IntOrString `json:"retries" protobuf:"bytes,2,rep,name=retries"` } diff --git a/test/e2e/testdata/workflow-template-with-containerset.yaml b/test/e2e/testdata/workflow-template-with-containerset.yaml index 463c2a3e901f..b2f4c32a880a 100644 --- a/test/e2e/testdata/workflow-template-with-containerset.yaml +++ b/test/e2e/testdata/workflow-template-with-containerset.yaml @@ -4,7 +4,7 @@ metadata: name: containerset-with-retrystrategy annotations: workflows.argoproj.io/description: | - This workflow template is used to create a workflow with containerset. + This workflow creates a container set with a retryStrategy. 
spec: entrypoint: test templates: From 5bb4f9dce202711e1742cf0fcd41021678533c01 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 11:29:30 +0800 Subject: [PATCH 45/63] fix: test Signed-off-by: shuangkun --- api/jsonschema/schema.json | 2 +- api/openapi-spec/swagger.json | 2 +- docs/fields.md | 2 +- pkg/apis/workflow/v1alpha1/generated.proto | 3 ++- pkg/apis/workflow/v1alpha1/openapi_generated.go | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/api/jsonschema/schema.json b/api/jsonschema/schema.json index 63bd6811bd1e..f343b29b5bc9 100644 --- a/api/jsonschema/schema.json +++ b/api/jsonschema/schema.json @@ -4342,7 +4342,7 @@ }, "retries": { "$ref": "#/definitions/io.k8s.apimachinery.pkg.util.intstr.IntOrString", - "description": "Retries is the number of retry attempts when retrying a container." + "description": "Retries is the maximum number of retry attempts for each container. It does not include the first, original attempt; the maximum number of total attempts will be `retries + 1`." } }, "required": [ diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index 843665ed4961..443fad387c94 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -8294,7 +8294,7 @@ "type": "string" }, "retries": { - "description": "Retries is the number of retry attempts when retrying a container.", + "description": "Retries is the maximum number of retry attempts for each container. 
It does not include the first, original attempt; the maximum number of total attempts will be `retries + 1`.", "$ref": "#/definitions/io.k8s.apimachinery.pkg.util.intstr.IntOrString" } } diff --git a/docs/fields.md b/docs/fields.md index 3b9c2cd1327c..f806874b6dff 100644 --- a/docs/fields.md +++ b/docs/fields.md @@ -3780,7 +3780,7 @@ ContainerSetRetryStrategy provides controls on how to retry a container set | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`duration`|`string`|Duration is the time between each retry, examples values are "300ms", "1s" or "5m". Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".| -|`retries`|[`IntOrString`](#intorstring)|Retries is the number of retry attempts when retrying a container.| +|`retries`|[`IntOrString`](#intorstring)|Retries is the maximum number of retry attempts for each container. It does not include the first, original attempt; the maximum number of total attempts will be `retries + 1`.| ## DAGTask diff --git a/pkg/apis/workflow/v1alpha1/generated.proto b/pkg/apis/workflow/v1alpha1/generated.proto index 8f2c7b3ed053..94a105f9471c 100644 --- a/pkg/apis/workflow/v1alpha1/generated.proto +++ b/pkg/apis/workflow/v1alpha1/generated.proto @@ -411,7 +411,8 @@ message ContainerSetRetryStrategy { // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". optional string duration = 1; - // Retries is the number of retry attempts when retrying a container. + // Retries is the maximum number of retry attempts for each container. It does not include the + // first, original attempt; the maximum number of total attempts will be `retries + 1`. 
optional k8s.io.apimachinery.pkg.util.intstr.IntOrString retries = 2; } diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 02358eba219b..9c0775d09dd7 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -1942,7 +1942,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetRetryStrategy(ref common.Refe }, "retries": { SchemaProps: spec.SchemaProps{ - Description: "Retries is the number of retry attempts when retrying a container.", + Description: "Retries is the maximum number of retry attempts for each container. It does not include the first, original attempt; the maximum number of total attempts will be `retries + 1`.", Ref: ref("k8s.io/apimachinery/pkg/util/intstr.IntOrString"), }, }, From a1b1a8e0767e3c563fa66913a8e1d1e0ab30034c Mon Sep 17 00:00:00 2001 From: shuangkun Date: Wed, 27 Mar 2024 11:52:06 +0800 Subject: [PATCH 46/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 38070fa9df93..0b66522952a9 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -162,5 +162,5 @@ spec: ``` !!! Note - A container set will not be retried if a container's `command` cannot be located. + A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. 
From 8d85df2ee68e5bc0795301b72fba7aa76a1f95a8 Mon Sep 17 00:00:00 2001 From: shuangkun tian <72060326+shuangkun@users.noreply.github.com> Date: Fri, 29 Mar 2024 14:50:26 +0800 Subject: [PATCH 47/63] Update docs/container-set-template.md Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: shuangkun tian <72060326+shuangkun@users.noreply.github.com> --- docs/container-set-template.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 0b66522952a9..b5e31d54e839 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -158,6 +158,7 @@ spec: - name: fail-retry image: python:alpine3.6 command: ["python", -c] + # fail with a 66% probability args: ["import random; import sys; exit_code = random.choice([0, 1, 1]); sys.exit(exit_code)"] ``` From b3543908b208c217552f72af487290a25b09ee94 Mon Sep 17 00:00:00 2001 From: shuangkun tian <72060326+shuangkun@users.noreply.github.com> Date: Fri, 29 Mar 2024 14:50:38 +0800 Subject: [PATCH 48/63] Update docs/container-set-template.md Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: shuangkun tian <72060326+shuangkun@users.noreply.github.com> --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index b5e31d54e839..ce34c73af76a 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -145,7 +145,7 @@ spec: retries: "10" # if fails, retry at most ten times duration: 30s # retry for at most 30s containers: - # this container completes successfully, it won't retried. + # this container completes successfully, so it won't be retried. 
- name: success image: python:alpine3.6 command: From c743a59f3c0a8d0d4576064391110fbe98ec8bca Mon Sep 17 00:00:00 2001 From: shuangkun Date: Sun, 31 Mar 2024 09:14:26 +0800 Subject: [PATCH 49/63] fix: add note. Signed-off-by: shuangkun --- docs/container-set-template.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index ce34c73af76a..7e7b175bef40 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -165,3 +165,5 @@ spec: !!! Note A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. + Since it is only a process-level retry, so won't create new node. + You can confirm whether a retry has been performed by checking the container's log. From db7c161b077e58c508b7e806360a45acffb478d0 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Sun, 31 Mar 2024 09:17:45 +0800 Subject: [PATCH 50/63] fix: add note. Signed-off-by: shuangkun --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 7e7b175bef40..c7fd15a6c365 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -165,5 +165,5 @@ spec: !!! Note A container set will not be retried if a container's `command` cannot be located. As it will fail each time, the retry logic is short-circuited. - Since it is only a process-level retry, so won't create new node. + Since it is only a process-level retry, so won't create new node. You can confirm whether a retry has been performed by checking the container's log. 
From 05094bfbceda6937fc53a3c1088eeeb86776c834 Mon Sep 17 00:00:00 2001 From: shuangkun tian <72060326+shuangkun@users.noreply.github.com> Date: Thu, 4 Apr 2024 08:19:38 +0800 Subject: [PATCH 51/63] Update docs/container-set-template.md Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: shuangkun tian <72060326+shuangkun@users.noreply.github.com> --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index c7fd15a6c365..f9ab761b27af 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -117,7 +117,7 @@ Can you see the problem here? `a` only has small requests, but the container set Solution: do not use container set when you have lopsided requests. -## `retryStrategy` usage +## Inner `retryStrategy` usage > v3.3 and after From 1ab992e1da684816877dfdb34b83858c0d4c9b86 Mon Sep 17 00:00:00 2001 From: shuangkun tian <72060326+shuangkun@users.noreply.github.com> Date: Thu, 4 Apr 2024 08:20:01 +0800 Subject: [PATCH 52/63] Update docs/container-set-template.md Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: shuangkun tian <72060326+shuangkun@users.noreply.github.com> --- docs/container-set-template.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index f9ab761b27af..938284b21fcf 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -121,10 +121,7 @@ Solution: do not use container set when you have lopsided requests. > v3.3 and after -You can set a `retryStrategy` to apply to all containers of a container set. - -This currently works differently from [retries](retries.md) for other template types. -You can only set the `duration` between each retry and the total number of `retries`. 
+You can set an inner `retryStrategy` to apply to all containers of a container set, including the `duration` between each retry and the total number of `retries`. See an example below: From 058bc04541fceb08a6ba1773257910e9529670dd Mon Sep 17 00:00:00 2001 From: shuangkun tian <72060326+shuangkun@users.noreply.github.com> Date: Thu, 4 Apr 2024 08:20:39 +0800 Subject: [PATCH 53/63] Update docs/container-set-template.md Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: shuangkun tian <72060326+shuangkun@users.noreply.github.com> --- docs/container-set-template.md | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 938284b21fcf..41bbe7defb0e 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -159,8 +159,22 @@ spec: args: ["import random; import sys; exit_code = random.choice([0, 1, 1]); sys.exit(exit_code)"] ``` -!!! Note - A container set will not be retried if a container's `command` cannot be located. - As it will fail each time, the retry logic is short-circuited. - Since it is only a process-level retry, so won't create new node. - You can confirm whether a retry has been performed by checking the container's log. +!!! Note Template-level `retryStrategy` vs Container Set `retryStrategy` + `containerSet.retryStrategy` works differently from [template-level retries](retries.md): + 1. Your `command` will be re-ran by the Executor inside the same container if it fails. + - As no new containers are created, the nodes in the UI remain the same, and the retried logs are appended to original container's logs. 
For example, your container logs may look like: + ``` + time="2024-03-29T06:40:25 UTC" level=info msg="capturing logs" argo=true + intentional failure + time="2024-03-29T06:40:25 UTC" level=debug msg="ignore signal child exited" argo=true + time="2024-03-29T06:40:26 UTC" level=info msg="capturing logs" argo=true + time="2024-03-29T06:40:26 UTC" level=debug msg="ignore signal urgent I/O condition" argo=true + intentional failure + time="2024-03-29T06:40:26 UTC" level=debug msg="ignore signal child exited" argo=true + time="2024-03-29T06:40:26 UTC" level=debug msg="forwarding signal terminated" argo=true + time="2024-03-29T06:40:27 UTC" level=info msg="sub-process exited" argo=true error="" + time="2024-03-29T06:40:27 UTC" level=info msg="not saving outputs - not main container" argo=true + Error: exit status 1 + ``` + 1. If a container's `command` cannot be located, it will not be retried. + - As it will fail each time, the retry logic is short-circuited. From c1610491f589e8757cfeb60bf8bdc68557081947 Mon Sep 17 00:00:00 2001 From: shuangkun tian <72060326+shuangkun@users.noreply.github.com> Date: Thu, 4 Apr 2024 08:21:01 +0800 Subject: [PATCH 54/63] Update pkg/apis/workflow/v1alpha1/container_set_template_types.go Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com> Signed-off-by: shuangkun tian <72060326+shuangkun@users.noreply.github.com> --- pkg/apis/workflow/v1alpha1/container_set_template_types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/apis/workflow/v1alpha1/container_set_template_types.go b/pkg/apis/workflow/v1alpha1/container_set_template_types.go index 4c935279dc49..ac1a4f44205f 100644 --- a/pkg/apis/workflow/v1alpha1/container_set_template_types.go +++ b/pkg/apis/workflow/v1alpha1/container_set_template_types.go @@ -13,7 +13,7 @@ type ContainerSetTemplate struct { Containers []ContainerNode `json:"containers" protobuf:"bytes,4,rep,name=containers"` VolumeMounts []corev1.VolumeMount 
`json:"volumeMounts,omitempty" protobuf:"bytes,3,rep,name=volumeMounts"` // RetryStrategy describes how to retry container nodes if the container set fails. - // Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time. + // Note that this works differently from the template-level `retryStrategy` as it is a process-level retry that does not create new Pods or containers. RetryStrategy *ContainerSetRetryStrategy `json:"retryStrategy,omitempty" protobuf:"bytes,5,opt,name=retryStrategy"` } From 149bf7d40a0f35810e4a11c1f0547b153f97172b Mon Sep 17 00:00:00 2001 From: shuangkun Date: Thu, 4 Apr 2024 08:35:51 +0800 Subject: [PATCH 55/63] fix: codegen Signed-off-by: shuangkun --- api/jsonschema/schema.json | 2 +- api/openapi-spec/swagger.json | 2 +- docs/fields.md | 2 +- pkg/apis/workflow/v1alpha1/generated.proto | 2 +- pkg/apis/workflow/v1alpha1/openapi_generated.go | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/jsonschema/schema.json b/api/jsonschema/schema.json index 138fc3a40953..f6cd69c3cbbe 100644 --- a/api/jsonschema/schema.json +++ b/api/jsonschema/schema.json @@ -4796,7 +4796,7 @@ }, "retryStrategy": { "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy", - "description": "RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time." + "description": "RetryStrategy describes how to retry container nodes if the container set fails. Note that this works differently from the template-level `retryStrategy` as it is a process-level retry that does not create new Pods or containers." 
}, "volumeMounts": { "items": { diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index 72a7edf94892..483615edffd0 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -8748,7 +8748,7 @@ } }, "retryStrategy": { - "description": "RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time.", + "description": "RetryStrategy describes how to retry container nodes if the container set fails. Note that this works differently from the template-level `retryStrategy` as it is a process-level retry that does not create new Pods or containers.", "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ContainerSetRetryStrategy" }, "volumeMounts": { diff --git a/docs/fields.md b/docs/fields.md index f806874b6dff..880604d9c1e4 100644 --- a/docs/fields.md +++ b/docs/fields.md @@ -2411,7 +2411,7 @@ _No description available_ | Field Name | Field Type | Description | |:----------:|:----------:|---------------| |`containers`|`Array<`[`ContainerNode`](#containernode)`>`|_No description available_| -|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time.| +|`retryStrategy`|[`ContainerSetRetryStrategy`](#containersetretrystrategy)|RetryStrategy describes how to retry container nodes if the container set fails. 
Note that this works differently from the template-level `retryStrategy` as it is a process-level retry that does not create new Pods or containers.| |`volumeMounts`|`Array<`[`VolumeMount`](#volumemount)`>`|_No description available_| ## DAGTemplate diff --git a/pkg/apis/workflow/v1alpha1/generated.proto b/pkg/apis/workflow/v1alpha1/generated.proto index 94a105f9471c..f3910066d35b 100644 --- a/pkg/apis/workflow/v1alpha1/generated.proto +++ b/pkg/apis/workflow/v1alpha1/generated.proto @@ -422,7 +422,7 @@ message ContainerSetTemplate { repeated k8s.io.api.core.v1.VolumeMount volumeMounts = 3; // RetryStrategy describes how to retry container nodes if the container set fails. - // Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time. + // Note that this works differently from the template-level `retryStrategy` as it is a process-level retry that does not create new Pods or containers. optional ContainerSetRetryStrategy retryStrategy = 5; } diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 9c0775d09dd7..07904b8206fd 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -1989,7 +1989,7 @@ func schema_pkg_apis_workflow_v1alpha1_ContainerSetTemplate(ref common.Reference }, "retryStrategy": { SchemaProps: spec.SchemaProps{ - Description: "RetryStrategy describes how to retry container nodes if the container set fails. Note that the container set will not be retried if a container's `command` cannot be located, as it will fail each time.", + Description: "RetryStrategy describes how to retry container nodes if the container set fails. 
Note that this works differently from the template-level `retryStrategy` as it is a process-level retry that does not create new Pods or containers.", Ref: ref("github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1.ContainerSetRetryStrategy"), }, }, From 6e5e6153d7de201a9e339bdfa2bb4bbed178b567 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Thu, 4 Apr 2024 08:55:37 +0800 Subject: [PATCH 56/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 41bbe7defb0e..72e1863eac10 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -176,5 +176,5 @@ spec: time="2024-03-29T06:40:27 UTC" level=info msg="not saving outputs - not main container" argo=true Error: exit status 1 ``` - 1. If a container's `command` cannot be located, it will not be retried. + 2. If a container's `command` cannot be located, it will not be retried. - As it will fail each time, the retry logic is short-circuited. From 87ac23dc76fb1bc651f0bc3433e280cbacda2e62 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Thu, 4 Apr 2024 09:06:50 +0800 Subject: [PATCH 57/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 72e1863eac10..9feabd1f73ee 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -163,7 +163,7 @@ spec: `containerSet.retryStrategy` works differently from [template-level retries](retries.md): 1. Your `command` will be re-ran by the Executor inside the same container if it fails. - As no new containers are created, the nodes in the UI remain the same, and the retried logs are appended to original container's logs. 
For example, your container logs may look like: - ``` + ```text time="2024-03-29T06:40:25 UTC" level=info msg="capturing logs" argo=true intentional failure time="2024-03-29T06:40:25 UTC" level=debug msg="ignore signal child exited" argo=true From 50a227056acee92f26f80eb566ad85bd5315de62 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Thu, 4 Apr 2024 09:19:53 +0800 Subject: [PATCH 58/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 9feabd1f73ee..1c342cdf52cf 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -163,18 +163,18 @@ spec: `containerSet.retryStrategy` works differently from [template-level retries](retries.md): 1. Your `command` will be re-ran by the Executor inside the same container if it fails. - As no new containers are created, the nodes in the UI remain the same, and the retried logs are appended to original container's logs. For example, your container logs may look like: - ```text - time="2024-03-29T06:40:25 UTC" level=info msg="capturing logs" argo=true - intentional failure - time="2024-03-29T06:40:25 UTC" level=debug msg="ignore signal child exited" argo=true - time="2024-03-29T06:40:26 UTC" level=info msg="capturing logs" argo=true - time="2024-03-29T06:40:26 UTC" level=debug msg="ignore signal urgent I/O condition" argo=true - intentional failure - time="2024-03-29T06:40:26 UTC" level=debug msg="ignore signal child exited" argo=true - time="2024-03-29T06:40:26 UTC" level=debug msg="forwarding signal terminated" argo=true - time="2024-03-29T06:40:27 UTC" level=info msg="sub-process exited" argo=true error="" - time="2024-03-29T06:40:27 UTC" level=info msg="not saving outputs - not main container" argo=true - Error: exit status 1 - ``` - 2. If a container's `command` cannot be located, it will not be retried. 
+```text +time="2024-03-29T06:40:25 UTC" level=info msg="capturing logs" argo=true +intentional failure +time="2024-03-29T06:40:25 UTC" level=debug msg="ignore signal child exited" argo=true +time="2024-03-29T06:40:26 UTC" level=info msg="capturing logs" argo=true +time="2024-03-29T06:40:26 UTC" level=debug msg="ignore signal urgent I/O condition" argo=true +intentional failure +time="2024-03-29T06:40:26 UTC" level=debug msg="ignore signal child exited" argo=true +time="2024-03-29T06:40:26 UTC" level=debug msg="forwarding signal terminated" argo=true +time="2024-03-29T06:40:27 UTC" level=info msg="sub-process exited" argo=true error="" +time="2024-03-29T06:40:27 UTC" level=info msg="not saving outputs - not main container" argo=true +Error: exit status 1 +``` + 1. If a container's `command` cannot be located, it will not be retried. - As it will fail each time, the retry logic is short-circuited. From 22caa4554260b17823692e0ba22150be7986dfd1 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Thu, 4 Apr 2024 09:22:18 +0800 Subject: [PATCH 59/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 1c342cdf52cf..ba1955d4a56c 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -177,4 +177,4 @@ time="2024-03-29T06:40:27 UTC" level=info msg="not saving outputs - not main con Error: exit status 1 ``` 1. If a container's `command` cannot be located, it will not be retried. - - As it will fail each time, the retry logic is short-circuited. + - As it will fail each time, the retry logic is short-circuited. 
\ No newline at end of file From 8f3655fe4ad0352e9ac53ef2162f202197d60cff Mon Sep 17 00:00:00 2001 From: shuangkun Date: Thu, 4 Apr 2024 09:24:56 +0800 Subject: [PATCH 60/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index ba1955d4a56c..daa813383c69 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -176,5 +176,7 @@ time="2024-03-29T06:40:27 UTC" level=info msg="sub-process exited" argo=true err time="2024-03-29T06:40:27 UTC" level=info msg="not saving outputs - not main container" argo=true Error: exit status 1 ``` + +!!! Note 1. If a container's `command` cannot be located, it will not be retried. - As it will fail each time, the retry logic is short-circuited. \ No newline at end of file From 836aff0b631b6b5b6a1874d6f8435c04f1d2830e Mon Sep 17 00:00:00 2001 From: shuangkun Date: Thu, 4 Apr 2024 09:28:21 +0800 Subject: [PATCH 61/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index daa813383c69..90ffc5cbf3e1 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -163,6 +163,7 @@ spec: `containerSet.retryStrategy` works differently from [template-level retries](retries.md): 1. Your `command` will be re-ran by the Executor inside the same container if it fails. - As no new containers are created, the nodes in the UI remain the same, and the retried logs are appended to original container's logs. For example, your container logs may look like: + ```text time="2024-03-29T06:40:25 UTC" level=info msg="capturing logs" argo=true intentional failure @@ -179,4 +180,4 @@ Error: exit status 1 !!! Note 1. If a container's `command` cannot be located, it will not be retried. 
- - As it will fail each time, the retry logic is short-circuited. \ No newline at end of file + - As it will fail each time, the retry logic is short-circuited. \ No newline at end of file From af66061d36309c9f0330a4cd0b18587a71c83df8 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Thu, 4 Apr 2024 09:32:43 +0800 Subject: [PATCH 62/63] fix: docs Signed-off-by: shuangkun --- docs/container-set-template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 90ffc5cbf3e1..7ff22e068f77 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -180,4 +180,4 @@ Error: exit status 1 !!! Note 1. If a container's `command` cannot be located, it will not be retried. - - As it will fail each time, the retry logic is short-circuited. \ No newline at end of file + - As it will fail each time, the retry logic is short-circuited. From 02ce8daf86fda50c244f4e0a387942cec422b1d7 Mon Sep 17 00:00:00 2001 From: Anton Gilgur Date: Thu, 4 Apr 2024 01:43:06 -0400 Subject: [PATCH 63/63] fix markdown in admonition with an ignore and some new line corrections Signed-off-by: Anton Gilgur --- docs/container-set-template.md | 39 +++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/docs/container-set-template.md b/docs/container-set-template.md index 7ff22e068f77..2108fa3d08b9 100644 --- a/docs/container-set-template.md +++ b/docs/container-set-template.md @@ -151,7 +151,7 @@ spec: args: - | print("hi") - # if fails, it will retry at most ten times. + # if fails, it will retry at most ten times. - name: fail-retry image: python:alpine3.6 command: ["python", -c] @@ -159,25 +159,30 @@ spec: args: ["import random; import sys; exit_code = random.choice([0, 1, 1]); sys.exit(exit_code)"] ``` -!!! Note Template-level `retryStrategy` vs Container Set `retryStrategy` + + +!!! 
Note "Template-level `retryStrategy` vs Container Set `retryStrategy`" `containerSet.retryStrategy` works differently from [template-level retries](retries.md): + 1. Your `command` will be re-ran by the Executor inside the same container if it fails. - - As no new containers are created, the nodes in the UI remain the same, and the retried logs are appended to original container's logs. For example, your container logs may look like: -```text -time="2024-03-29T06:40:25 UTC" level=info msg="capturing logs" argo=true -intentional failure -time="2024-03-29T06:40:25 UTC" level=debug msg="ignore signal child exited" argo=true -time="2024-03-29T06:40:26 UTC" level=info msg="capturing logs" argo=true -time="2024-03-29T06:40:26 UTC" level=debug msg="ignore signal urgent I/O condition" argo=true -intentional failure -time="2024-03-29T06:40:26 UTC" level=debug msg="ignore signal child exited" argo=true -time="2024-03-29T06:40:26 UTC" level=debug msg="forwarding signal terminated" argo=true -time="2024-03-29T06:40:27 UTC" level=info msg="sub-process exited" argo=true error="" -time="2024-03-29T06:40:27 UTC" level=info msg="not saving outputs - not main container" argo=true -Error: exit status 1 -``` + - As no new containers are created, the nodes in the UI remain the same, and the retried logs are appended to the original container's logs.
For example, your container logs may look like: + ```text + time="2024-03-29T06:40:25 UTC" level=info msg="capturing logs" argo=true + intentional failure + time="2024-03-29T06:40:25 UTC" level=debug msg="ignore signal child exited" argo=true + time="2024-03-29T06:40:26 UTC" level=info msg="capturing logs" argo=true + time="2024-03-29T06:40:26 UTC" level=debug msg="ignore signal urgent I/O condition" argo=true + intentional failure + time="2024-03-29T06:40:26 UTC" level=debug msg="ignore signal child exited" argo=true + time="2024-03-29T06:40:26 UTC" level=debug msg="forwarding signal terminated" argo=true + time="2024-03-29T06:40:27 UTC" level=info msg="sub-process exited" argo=true error="" + time="2024-03-29T06:40:27 UTC" level=info msg="not saving outputs - not main container" argo=true + Error: exit status 1 + ``` -!!! Note 1. If a container's `command` cannot be located, it will not be retried. + - As it will fail each time, the retry logic is short-circuited. + +