diff --git a/docs/metrics.md b/docs/metrics.md index f4f013e4e568..36c51e89d5ab 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -215,6 +215,15 @@ The build information for this workflow controller | `treestate` | Whether the git tree was `dirty` or `clean` when this was built | | `tag` | The tag on the git commit or `untagged` if it was not tagged | +#### `cronworkflows_triggered_total` + +A counter of the number of times a CronWorkflow has been triggered. + +| attribute | explanation | +|-------------|-------------------------------------------| +| `name` | ⚠️ The name of the CronWorkflow. | +| `namespace` | The namespace of the CronWorkflow | + #### `gauge` A gauge of the number of workflows currently in the cluster in each phase. The `Running` count does not mean that a workflows pods are running, just that the controller has scheduled them. A workflow can be stuck in `Running` with pending pods for a long time. diff --git a/docs/upgrading.md b/docs/upgrading.md index 146ef003f28f..35ff583b0df1 100644 --- a/docs/upgrading.md +++ b/docs/upgrading.md @@ -26,6 +26,7 @@ These notes explain the differences in using the Prometheus `/metrics` endpoint The following are new metrics: * `controller_build_info` +* `cronworkflows_triggered_total` * `k8s_request_duration` * `leader` * `pods_total_count` diff --git a/test/e2e/metrics_test.go b/test/e2e/metrics_test.go index 3bc6dcc91240..f6af1bf4b954 100644 --- a/test/e2e/metrics_test.go +++ b/test/e2e/metrics_test.go @@ -142,6 +142,22 @@ func (s *MetricsSuite) TestPodPendingMetric() { WaitForWorkflowDeletion() } +func (s *MetricsSuite) TestCronTriggeredCounter() { + s.Given(). + CronWorkflow(`@testdata/cronworkflow-metrics.yaml`). + When(). + CreateCronWorkflow(). + Wait(1 * time.Minute). // This pattern is used in cron_test.go too + Then(). + ExpectCron(func(t *testing.T, cronWf *wfv1.CronWorkflow) { + s.e(s.T()).GET(""). + Expect(). + Status(200). + Body(). 
+ Contains(`cronworkflows_triggered_total{name="test-cron-metric",namespace="argo"} 1`) + }) +} + func TestMetricsSuite(t *testing.T) { suite.Run(t, new(MetricsSuite)) } diff --git a/test/e2e/testdata/cronworkflow-metrics.yaml b/test/e2e/testdata/cronworkflow-metrics.yaml new file mode 100644 index 000000000000..e3fdd9c13e9b --- /dev/null +++ b/test/e2e/testdata/cronworkflow-metrics.yaml @@ -0,0 +1,19 @@ +apiVersion: argoproj.io/v1alpha1 +kind: CronWorkflow +metadata: + name: test-cron-metric +spec: + schedule: "* * * * *" + concurrencyPolicy: "Allow" + startingDeadlineSeconds: 0 + workflowSpec: + metadata: + labels: + workflows.argoproj.io/test: "true" + podGC: + strategy: OnPodCompletion + entrypoint: whalesay + templates: + - name: whalesay + container: + image: argoproj/argosay:v2 diff --git a/workflow/cron/operator.go b/workflow/cron/operator.go index ca5d8e6a0118..d2d3869e5095 100644 --- a/workflow/cron/operator.go +++ b/workflow/cron/operator.go @@ -80,6 +80,7 @@ func (woc *cronWfOperationCtx) run(ctx context.Context, scheduledRuntime time.Ti defer woc.persistUpdate(ctx) woc.log.Infof("Running %s", woc.name) + woc.metrics.CronWfTrigger(ctx, woc.name, woc.cronWf.ObjectMeta.Namespace) // If the cron workflow has a schedule that was just updated, update its annotation if woc.cronWf.IsUsingNewSchedule() { diff --git a/workflow/metrics/counter_cronworkflow_trigger.go b/workflow/metrics/counter_cronworkflow_trigger.go new file mode 100644 index 000000000000..2f1950e4331e --- /dev/null +++ b/workflow/metrics/counter_cronworkflow_trigger.go @@ -0,0 +1,25 @@ +package metrics + +import ( + "context" +) + +const ( + nameCronTriggered = `cronworkflows_triggered_total` +) +// addCronWfTriggerCounter creates the cronworkflows_triggered_total int64 counter on m via createInstrument and returns that call's error. +func addCronWfTriggerCounter(_ context.Context, m *Metrics) error { + return m.createInstrument(int64Counter, + nameCronTriggered, + "Total number of cron workflows triggered", + "{cronworkflow}", + withAsBuiltIn(), + ) +} +// CronWfTrigger adds 1 to the cronworkflows_triggered_total counter, attributed with the given CronWorkflow name and namespace. +func (m *Metrics) CronWfTrigger(ctx context.Context, name, namespace 
string) { + m.addInt(ctx, nameCronTriggered, 1, instAttribs{ + {name: labelCronWFName, value: name}, + {name: labelWorkflowNamespace, value: namespace}, + }) +} diff --git a/workflow/metrics/labels.go b/workflow/metrics/labels.go index 9652408e4a16..8678e7d41a64 100644 --- a/workflow/metrics/labels.go +++ b/workflow/metrics/labels.go @@ -10,6 +10,8 @@ const ( labelBuildGitTreeState string = `treestate` labelBuildGitTag string = `tag` + labelCronWFName string = `name` + labelErrorCause string = "cause" labelLogLevel string = `level` diff --git a/workflow/metrics/metrics.go b/workflow/metrics/metrics.go index 3f74d05de3fe..ca0ad92e89d4 100644 --- a/workflow/metrics/metrics.go +++ b/workflow/metrics/metrics.go @@ -100,6 +100,7 @@ func New(ctx context.Context, serviceName string, config *Config, callbacks Call addPodMissingCounter, addPodPendingCounter, addWorkflowPhaseGauge, + addCronWfTriggerCounter, addOperationDurationHistogram, addErrorCounter, addLogCounter,