From 9c9e0db53abf4444e16d2155378b704eb6fb0411 Mon Sep 17 00:00:00 2001 From: ChrsMark Date: Mon, 9 Dec 2024 12:31:05 +0200 Subject: [PATCH] Add k8s metrics for jobs and cronjobs Signed-off-by: ChrsMark --- .chloggen/add_k8s_jobs.yaml | 22 ++++ docs/non-normative/k8s-migration.md | 39 +++++++ docs/system/k8s-metrics.md | 172 ++++++++++++++++++++++++++++ model/k8s/metrics.yaml | 82 +++++++++++++ 4 files changed, 315 insertions(+) create mode 100755 .chloggen/add_k8s_jobs.yaml diff --git a/.chloggen/add_k8s_jobs.yaml b/.chloggen/add_k8s_jobs.yaml new file mode 100755 index 0000000000..1937c814f5 --- /dev/null +++ b/.chloggen/add_k8s_jobs.yaml @@ -0,0 +1,22 @@ +# Use this changelog template to create an entry for release notes. +# +# If your change doesn't affect end users you should instead start +# your pull request title with [chore] or use the "Skip Changelog" label. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the area of concern in the attributes-registry, (e.g. http, cloud, db) +component: k8s + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add k8s metrics for job and cronjob + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +# The values here must be integers. +issues: [1660] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: diff --git a/docs/non-normative/k8s-migration.md b/docs/non-normative/k8s-migration.md index aafab07ed6..39e7d464c5 100644 --- a/docs/non-normative/k8s-migration.md +++ b/docs/non-normative/k8s-migration.md @@ -49,6 +49,8 @@ and one for disabling the old schema called `semconv.k8s.disableLegacy`. 
Then: - [K8s StatefulsSet metrics](#k8s-statefulsset-metrics) - [K8s HorizontalPodAutoscaler metrics](#k8s-horizontalpodautoscaler-metrics) - [K8s DaemonSet metrics](#k8s-daemonset-metrics) + - [K8s Job metrics](#k8s-job-metrics) + - [K8s CronJob metrics](#k8s-cronjob-metrics) @@ -195,3 +197,40 @@ The changes in their metric types are the following: | `k8s.daemonset.ready_nodes` (type: `gauge`) | `k8s.daemonset.ready_nodes` (type: `updowncounter`) | + +### K8s Job metrics + +The K8s Job metrics implemented by the Collector and specifically the +[k8scluster](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.115.0/receiver/k8sclusterreceiver/documentation.md) +receiver were introduced as semantic conventions in +[#1660](https://github.com/open-telemetry/semantic-conventions/pull/1660) (TODO: replace with SemConv version once +available). + +The changes in their metric types are the following: + + + +| Old (Collector) ![changed](https://img.shields.io/badge/changed-orange?style=flat) | New | +|----------------------------------------------------------|----------------------------------------| +| `k8s.job.active_pods` (type: `gauge`) | `k8s.job.active_pods` (type: `updowncounter`) | +| `k8s.job.failed_pods` (type: `gauge`) | `k8s.job.failed_pods` (type: `updowncounter`) | +| `k8s.job.desired_successful_pods` (type: `gauge`) | `k8s.job.desired_successful_pods` (type: `updowncounter`) | +| `k8s.job.max_parallel_pods` (type: `gauge`) | `k8s.job.max_parallel_pods` (type: `updowncounter`) | + +### K8s CronJob metrics + +The K8s CronJob metrics implemented by the Collector and specifically the +[k8scluster](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.115.0/receiver/k8sclusterreceiver/documentation.md) +receiver were introduced as semantic conventions in +[#1660](https://github.com/open-telemetry/semantic-conventions/pull/1660) (TODO: replace with SemConv version once +available). 
+ +The changes in their metric types are the following: + + + +| Old (Collector) ![changed](https://img.shields.io/badge/changed-orange?style=flat) | New | +|--------------------------------------------------|--------------------------------| +| `k8s.cronjob.active_jobs` (type: `gauge`) | `k8s.cronjob.active_jobs` (type: `updowncounter`) | + + diff --git a/docs/system/k8s-metrics.md b/docs/system/k8s-metrics.md index ccae842e5d..e95ba7fec1 100644 --- a/docs/system/k8s-metrics.md +++ b/docs/system/k8s-metrics.md @@ -57,6 +57,14 @@ and therefore inherit its attributes, like `k8s.pod.name` and `k8s.pod.uid`. - [Metric: `k8s.daemonset.desired_scheduled_nodes`](#metric-k8sdaemonsetdesired_scheduled_nodes) - [Metric: `k8s.daemonset.misscheduled_nodes`](#metric-k8sdaemonsetmisscheduled_nodes) - [Metric: `k8s.daemonset.ready_nodes`](#metric-k8sdaemonsetready_nodes) +- [Job Metrics](#job-metrics) + - [Metric: `k8s.job.active_pods`](#metric-k8sjobactive_pods) + - [Metric: `k8s.job.failed_pods`](#metric-k8sjobfailed_pods) + - [Metric: `k8s.job.successful_pods`](#metric-k8sjobsuccessful_pods) + - [Metric: `k8s.job.desired_successful_pods`](#metric-k8sjobdesired_successful_pods) + - [Metric: `k8s.job.max_parallel_pods`](#metric-k8sjobmax_parallel_pods) +- [CronJob Metrics](#cronjob-metrics) + - [Metric: `k8s.cronjob.active_jobs`](#metric-k8scronjobactive_jobs) @@ -856,5 +864,169 @@ This metric SHOULD, at a minimum, be reported against a +## Job Metrics + +**Description:** Job level metrics captured under the namespace `k8s.job`. + +### Metric: `k8s.job.active_pods` + +This metric is [recommended][MetricRecommended]. 
+ + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `k8s.job.active_pods` | UpDownCounter | `{pod}` | The number of pending and actively running pods for a job [1] | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + +**[1]:** This metric aligns with the `active` field of the +[K8s JobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobstatus-v1-batch). + +This metric SHOULD, at a minimum, be reported against a +[`k8s.job`](../resource/k8s.md#job) resource. + + + + + + +### Metric: `k8s.job.failed_pods` + +This metric is [recommended][MetricRecommended]. + + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `k8s.job.failed_pods` | UpDownCounter | `{pod}` | The number of pods which reached phase Failed for a job [1] | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + +**[1]:** This metric aligns with the `failed` field of the +[K8s JobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobstatus-v1-batch). + +This metric SHOULD, at a minimum, be reported against a +[`k8s.job`](../resource/k8s.md#job) resource. + + + + + + +### Metric: `k8s.job.successful_pods` + +This metric is [recommended][MetricRecommended]. + + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `k8s.job.successful_pods` | UpDownCounter | `{pod}` | The number of pods which reached phase Succeeded for a job [1] | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + +**[1]:** This metric aligns with the `succeeded` field of the +[K8s JobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobstatus-v1-batch). 
+ +This metric SHOULD, at a minimum, be reported against a +[`k8s.job`](../resource/k8s.md#job) resource. + + + + + + +### Metric: `k8s.job.desired_successful_pods` + +This metric is [recommended][MetricRecommended]. + + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `k8s.job.desired_successful_pods` | UpDownCounter | `{pod}` | The desired number of successfully finished pods the job should be run with [1] | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + +**[1]:** This metric aligns with the `completions` field of the +[K8s JobSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobspec-v1-batch). + +This metric SHOULD, at a minimum, be reported against a +[`k8s.job`](../resource/k8s.md#job) resource. + + + + + + +### Metric: `k8s.job.max_parallel_pods` + +This metric is [recommended][MetricRecommended]. + + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `k8s.job.max_parallel_pods` | UpDownCounter | `{pod}` | The max desired number of pods the job should run at any given time [1] | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + +**[1]:** This metric aligns with the `parallelism` field of the +[K8s JobSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobspec-v1-batch). + +This metric SHOULD, at a minimum, be reported against a +[`k8s.job`](../resource/k8s.md#job) resource. + + + + + + +## CronJob Metrics + +**Description:** CronJob level metrics captured under the namespace `k8s.cronjob`. + +### Metric: `k8s.cronjob.active_jobs` + +This metric is [recommended][MetricRecommended]. 
+ + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `k8s.cronjob.active_jobs` | UpDownCounter | `{job}` | The number of actively running jobs for a cronjob [1] | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + +**[1]:** This metric aligns with the `active` field of the +[K8s CronJobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#cronjobstatus-v1-batch). + +This metric SHOULD, at a minimum, be reported against a +[`k8s.cronjob`](../resource/k8s.md#cronjob) resource. + + + + + + [DocumentStatus]: https://opentelemetry.io/docs/specs/otel/document-status [MetricRecommended]: /docs/general/metric-requirement-level.md#recommended diff --git a/model/k8s/metrics.yaml b/model/k8s/metrics.yaml index 6debfdc24c..1301662270 100644 --- a/model/k8s/metrics.yaml +++ b/model/k8s/metrics.yaml @@ -360,3 +360,85 @@ groups: [`k8s.daemonset`](../resource/k8s.md#daemonset) resource. instrument: updowncounter unit: "{node}" + + # k8s.job.* metrics + - id: metric.k8s.job.active_pods + type: metric + metric_name: k8s.job.active_pods + stability: experimental + brief: "The number of pending and actively running pods for a job" + instrument: updowncounter + unit: "{pod}" + note: | + This metric aligns with the `active` field of the + [K8s JobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobstatus-v1-batch). + + This metric SHOULD, at a minimum, be reported against a + [`k8s.job`](../resource/k8s.md#job) resource. + - id: metric.k8s.job.failed_pods + type: metric + metric_name: k8s.job.failed_pods + stability: experimental + brief: "The number of pods which reached phase Failed for a job" + instrument: updowncounter + unit: "{pod}" + note: | + This metric aligns with the `failed` field of the + [K8s JobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobstatus-v1-batch). 
+ + This metric SHOULD, at a minimum, be reported against a + [`k8s.job`](../resource/k8s.md#job) resource. + - id: metric.k8s.job.successful_pods + type: metric + metric_name: k8s.job.successful_pods + stability: experimental + brief: "The number of pods which reached phase Succeeded for a job" + instrument: updowncounter + unit: "{pod}" + note: | + This metric aligns with the `succeeded` field of the + [K8s JobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobstatus-v1-batch). + + This metric SHOULD, at a minimum, be reported against a + [`k8s.job`](../resource/k8s.md#job) resource. + - id: metric.k8s.job.desired_successful_pods + type: metric + metric_name: k8s.job.desired_successful_pods + stability: experimental + brief: "The desired number of successfully finished pods the job should be run with" + instrument: updowncounter + unit: "{pod}" + note: | + This metric aligns with the `completions` field of the + [K8s JobSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobspec-v1-batch). + + This metric SHOULD, at a minimum, be reported against a + [`k8s.job`](../resource/k8s.md#job) resource. + - id: metric.k8s.job.max_parallel_pods + type: metric + metric_name: k8s.job.max_parallel_pods + stability: experimental + brief: "The max desired number of pods the job should run at any given time" + instrument: updowncounter + unit: "{pod}" + note: | + This metric aligns with the `parallelism` field of the + [K8s JobSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobspec-v1-batch). + + This metric SHOULD, at a minimum, be reported against a + [`k8s.job`](../resource/k8s.md#job) resource. 
+ + # k8s.cronjob.* metrics + - id: metric.k8s.cronjob.active_jobs + type: metric + metric_name: k8s.cronjob.active_jobs + stability: experimental + brief: "The number of actively running jobs for a cronjob" + instrument: updowncounter + unit: "{job}" + note: | + This metric aligns with the `active` field of the + [K8s CronJobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#cronjobstatus-v1-batch). + + This metric SHOULD, at a minimum, be reported against a + [`k8s.cronjob`](../resource/k8s.md#cronjob) resource.