From c0f879cc922ba77c63e13154a85be04285aff194 Mon Sep 17 00:00:00 2001
From: Alan Clucas
Date: Thu, 15 Aug 2024 12:37:06 +0100
Subject: [PATCH] feat: enable configuration of temporality in opentelemetry metrics (#13267)

Signed-off-by: Alan Clucas
Signed-off-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com>
Co-authored-by: Anton Gilgur <4970083+agilgur5@users.noreply.github.com>
---
 .spelling                               |  1 +
 config/config.go                        | 11 +++++++++++
 docs/metrics.md                         | 10 +++++++++-
 docs/workflow-controller-configmap.yaml |  2 ++
 workflow/controller/controller.go       |  1 +
 workflow/metrics/metrics.go             | 21 ++++++++++++++++++++-
 6 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/.spelling b/.spelling
index df9fc644c217..f9c44d655bcf 100644
--- a/.spelling
+++ b/.spelling
@@ -205,6 +205,7 @@ sandboxed
 shortcodes
 stateful
 stderr
+temporality
 triaged
 un-reconciled
 v1
diff --git a/config/config.go b/config/config.go
index c94efd1b590f..178d8d955ac3 100644
--- a/config/config.go
+++ b/config/config.go
@@ -245,6 +245,13 @@ type MySQLConfig struct {
 	Options map[string]string `json:"options,omitempty"`
 }
 
+type MetricsTemporality string
+
+const (
+	MetricsTemporalityCumulative MetricsTemporality = "Cumulative"
+	MetricsTemporalityDelta      MetricsTemporality = "Delta"
+)
+
 // MetricsConfig defines a config for a metrics server
 type MetricsConfig struct {
 	// Enabled controls metric emission. Default is true, set "enabled: false" to turn off
@@ -262,6 +269,10 @@ type MetricsConfig struct {
 	IgnoreErrors bool `json:"ignoreErrors,omitempty"`
 	// Secure is a flag that starts the metrics servers using TLS, defaults to true
 	Secure *bool `json:"secure,omitempty"`
+	// Temporality of the OpenTelemetry metrics.
+	// Enum of Cumulative or Delta, defaulting to Cumulative.
+	// No effect on Prometheus metrics, which are always Cumulative.
+	Temporality MetricsTemporality `json:"temporality,omitempty"`
 }
 
 func (mc MetricsConfig) GetSecure(defaultValue bool) bool {
diff --git a/docs/metrics.md b/docs/metrics.md
index 1bbf155211ae..208dfacff4ee 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -36,7 +36,7 @@ It will not be enabled if left blank, unlike some other implementations.
 
 You can configure the protocol using the environment variables documented in [standard environment variables](https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/).
 
-The [configuration option](#common) in the controller ConfigMap `metricsTTL` affects the OpenTelemetry behavior, but the other parameters do not.
+The [configuration options](#common) in the controller ConfigMap `metricsTTL` and `temporality` affect the OpenTelemetry behavior, but the other parameters do not.
 
 To use the [OpenTelemetry collector](https://opentelemetry.io/docs/collector/) you can configure it
 
@@ -50,6 +50,14 @@ receivers:
 
 You can use the [OpenTelemetry operator](https://opentelemetry.io/docs/kubernetes/operator/) to setup the collector and instrument the workflow-controller.
 
+You can configure the [temporality](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#temporality) of OpenTelemetry metrics in the [Workflow Controller ConfigMap](workflow-controller-configmap.md).
+
+```yaml
+metricsConfig: |
+  # >= 3.6. Which temporality to use for OpenTelemetry. Default is "Cumulative"
+  temporality: Delta
+```
+
 ### Prometheus scraping
 
 You can adjust various elements of the Prometheus metrics configuration by changing values in the [Workflow Controller Config Map](workflow-controller-configmap.md).
diff --git a/docs/workflow-controller-configmap.yaml b/docs/workflow-controller-configmap.yaml
index 7b357c6b9779..292edd3303e1 100644
--- a/docs/workflow-controller-configmap.yaml
+++ b/docs/workflow-controller-configmap.yaml
@@ -228,6 +228,8 @@ data:
     # Use a self-signed cert for TLS
     # >= 3.6: default true
     secure: true
+    # >= 3.6. Which temporality to use for OpenTelemetry. Default is "Cumulative"
+    temporality: Delta
     # DEPRECATED: Legacy metrics are now removed, this field is ignored
     disableLegacy: false
 
diff --git a/workflow/controller/controller.go b/workflow/controller/controller.go
index daa4c08fa9bb..d030cf1906cd 100644
--- a/workflow/controller/controller.go
+++ b/workflow/controller/controller.go
@@ -1360,6 +1360,7 @@ func (wfc *WorkflowController) getMetricsServerConfig() *metrics.Config {
 		TTL:          time.Duration(wfc.Config.MetricsConfig.MetricsTTL),
 		IgnoreErrors: wfc.Config.MetricsConfig.IgnoreErrors,
 		Secure:       wfc.Config.MetricsConfig.GetSecure(true),
+		Temporality:  wfc.Config.MetricsConfig.Temporality,
 	}
 	return &metricsConfig
 }
diff --git a/workflow/metrics/metrics.go b/workflow/metrics/metrics.go
index b17a61b5a48c..87b3cfad05d6 100644
--- a/workflow/metrics/metrics.go
+++ b/workflow/metrics/metrics.go
@@ -9,10 +9,13 @@ import (
 
 	log "github.com/sirupsen/logrus"
 	"go.opentelemetry.io/otel"
+	wfconfig "github.com/argoproj/argo-workflows/v3/config"
+
 	"go.opentelemetry.io/contrib/instrumentation/runtime"
 	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
 	"go.opentelemetry.io/otel/metric"
 	metricsdk "go.opentelemetry.io/otel/sdk/metric"
+	"go.opentelemetry.io/otel/sdk/metric/metricdata"
 	"go.opentelemetry.io/otel/sdk/resource"
 	semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
 )
@@ -24,6 +27,7 @@ type Config struct {
 	TTL          time.Duration
 	IgnoreErrors bool
 	Secure       bool
+	Temporality  wfconfig.MetricsTemporality
 }
 
 type Metrics struct {
@@ -52,7 +56,7 @@ func New(ctx context.Context, serviceName string, config *Config, callbacks Call
 	_, otlpMetricsEnabled := os.LookupEnv(`OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`)
 	if otlpEnabled || otlpMetricsEnabled {
 		log.Info("Starting OTLP metrics exporter")
-		otelExporter, err := otlpmetricgrpc.New(ctx)
+		otelExporter, err := otlpmetricgrpc.New(ctx, otlpmetricgrpc.WithTemporalitySelector(getTemporality(config)))
 		if err != nil {
 			return nil, err
 		}
@@ -117,3 +121,18 @@ func (m *Metrics) populate(ctx context.Context, adders ...addMetric) error {
 	}
 	return nil
 }
+
+func getTemporality(config *Config) metricsdk.TemporalitySelector {
+	switch config.Temporality {
+	case wfconfig.MetricsTemporalityCumulative:
+		return func(metricsdk.InstrumentKind) metricdata.Temporality {
+			return metricdata.CumulativeTemporality
+		}
+	case wfconfig.MetricsTemporalityDelta:
+		return func(metricsdk.InstrumentKind) metricdata.Temporality {
+			return metricdata.DeltaTemporality
+		}
+	default:
+		return metricsdk.DefaultTemporalitySelector
+	}
+}