diff --git a/component/otelcol/connector/spanmetrics/spanmetrics_test.go b/component/otelcol/connector/spanmetrics/spanmetrics_test.go index 62f65966a36c..7a3a3f891504 100644 --- a/component/otelcol/connector/spanmetrics/spanmetrics_test.go +++ b/component/otelcol/connector/spanmetrics/spanmetrics_test.go @@ -1,10 +1,14 @@ package spanmetrics_test import ( + "context" "testing" "time" "github.com/grafana/agent/component/otelcol/connector/spanmetrics" + "github.com/grafana/agent/component/otelcol/processor/processortest" + "github.com/grafana/agent/pkg/flow/componenttest" + "github.com/grafana/agent/pkg/util" "github.com/grafana/river" "github.com/open-telemetry/opentelemetry-collector-contrib/connector/spanmetricsconnector" "github.com/stretchr/testify/require" @@ -331,3 +335,417 @@ func TestArguments_UnmarshalRiver(t *testing.T) { }) } } + +func testRunProcessor(t *testing.T, processorConfig string, testSignal processortest.Signal) { + ctx := componenttest.TestContext(t) + testRunProcessorWithContext(ctx, t, processorConfig, testSignal) +} + +func testRunProcessorWithContext(ctx context.Context, t *testing.T, processorConfig string, testSignal processortest.Signal) { + l := util.TestLogger(t) + + ctrl, err := componenttest.NewControllerFromID(l, "otelcol.connector.spanmetrics") + require.NoError(t, err) + + var args spanmetrics.Arguments + require.NoError(t, river.Unmarshal([]byte(processorConfig), &args)) + + // Override the arguments so signals get forwarded to the test channel. 
+ args.Output = testSignal.MakeOutput() + + prc := processortest.ProcessorRunConfig{ + Ctx: ctx, + T: t, + Args: args, + TestSignal: testSignal, + Ctrl: ctrl, + L: l, + } + processortest.TestRunProcessor(prc) +} + +func Test_ComponentIO(t *testing.T) { + const defaultInputTrace = `{ + "resourceSpans": [{ + "resource": { + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "res_attribute1", + "value": { "intValue": "11" } + }] + }, + "scopeSpans": [{ + "spans": [{ + "trace_id": "7bba9f33312b3dbb8b2c2c62bb7abe2d", + "span_id": "086e83747d0e381e", + "name": "TestSpan", + "attributes": [{ + "key": "attribute1", + "value": { "intValue": "78" } + }] + }] + }] + },{ + "resource": { + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "res_attribute1", + "value": { "intValue": "11" } + }] + }, + "scopeSpans": [{ + "spans": [{ + "trace_id": "7bba9f33312b3dbb8b2c2c62bb7abe2d", + "span_id": "086e83747d0e381b", + "name": "TestSpan", + "attributes": [{ + "key": "attribute1", + "value": { "intValue": "78" } + }] + }] + }] + }] + }` + + tests := []struct { + testName string + cfg string + inputTraceJson string + expectedOutputLogJson string + }{ + { + testName: "Sum metric only", + cfg: ` + metrics_flush_interval = "1s" + histogram { + disable = true + explicit {} + } + + output { + // no-op: will be overridden by test code. 
+ } + `, + inputTraceJson: defaultInputTrace, + expectedOutputLogJson: `{ + "resourceMetrics": [{ + "resource": { + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "res_attribute1", + "value": { "intValue": "11" } + }] + }, + "scopeMetrics": [{ + "scope": { + "name": "spanmetricsconnector" + }, + "metrics": [{ + "name": "calls", + "sum": { + "dataPoints": [{ + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "span.name", + "value": { "stringValue": "TestSpan" } + }, + { + "key": "span.kind", + "value": { "stringValue": "SPAN_KIND_UNSPECIFIED" } + }, + { + "key": "status.code", + "value": { "stringValue": "STATUS_CODE_UNSET" } + }], + "startTimeUnixNano": "0", + "timeUnixNano": "0", + "asInt": "2" + }], + "aggregationTemporality": 2, + "isMonotonic": true + } + }] + }] + }] + }`, + }, + { + testName: "Sum metric only for two spans", + cfg: ` + metrics_flush_interval = "1s" + histogram { + disable = true + explicit {} + } + + output { + // no-op: will be overridden by test code. 
+ } + `, + inputTraceJson: `{ + "resourceSpans": [{ + "resource": { + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "k8s.pod.name", + "value": { "stringValue": "first" } + }] + }, + "scopeSpans": [{ + "spans": [{ + "trace_id": "7bba9f33312b3dbb8b2c2c62bb7abe2d", + "span_id": "086e83747d0e381e", + "name": "TestSpan", + "attributes": [{ + "key": "attribute1", + "value": { "intValue": "78" } + }] + }] + }] + },{ + "resource": { + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "k8s.pod.name", + "value": { "stringValue": "second" } + }] + }, + "scopeSpans": [{ + "spans": [{ + "trace_id": "7bba9f33312b3dbb8b2c2c62bb7abe2d", + "span_id": "086e83747d0e381b", + "name": "TestSpan", + "attributes": [{ + "key": "attribute1", + "value": { "intValue": "78" } + }] + }] + }] + }] + }`, + expectedOutputLogJson: `{ + "resourceMetrics": [{ + "resource": { + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "k8s.pod.name", + "value": { "stringValue": "first" } + }] + }, + "scopeMetrics": [{ + "scope": { + "name": "spanmetricsconnector" + }, + "metrics": [{ + "name": "calls", + "sum": { + "dataPoints": [{ + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "span.name", + "value": { "stringValue": "TestSpan" } + }, + { + "key": "span.kind", + "value": { "stringValue": "SPAN_KIND_UNSPECIFIED" } + }, + { + "key": "status.code", + "value": { "stringValue": "STATUS_CODE_UNSET" } + }], + "startTimeUnixNano": "0", + "timeUnixNano": "0", + "asInt": "1" + }], + "aggregationTemporality": 2, + "isMonotonic": true + } + }] + }] + }, + { + "resource": { + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "k8s.pod.name", + "value": { "stringValue": "second" } + }] + }, + "scopeMetrics": [{ + "scope": { + "name": 
"spanmetricsconnector" + }, + "metrics": [{ + "name": "calls", + "sum": { + "dataPoints": [{ + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "span.name", + "value": { "stringValue": "TestSpan" } + }, + { + "key": "span.kind", + "value": { "stringValue": "SPAN_KIND_UNSPECIFIED" } + }, + { + "key": "status.code", + "value": { "stringValue": "STATUS_CODE_UNSET" } + }], + "startTimeUnixNano": "0", + "timeUnixNano": "0", + "asInt": "1" + }], + "aggregationTemporality": 2, + "isMonotonic": true + } + }] + }] + }] + }`, + }, + { + testName: "Sum and histogram", + cfg: ` + metrics_flush_interval = "1s" + histogram { + explicit { + buckets = ["5m", "10m", "30m"] + } + } + + output { + // no-op: will be overridden by test code. + } + `, + inputTraceJson: defaultInputTrace, + expectedOutputLogJson: `{ + "resourceMetrics": [{ + "resource": { + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "res_attribute1", + "value": { "intValue": "11" } + }] + }, + "scopeMetrics": [{ + "scope": { + "name": "spanmetricsconnector" + }, + "metrics": [{ + "name": "calls", + "sum": { + "dataPoints": [{ + "attributes": [{ + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "span.name", + "value": { "stringValue": "TestSpan" } + }, + { + "key": "span.kind", + "value": { "stringValue": "SPAN_KIND_UNSPECIFIED" } + }, + { + "key": "status.code", + "value": { "stringValue": "STATUS_CODE_UNSET" } + }], + "startTimeUnixNano": "0", + "timeUnixNano": "0", + "asInt": "2" + }], + "aggregationTemporality": 2, + "isMonotonic": true + } + }, + { + "name": "duration", + "unit": "ms", + "histogram": { + "dataPoints": [ + { + "attributes": [ + { + "key": "service.name", + "value": { + "stringValue": "TestSvcName" + } + }, + { + "key": "span.name", + "value": { + "stringValue": "TestSpan" + } + }, + { + "key": "span.kind", + "value": { + "stringValue": 
"SPAN_KIND_UNSPECIFIED" + } + }, + { + "key": "status.code", + "value": { + "stringValue": "STATUS_CODE_UNSET" + } + } + ], + "count": "2", + "sum": 0, + "bucketCounts": [ "2", "0", "0", "0" ], + "explicitBounds": [ 300000, 600000, 1800000 ] + } + ], + "aggregationTemporality": 2 + } + }] + }] + }] + }`, + }, + } + + for _, tt := range tests { + t.Run(tt.testName, func(t *testing.T) { + var args spanmetrics.Arguments + require.NoError(t, river.Unmarshal([]byte(tt.cfg), &args)) + + testRunProcessor(t, tt.cfg, processortest.NewTraceToMetricSignal(tt.inputTraceJson, tt.expectedOutputLogJson)) + }) + } +} diff --git a/component/otelcol/processor/processortest/processortest.go b/component/otelcol/processor/processortest/processortest.go index 3153e662c656..0298f8e9250b 100644 --- a/component/otelcol/processor/processortest/processortest.go +++ b/component/otelcol/processor/processortest/processortest.go @@ -108,6 +108,105 @@ func (s traceToLogSignal) CheckOutput(t *testing.T) { } } +// +// Trace to Metrics +// + +type traceToMetricSignal struct { + metricCh chan pmetric.Metrics + inputTrace ptrace.Traces + expectedOuutputMetric pmetric.Metrics +} + +// Any timestamps inside expectedOutputJson should be set to 0. +func NewTraceToMetricSignal(inputJson string, expectedOutputJson string) Signal { + return &traceToMetricSignal{ + metricCh: make(chan pmetric.Metrics), + inputTrace: CreateTestTraces(inputJson), + expectedOuutputMetric: CreateTestMetrics(expectedOutputJson), + } +} + +func (s traceToMetricSignal) MakeOutput() *otelcol.ConsumerArguments { + return makeMetricsOutput(s.metricCh) +} + +func (s traceToMetricSignal) ConsumeInput(ctx context.Context, consumer otelcol.Consumer) error { + return consumer.ConsumeTraces(ctx, s.inputTrace) +} + +// Set the timestamp of all data points to 0. +// This helps avoid flaky tests due to timestamps. 
+func setMetricTimestampToZero(metrics pmetric.Metrics) { + // Loop over all resource metrics + for i := 0; i < metrics.ResourceMetrics().Len(); i++ { + rm := metrics.ResourceMetrics().At(i) + // Loop over all metric scopes. + for j := 0; j < rm.ScopeMetrics().Len(); j++ { + sm := rm.ScopeMetrics().At(j) + // Loop over all metrics. + for k := 0; k < sm.Metrics().Len(); k++ { + m := sm.Metrics().At(k) + switch m.Type() { + case pmetric.MetricTypeSum: + // Loop over all data points. + for l := 0; l < m.Sum().DataPoints().Len(); l++ { + // Set the timestamp to 0 to avoid flaky tests. + dp := m.Sum().DataPoints().At(l) + dp.SetTimestamp(0) + dp.SetStartTimestamp(0) + } + case pmetric.MetricTypeGauge: + // Loop over all data points. + for l := 0; l < m.Gauge().DataPoints().Len(); l++ { + // Set the timestamp to 0 to avoid flaky tests. + dp := m.Gauge().DataPoints().At(l) + dp.SetTimestamp(0) + dp.SetStartTimestamp(0) + } + case pmetric.MetricTypeHistogram: + // Loop over all data points. + for l := 0; l < m.Histogram().DataPoints().Len(); l++ { + // Set the timestamp to 0 to avoid flaky tests. + dp := m.Histogram().DataPoints().At(l) + dp.SetTimestamp(0) + dp.SetStartTimestamp(0) + } + case pmetric.MetricTypeSummary: + // Loop over all data points. + for l := 0; l < m.Summary().DataPoints().Len(); l++ { + // Set the timestamp to 0 to avoid flaky tests. + dp := m.Summary().DataPoints().At(l) + dp.SetTimestamp(0) + dp.SetStartTimestamp(0) + } + } + } + } + } +} + +// Wait for the component to finish and check its output. +func (s traceToMetricSignal) CheckOutput(t *testing.T) { + // Set the timeout to a few seconds so that all components have finished. + // Components such as otelcol.connector.spanmetrics may need a few + // seconds before they output metrics. 
+ timeout := time.Second * 5 + + select { + case <-time.After(timeout): + require.FailNow(t, "failed waiting for metrics") + case tr := <-s.metricCh: + setMetricTimestampToZero(tr) + trStr := marshalMetrics(tr) + + expStr := marshalMetrics(s.expectedOuutputMetric) + // Set a field from the json to an empty string to avoid flaky tests containing timestamps. + + require.JSONEq(t, expStr, trStr) + } +} + // // Traces // diff --git a/docs/sources/flow/reference/components/otelcol.connector.spanmetrics.md b/docs/sources/flow/reference/components/otelcol.connector.spanmetrics.md index c1e887b78c9f..23c2eaa0a24d 100644 --- a/docs/sources/flow/reference/components/otelcol.connector.spanmetrics.md +++ b/docs/sources/flow/reference/components/otelcol.connector.spanmetrics.md @@ -22,10 +22,25 @@ aggregates Request, Error and Duration (R.E.D) OpenTelemetry metrics from the sp including Errors. Multiple metrics can be aggregated if, for instance, a user wishes to view call counts just on `service.name` and `span.name`. -- **Error** counts are computed from the Request counts which have an `Error` status code metric dimension. + Requests are tracked using a `calls` metric with a `status.code` datapoint attribute set to `Ok`: + ``` + calls { service.name="shipping", span.name="get_shipping/{shippingId}", span.kind="SERVER", status.code="Ok" } + ``` + +- **Error** counts are computed from the number of spans with an `Error` status code. + + Errors are tracked using a `calls` metric with a `status.code` datapoint attribute set to `Error`: + ``` + calls { service.name="shipping", span.name="get_shipping/{shippingId}, span.kind="SERVER", status.code="Error" } + ``` - **Duration** is computed from the difference between the span start and end times and inserted - into the relevant duration histogram time bucket for each unique set dimensions. + into the relevant duration histogram time bucket for each unique set dimensions. 
+ + Span durations are tracked using a `duration` histogram metric: + ``` + duration { service.name="shipping", span.name="get_shipping/{shippingId}", span.kind="SERVER", status.code="Ok" } + ``` > **NOTE**: `otelcol.connector.spanmetrics` is a wrapper over the upstream > OpenTelemetry Collector `spanmetrics` connector. Bug reports or feature requests @@ -52,13 +67,13 @@ otelcol.connector.spanmetrics "LABEL" { `otelcol.connector.spanmetrics` supports the following arguments: -| Name | Type | Description | Default | Required | -| ------------------------- | ---------- | ------------------------------------------------------- | -------------- | -------- | -| `dimensions_cache_size` | `number` | How many dimensions to cache. | `1000` | no | -| `aggregation_temporality` | `string` | Configures whether to reset the metrics after flushing. | `"CUMULATIVE"` | no | -| `metrics_flush_interval` | `duration` | How often to flush generated metrics. | `"15s"` | no | -| `namespace` | `string` | Metric namespace. | `""` | no | -| `exclude_dimensions` | `list(string)` | List of dimensions to be excluded from the default set of dimensions. | `false` | no | +| Name | Type | Description | Default | Required | +| ------------------------- | -------------- | --------------------------------------------------------------------- | -------------- | -------- | +| `dimensions_cache_size` | `number` | How many dimensions to cache. | `1000` | no | +| `aggregation_temporality` | `string` | Configures whether to reset the metrics after flushing. | `"CUMULATIVE"` | no | +| `metrics_flush_interval` | `duration` | How often to flush generated metrics. | `"15s"` | no | +| `namespace` | `string` | Metric namespace. | `""` | no | +| `exclude_dimensions` | `list(string)` | List of dimensions to be excluded from the default set of dimensions. | `false` | no | Adjusting `dimensions_cache_size` can improve the Agent process' memory usage. 
@@ -130,10 +145,10 @@ The `histogram` block configures the histogram derived from spans' durations. The following attributes are supported: -| Name | Type | Description | Default | Required | -| ------ | -------- | ------------------------------- | ------- | -------- | -| `unit` | `string` | Configures the histogram units. | `"ms"` | no | -| `disable`| `bool` | Disable all histogram metrics. | `false` | no | +| Name | Type | Description | Default | Required | +| --------- | -------- | ------------------------------- | ------- | -------- | +| `unit` | `string` | Configures the histogram units. | `"ms"` | no | +| `disable` | `bool` | Disable all histogram metrics. | `false` | no | The supported values for `unit` are: @@ -166,9 +181,9 @@ The `exemplars` block configures how to attach exemplars to histograms. The following attributes are supported: -| Name | Type | Description | Default | Required | -| ---------- | -------- | ---------------------------------------------------------------- | ------- | -------- | -| `enabled` | `bool` | Configures whether to add exemplars to histograms. | `false` | no | +| Name | Type | Description | Default | Required | +| --------- | ------ | -------------------------------------------------- | ------- | -------- | +| `enabled` | `bool` | Configures whether to add exemplars to histograms. | `false` | no | ### output block @@ -184,6 +199,348 @@ The following fields are exported and can be referenced by other components: `input` accepts `otelcol.Consumer` traces telemetry data. It does not accept metrics and logs. +## Handling of resource attributes + +[Handling of resource attributes]: #handling-of-resource-attributes + +`otelcol.connector.spanmetrics` is an OTLP-native component. As such, it aims to preserve the resource attributes of spans. + +1. For example, let's assume that there are two incoming resources spans with the same `service.name` and `k8s.pod.name` resource attributes. 
+ {{< collapse title="Example JSON of two incoming spans." >}} + + ```json + { + "resourceSpans": [ + { + "resource": { + "attributes": [ + { + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "k8s.pod.name", + "value": { "stringValue": "first" } + } + ] + }, + "scopeSpans": [ + { + "spans": [ + { + "trace_id": "7bba9f33312b3dbb8b2c2c62bb7abe2d", + "span_id": "086e83747d0e381e", + "name": "TestSpan", + "attributes": [ + { + "key": "attribute1", + "value": { "intValue": "78" } + } + ] + } + ] + } + ] + }, + { + "resource": { + "attributes": [ + { + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "k8s.pod.name", + "value": { "stringValue": "first" } + } + ] + }, + "scopeSpans": [ + { + "spans": [ + { + "trace_id": "7bba9f33312b3dbb8b2c2c62bb7abe2d", + "span_id": "086e83747d0e381b", + "name": "TestSpan", + "attributes": [ + { + "key": "attribute1", + "value": { "intValue": "78" } + } + ] + } + ] + } + ] + } + ] + } + ``` + + {{< /collapse >}} + +1. `otelcol.connector.spanmetrics` will preserve the incoming `service.name` and `k8s.pod.name` resource attributes by attaching them to the output metrics resource. + Only one metric resource will be created, because both span resources have identical resource attributes. + {{< collapse title="Example JSON of one outgoing metric resource." 
>}}
+
+   ```json
+   {
+     "resourceMetrics": [
+       {
+         "resource": {
+           "attributes": [
+             {
+               "key": "service.name",
+               "value": { "stringValue": "TestSvcName" }
+             },
+             {
+               "key": "k8s.pod.name",
+               "value": { "stringValue": "first" }
+             }
+           ]
+         },
+         "scopeMetrics": [
+           {
+             "scope": { "name": "spanmetricsconnector" },
+             "metrics": [
+               {
+                 "name": "calls",
+                 "sum": {
+                   "dataPoints": [
+                     {
+                       "attributes": [
+                         {
+                           "key": "service.name",
+                           "value": { "stringValue": "TestSvcName" }
+                         },
+                         {
+                           "key": "span.name",
+                           "value": { "stringValue": "TestSpan" }
+                         },
+                         {
+                           "key": "span.kind",
+                           "value": { "stringValue": "SPAN_KIND_UNSPECIFIED" }
+                         },
+                         {
+                           "key": "status.code",
+                           "value": { "stringValue": "STATUS_CODE_UNSET" }
+                         }
+                       ],
+                       "startTimeUnixNano": "1702582936761872000",
+                       "timeUnixNano": "1702582936761872012",
+                       "asInt": "2"
+                     }
+                   ],
+                   "aggregationTemporality": 2,
+                   "isMonotonic": true
+                 }
+               }
+             ]
+           }
+         ]
+       }
+     ]
+   }
+   ```
+
+   {{< /collapse >}}
+
+1. Now assume that `otelcol.connector.spanmetrics` receives two incoming resource spans, each with a different value for the `k8s.pod.name` resource attribute.
+   {{< collapse title="Example JSON of two incoming spans."
>}}
+
+   ```json
+   {
+     "resourceSpans": [
+       {
+         "resource": {
+           "attributes": [
+             {
+               "key": "service.name",
+               "value": { "stringValue": "TestSvcName" }
+             },
+             {
+               "key": "k8s.pod.name",
+               "value": { "stringValue": "first" }
+             }
+           ]
+         },
+         "scopeSpans": [
+           {
+             "spans": [
+               {
+                 "trace_id": "7bba9f33312b3dbb8b2c2c62bb7abe2d",
+                 "span_id": "086e83747d0e381e",
+                 "name": "TestSpan",
+                 "attributes": [
+                   {
+                     "key": "attribute1",
+                     "value": { "intValue": "78" }
+                   }
+                 ]
+               }
+             ]
+           }
+         ]
+       },
+       {
+         "resource": {
+           "attributes": [
+             {
+               "key": "service.name",
+               "value": { "stringValue": "TestSvcName" }
+             },
+             {
+               "key": "k8s.pod.name",
+               "value": { "stringValue": "second" }
+             }
+           ]
+         },
+         "scopeSpans": [
+           {
+             "spans": [
+               {
+                 "trace_id": "7bba9f33312b3dbb8b2c2c62bb7abe2d",
+                 "span_id": "086e83747d0e381b",
+                 "name": "TestSpan",
+                 "attributes": [
+                   {
+                     "key": "attribute1",
+                     "value": { "intValue": "78" }
+                   }
+                 ]
+               }
+             ]
+           }
+         ]
+       }
+     ]
+   }
+   ```
+
+   {{< /collapse >}}
+
+1. To preserve the values of all resource attributes, `otelcol.connector.spanmetrics` will produce two resource metrics.
+   Each resource metric will have a different value for the `k8s.pod.name` resource attribute.
+   This way none of the resource attributes will be lost during the generation of metrics.
+   {{< collapse title="Example JSON of two outgoing metric resources."
>}} + ```json + { + "resourceMetrics": [ + { + "resource": { + "attributes": [ + { + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "k8s.pod.name", + "value": { "stringValue": "first" } + } + ] + }, + "scopeMetrics": [ + { + "scope": { + "name": "spanmetricsconnector" + }, + "metrics": [ + { + "name": "calls", + "sum": { + "dataPoints": [ + { + "attributes": [ + { + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "span.name", + "value": { "stringValue": "TestSpan" } + }, + { + "key": "span.kind", + "value": { "stringValue": "SPAN_KIND_UNSPECIFIED" } + }, + { + "key": "status.code", + "value": { "stringValue": "STATUS_CODE_UNSET" } + } + ], + "startTimeUnixNano": "1702582936761872000", + "timeUnixNano": "1702582936761872012", + "asInt": "1" + } + ], + "aggregationTemporality": 2, + "isMonotonic": true + } + } + ] + } + ] + }, + { + "resource": { + "attributes": [ + { + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "k8s.pod.name", + "value": { "stringValue": "second" } + } + ] + }, + "scopeMetrics": [ + { + "scope": { + "name": "spanmetricsconnector" + }, + "metrics": [ + { + "name": "calls", + "sum": { + "dataPoints": [ + { + "attributes": [ + { + "key": "service.name", + "value": { "stringValue": "TestSvcName" } + }, + { + "key": "span.name", + "value": { "stringValue": "TestSpan" } + }, + { + "key": "span.kind", + "value": { "stringValue": "SPAN_KIND_UNSPECIFIED" } + }, + { + "key": "status.code", + "value": { "stringValue": "STATUS_CODE_UNSET" } + } + ], + "startTimeUnixNano": "1702582936761872000", + "timeUnixNano": "1702582936761872012", + "asInt": "1" + } + ], + "aggregationTemporality": 2, + "isMonotonic": true + } + } + ] + } + ] + } + ] + } + ``` + {{< /collapse >}} + ## Component health `otelcol.connector.spanmetrics` is only reported as unhealthy if given an invalid @@ -259,42 +616,152 @@ otelcol.exporter.otlp "production" { ### Sending 
metrics via a Prometheus remote write -In order for a `target_info` metric to be generated, the incoming spans resource scope -attributes must contain `service.name` and `service.instance.id` attributes. +The generated metrics can be sent to a Prometheus-compatible database such as Grafana Mimir. +However, extra steps are required in order to make sure all metric samples are received. +This is because `otelcol.connector.spanmetrics` aims to [preserve resource attributes][Handling of resource attributes] in the metrics which it outputs. -The `target_info` metric will be generated for each resource scope, while OpenTelemetry -metric names and attributes will be normalized to be compliant with Prometheus naming rules. +Unfortunately, the [Prometheus data model][prom-data-model] has no notion of resource attributes. +This means that if `otelcol.connector.spanmetrics` outputs metrics with identical metric attributes, +but different resource attributes, `otelcol.exporter.prometheus` will convert the metrics into the same metric series. +This problem can be solved by doing **either** of the following: -```river -otelcol.receiver.otlp "default" { - http {} - grpc {} +- **Recommended approach:** Prior to `otelcol.connector.spanmetrics`, remove all resource attributes from the incoming spans which are not needed by `otelcol.connector.spanmetrics`. + {{< collapse title="Example River configuration to remove unnecessary resource attributes." >}} + ```river + otelcol.receiver.otlp "default" { + http {} + grpc {} - output { - traces = [otelcol.connector.spanmetrics.default.input] + output { + traces = [otelcol.processor.transform.default.input] + } } -} -otelcol.connector.spanmetrics "default" { - histogram { - exponential {} + // Remove all resource attributes except the ones which + // the otelcol.connector.spanmetrics needs. + // If this is not done, otelcol.exporter.prometheus may fail to + // write some samples due to an "err-mimir-sample-duplicate-timestamp" error. 
+ // This is because the spanmetricsconnector will create a new + // metrics resource scope for each traces resource scope. + otelcol.processor.transform "default" { + error_mode = "ignore" + + trace_statements { + context = "resource" + statements = [ + // We keep only the "service.name" and "special.attr" resource attributes, + // because they are the only ones which otelcol.connector.spanmetrics needs. + // + // There is no need to list "span.name", "span.kind", and "status.code" + // here because they are properties of the span (and not resource attributes): + // https://github.com/open-telemetry/opentelemetry-proto/blob/v1.0.0/opentelemetry/proto/trace/v1/trace.proto + `keep_keys(attributes, ["service.name", "special.attr"])`, + ] + } + + output { + traces = [otelcol.connector.spanmetrics.default.input] + } } - output { - metrics = [otelcol.exporter.prometheus.default.input] + otelcol.connector.spanmetrics "default" { + histogram { + explicit {} + } + + dimension { + name = "special.attr" + } + output { + metrics = [otelcol.exporter.prometheus.default.input] + } } -} -otelcol.exporter.prometheus "default" { - forward_to = [prometheus.remote_write.mimir.receiver] -} + otelcol.exporter.prometheus "default" { + forward_to = [prometheus.remote_write.mimir.receiver] + } -prometheus.remote_write "mimir" { - endpoint { - url = "http://mimir:9009/api/v1/push" + prometheus.remote_write "mimir" { + endpoint { + url = "http://mimir:9009/api/v1/push" + } } -} -``` + ``` + {{< /collapse >}} + +- Or, after `otelcol.connector.spanmetrics`, copy each of the resource attributes as a metric datapoint attribute. +This has the advantage that the resource attributes will be visible as metric labels. +However, the {{< term "cardinality" >}}cardinality{{< /term >}} of the metrics may be much higher, which could increase the cost of storing and querying them. +The example below uses the [merge_maps][] OTTL function. 
+ + {{< collapse title="Example River configuration to add all resource attributes as metric datapoint attributes." >}} + ```river + otelcol.receiver.otlp "default" { + http {} + grpc {} + + output { + traces = [otelcol.connector.spanmetrics.default.input] + } + } + + otelcol.connector.spanmetrics "default" { + histogram { + explicit {} + } + + dimension { + name = "special.attr" + } + output { + metrics = [otelcol.processor.transform.default.input] + } + } + + // Insert resource attributes as metric data point attributes. + otelcol.processor.transform "default" { + error_mode = "ignore" + + metric_statements { + context = "datapoint" + statements = [ + // "insert" means that a metric datapoint attribute will be inserted + // only if an attribute with the same key does not already exist. + `merge_maps(attributes, resource.attributes, "insert")`, + ] + } + + output { + metrics = [otelcol.exporter.prometheus.default.input] + } + } + + otelcol.exporter.prometheus "default" { + forward_to = [prometheus.remote_write.mimir.receiver] + } + + prometheus.remote_write "mimir" { + endpoint { + url = "http://mimir:9009/api/v1/push" + } + } + ``` + {{< /collapse >}} + +If the resource attributes are not treated in either of the ways described above, an error such as this one could be logged by `prometheus.remote_write`: +`the sample has been rejected because another sample with the same timestamp, but a different value, has already been ingested (err-mimir-sample-duplicate-timestamp)`. + +{{% admonition type="note" %}} +In order for a Prometheus `target_info` metric to be generated, the incoming spans resource scope +attributes must contain `service.name` and `service.instance.id` attributes. + +The `target_info` metric will be generated for each resource scope, while OpenTelemetry +metric names and attributes will be normalized to be compliant with Prometheus naming rules. 
+{{% /admonition %}} + +[merge_maps]: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/{{< param "OTEL_VERSION" >}}/pkg/ottl/ottlfuncs/README.md#merge_maps +[prom-data-model]: https://prometheus.io/docs/concepts/data_model/ + ## Compatible components