From 2498ea3b00747bde6361042b56de805807f0f3ca Mon Sep 17 00:00:00 2001 From: Bruno Teixeira Date: Fri, 1 Nov 2024 16:32:40 +0000 Subject: [PATCH] feat/re-allow multiple workers Signed-off-by: Bruno Teixeira --- ...sremotewrite-reallow-multiple-workers.yaml | 27 +++++++++++++++++++ .../prometheusremotewriteexporter/README.md | 19 ++++++++++++- .../prometheusremotewriteexporter/factory.go | 10 ++----- .../generated_package_test.go | 3 +-- 4 files changed, 48 insertions(+), 11 deletions(-) create mode 100644 .chloggen/prometheusremotewrite-reallow-multiple-workers.yaml diff --git a/.chloggen/prometheusremotewrite-reallow-multiple-workers.yaml b/.chloggen/prometheusremotewrite-reallow-multiple-workers.yaml new file mode 100644 index 000000000000..a8faf9000abe --- /dev/null +++ b/.chloggen/prometheusremotewrite-reallow-multiple-workers.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: prometheusremotewriteexporter + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Re-allows the configuration of multiple workers + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [36134] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. 
'[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/exporter/prometheusremotewriteexporter/README.md b/exporter/prometheusremotewriteexporter/README.md index 64413927fb52..43b30ba4bb20 100644 --- a/exporter/prometheusremotewriteexporter/README.md +++ b/exporter/prometheusremotewriteexporter/README.md @@ -54,7 +54,7 @@ The following settings can be optionally configured: - `remote_write_queue`: fine tuning for queueing and sending of the outgoing remote writes. - `enabled`: enable the sending queue (default: `true`) - `queue_size`: number of OTLP metrics that can be queued. Ignored if `enabled` is `false` (default: `10000`) - - `num_consumers`: minimum number of workers to use to fan out the outgoing requests. (default: `5`) + - `num_consumers`: minimum number of workers to use to fan out the outgoing requests. (default: `1`). Setting this to a number higher than 1 requires the target destination to support ingestion of OutOfOrder samples. See [Multiple Consumers and OutOfOrder](#multiple-consumers-and-outoforder) for more info. - `resource_to_telemetry_conversion` - `enabled` (default = false): If `enabled` is `true`, all the resource attributes will be converted to metric labels by default. - `target_info`: customize `target_info` metric @@ -149,3 +149,20 @@ sum by (namespace) (app_ads_ad_requests_total) [beta]:https://github.com/open-telemetry/opentelemetry-collector#beta [contrib]:https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol-contrib [core]:https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol + +## Multiple Consumers and OutOfOrder + +**DISCLAIMER**: This snippet applies only to Prometheus; other remote write destinations using Prometheus Protocol (ex: Thanos/Grafana Mimir/VictoriaMetrics) may have different settings. 
+ +By default, Prometheus typically expects samples to be ingested sequentially, in temporal order. + +When multiple consumers are enabled, the temporal ordering of the samples written to the target destination is not deterministic and temporal ordering can no longer be guaranteed. Example: one worker may be pushing a sample for `t+30s` and a second worker may push an additional sample but for `t+15s`. + +Vanilla Prometheus configurations will reject these unordered samples. You will see an error similar to `Error on ingesting out-of-order samples`. + +For enabling multiple consumers, it is necessary to enable out-of-order support in Prometheus. +This can be done by using the `tsdb.out_of_order_time_window: 10m` setting. Please choose an appropriate time window to support pushing the worst case scenarios of a "queue" build up on the sender side. + +For more info, see: +- https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tsdb +- https://prometheus.io/docs/prometheus/latest/feature_flags/#remote-write-receiver diff --git a/exporter/prometheusremotewriteexporter/factory.go b/exporter/prometheusremotewriteexporter/factory.go index 390b47839765..39b71736718e 100644 --- a/exporter/prometheusremotewriteexporter/factory.go +++ b/exporter/prometheusremotewriteexporter/factory.go @@ -47,12 +47,6 @@ func createMetricsExporter(ctx context.Context, set exporter.Settings, return nil, err } - // Don't allow users to configure the queue. - // See https://github.com/open-telemetry/opentelemetry-collector/issues/2949. - // Prometheus remote write samples needs to be in chronological - // order for each timeseries. If we shard the incoming metrics - // without considering this limitation, we experience - // "out of order samples" errors. 
exporter, err := exporterhelper.NewMetrics( ctx, set, @@ -61,7 +55,7 @@ func createMetricsExporter(ctx context.Context, set exporter.Settings, exporterhelper.WithTimeout(prwCfg.TimeoutSettings), exporterhelper.WithQueue(exporterhelper.QueueConfig{ Enabled: prwCfg.RemoteWriteQueue.Enabled, - NumConsumers: 1, + NumConsumers: prwCfg.RemoteWriteQueue.NumConsumers, QueueSize: prwCfg.RemoteWriteQueue.QueueSize, }), exporterhelper.WithStart(prwe.Start), @@ -96,7 +90,7 @@ func createDefaultConfig() component.Config { RemoteWriteQueue: RemoteWriteQueue{ Enabled: true, QueueSize: 10000, - NumConsumers: 5, + NumConsumers: 1, // Default to 1 to avoid Out of Order in Vanilla Prometheus Setups }, TargetInfo: &TargetInfo{ Enabled: true, diff --git a/exporter/prometheusremotewriteexporter/generated_package_test.go b/exporter/prometheusremotewriteexporter/generated_package_test.go index aa7545f2a746..90a9ba58d657 100644 --- a/exporter/prometheusremotewriteexporter/generated_package_test.go +++ b/exporter/prometheusremotewriteexporter/generated_package_test.go @@ -3,9 +3,8 @@ package prometheusremotewriteexporter import ( - "testing" - "go.uber.org/goleak" + "testing" ) func TestMain(m *testing.M) {