From 67ac0baac852202dca1e226f55ac0bbbb57d3cbb Mon Sep 17 00:00:00 2001 From: Essam Eldaly Date: Fri, 20 Dec 2024 13:45:27 -0800 Subject: [PATCH] Add metric name in limiter per-metric exceeded errors Signed-off-by: Essam Eldaly --- CHANGELOG.md | 1 + pkg/ingester/ingester.go | 6 +++--- pkg/ingester/ingester_test.go | 4 ++-- pkg/ingester/limiter.go | 30 +++++++++++++-------------- pkg/ingester/limiter_test.go | 15 +++++++------- pkg/ingester/user_metrics_metadata.go | 4 ++-- 6 files changed, 31 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7da08bf6ea..97db864213 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ * [ENHANCEMENT] Ingester: If a limit per label set entry doesn't have any label, use it as the default partition to catch all series that doesn't match any other label sets entries. #6435 * [ENHANCEMENT] Querier: Add new `cortex_querier_codec_response_size` metric to track the size of the encoded query responses from queriers. #6444 * [ENHANCEMENT] Distributor: Added `cortex_distributor_received_samples_per_labelset_total` metric to calculate ingestion rate per label set. #6443 +* [ENHANCEMENT] Added metric name in limiter per-metric exceeded errors. #6416 * [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224 * [BUGFIX] Ruler: Allow rule evaluation to complete during shutdown. #6326 * [BUGFIX] Ring: update ring with new ip address when instance is lost, rejoins, but heartbeat is disabled. #6271 diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 1d3fc2c5fc..860eaea8f9 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -1169,18 +1169,18 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte case errors.Is(cause, errMaxSeriesPerUserLimitExceeded): perUserSeriesLimitCount++ - updateFirstPartial(func() error { return makeLimitError(perUserSeriesLimit, i.limiter.FormatError(userID, cause)) }) + updateFirstPartial(func() error { return makeLimitError(perUserSeriesLimit, i.limiter.FormatError(userID, cause, copiedLabels)) }) case errors.Is(cause, errMaxSeriesPerMetricLimitExceeded): perMetricSeriesLimitCount++ updateFirstPartial(func() error { - return makeMetricLimitError(perMetricSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause)) + return makeMetricLimitError(perMetricSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause, copiedLabels)) }) case errors.As(cause, &errMaxSeriesPerLabelSetLimitExceeded{}): perLabelSetSeriesLimitCount++ updateFirstPartial(func() error { - return makeMetricLimitError(perLabelsetSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause)) + return makeMetricLimitError(perLabelsetSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause, copiedLabels)) }) case errors.Is(cause, histogram.ErrHistogramSpanNegativeOffset): diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index d4d05fef9a..ab21c30da7 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -656,7 +656,7 @@ func TestIngesterUserLimitExceeded(t *testing.T) { httpResp, ok := httpgrpc.HTTPResponseFromError(err) require.True(t, ok, "returned error is not an httpgrpc response") assert.Equal(t, http.StatusBadRequest, int(httpResp.Code)) - assert.Equal(t, wrapWithUser(makeLimitError(perUserSeriesLimit, ing.limiter.FormatError(userID, errMaxSeriesPerUserLimitExceeded)), userID).Error(), string(httpResp.Body)) + assert.Equal(t, wrapWithUser(makeLimitError(perUserSeriesLimit, ing.limiter.FormatError(userID, errMaxSeriesPerUserLimitExceeded, labels1)), userID).Error(), string(httpResp.Body)) // Append two metadata, expect no error since metadata is a best effort approach. _, err = ing.Push(ctx, cortexpb.ToWriteRequest(nil, nil, []*cortexpb.MetricMetadata{metadata1, metadata2}, nil, cortexpb.API)) @@ -778,7 +778,7 @@ func TestIngesterMetricLimitExceeded(t *testing.T) { httpResp, ok := httpgrpc.HTTPResponseFromError(err) require.True(t, ok, "returned error is not an httpgrpc response") assert.Equal(t, http.StatusBadRequest, int(httpResp.Code)) - assert.Equal(t, wrapWithUser(makeMetricLimitError(perMetricSeriesLimit, labels3, ing.limiter.FormatError(userID, errMaxSeriesPerMetricLimitExceeded)), userID).Error(), string(httpResp.Body)) + assert.Equal(t, wrapWithUser(makeMetricLimitError(perMetricSeriesLimit, labels3, ing.limiter.FormatError(userID, errMaxSeriesPerMetricLimitExceeded, labels1)), userID).Error(), string(httpResp.Body)) // Append two metadata for the same metric. Drop the second one, and expect no error since metadata is a best effort approach. _, err = ing.Push(ctx, cortexpb.ToWriteRequest(nil, nil, []*cortexpb.MetricMetadata{metadata1, metadata2}, nil, cortexpb.API)) diff --git a/pkg/ingester/limiter.go b/pkg/ingester/limiter.go index 0aaf6b312a..1cf20e5f2c 100644 --- a/pkg/ingester/limiter.go +++ b/pkg/ingester/limiter.go @@ -130,26 +130,26 @@ func (l *Limiter) AssertMaxSeriesPerLabelSet(userID string, metric labels.Labels // FormatError returns the input error enriched with the actual limits for the given user. // It acts as pass-through if the input error is unknown. -func (l *Limiter) FormatError(userID string, err error) error { +func (l *Limiter) FormatError(userID string, err error, lbls labels.Labels) error { switch { case errors.Is(err, errMaxSeriesPerUserLimitExceeded): - return l.formatMaxSeriesPerUserError(userID) + return l.formatMaxSeriesPerUserError(userID, lbls) case errors.Is(err, errMaxSeriesPerMetricLimitExceeded): - return l.formatMaxSeriesPerMetricError(userID) + return l.formatMaxSeriesPerMetricError(userID, lbls) case errors.Is(err, errMaxMetadataPerUserLimitExceeded): - return l.formatMaxMetadataPerUserError(userID) + return l.formatMaxMetadataPerUserError(userID, lbls) case errors.Is(err, errMaxMetadataPerMetricLimitExceeded): - return l.formatMaxMetadataPerMetricError(userID) + return l.formatMaxMetadataPerMetricError(userID, lbls) case errors.As(err, &errMaxSeriesPerLabelSetLimitExceeded{}): e := errMaxSeriesPerLabelSetLimitExceeded{} errors.As(err, &e) - return l.formatMaxSeriesPerLabelSetError(e) + return l.formatMaxSeriesPerLabelSetError(e, lbls) default: return err } } -func (l *Limiter) formatMaxSeriesPerUserError(userID string) error { +func (l *Limiter) formatMaxSeriesPerUserError(userID string, lbls labels.Labels) error { actualLimit := l.maxSeriesPerUser(userID) localLimit := l.limits.MaxLocalSeriesPerUser(userID) globalLimit := l.limits.MaxGlobalSeriesPerUser(userID) @@ -158,16 +158,16 @@ func (l *Limiter) formatMaxSeriesPerUserError(userID string) error { minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit) } -func (l *Limiter) formatMaxSeriesPerMetricError(userID string) error { +func (l *Limiter) formatMaxSeriesPerMetricError(userID string, lbls labels.Labels) error { actualLimit := l.maxSeriesPerMetric(userID) localLimit := l.limits.MaxLocalSeriesPerMetric(userID) globalLimit := l.limits.MaxGlobalSeriesPerMetric(userID) - return fmt.Errorf("per-metric series limit of %d exceeded, %s (local limit: %d global limit: %d actual local limit: %d)", - minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit) + return fmt.Errorf("per-metric series limit of %d exceeded for metric %s, %s (local limit: %d global limit: %d actual local limit: %d)", + minNonZero(localLimit, globalLimit), lbls.Get(labels.MetricName), l.AdminLimitMessage, localLimit, globalLimit, actualLimit) } -func (l *Limiter) formatMaxMetadataPerUserError(userID string) error { +func (l *Limiter) formatMaxMetadataPerUserError(userID string, lbls labels.Labels) error { actualLimit := l.maxMetadataPerUser(userID) localLimit := l.limits.MaxLocalMetricsWithMetadataPerUser(userID) globalLimit := l.limits.MaxGlobalMetricsWithMetadataPerUser(userID) @@ -176,16 +176,16 @@ func (l *Limiter) formatMaxMetadataPerUserError(userID string) error { minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit) } -func (l *Limiter) formatMaxMetadataPerMetricError(userID string) error { +func (l *Limiter) formatMaxMetadataPerMetricError(userID string, lbls labels.Labels) error { actualLimit := l.maxMetadataPerMetric(userID) localLimit := l.limits.MaxLocalMetadataPerMetric(userID) globalLimit := l.limits.MaxGlobalMetadataPerMetric(userID) - return fmt.Errorf("per-metric metadata limit of %d exceeded, %s (local limit: %d global limit: %d actual local limit: %d)", - minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit) + return fmt.Errorf("per-metric metadata limit of %d exceeded for metric %s, %s (local limit: %d global limit: %d actual local limit: %d)", + minNonZero(localLimit, globalLimit), lbls.Get(labels.MetricName), l.AdminLimitMessage, localLimit, globalLimit, actualLimit) } -func (l *Limiter) formatMaxSeriesPerLabelSetError(err errMaxSeriesPerLabelSetLimitExceeded) error { +func (l *Limiter) formatMaxSeriesPerLabelSetError(err errMaxSeriesPerLabelSetLimitExceeded, lbls labels.Labels) error { return fmt.Errorf("per-labelset series limit of %d exceeded (labelSet: %s, local limit: %d global limit: %d actual)", minNonZero(err.globalLimit, err.localLimit), err.id, err.localLimit, err.globalLimit) } diff --git a/pkg/ingester/limiter_test.go b/pkg/ingester/limiter_test.go index 944af00293..a1043b053e 100644 --- a/pkg/ingester/limiter_test.go +++ b/pkg/ingester/limiter_test.go @@ -588,21 +588,22 @@ func TestLimiter_FormatError(t *testing.T) { require.NoError(t, err) limiter := NewLimiter(limits, ring, util.ShardingStrategyDefault, true, 3, false, "please contact administrator to raise it") + lbls := labels.FromStrings(labels.MetricName, "testMetric") - actual := limiter.FormatError("user-1", errMaxSeriesPerUserLimitExceeded) + actual := limiter.FormatError("user-1", errMaxSeriesPerUserLimitExceeded, lbls) assert.EqualError(t, actual, "per-user series limit of 100 exceeded, please contact administrator to raise it (local limit: 0 global limit: 100 actual local limit: 100)") - actual = limiter.FormatError("user-1", errMaxSeriesPerMetricLimitExceeded) - assert.EqualError(t, actual, "per-metric series limit of 20 exceeded, please contact administrator to raise it (local limit: 0 global limit: 20 actual local limit: 20)") + actual = limiter.FormatError("user-1", errMaxSeriesPerMetricLimitExceeded, lbls) + assert.EqualError(t, actual, "per-metric series limit of 20 exceeded for metric testMetric, please contact administrator to raise it (local limit: 0 global limit: 20 actual local limit: 20)") - actual = limiter.FormatError("user-1", errMaxMetadataPerUserLimitExceeded) + actual = limiter.FormatError("user-1", errMaxMetadataPerUserLimitExceeded, lbls) assert.EqualError(t, actual, "per-user metric metadata limit of 10 exceeded, please contact administrator to raise it (local limit: 0 global limit: 10 actual local limit: 10)") - actual = limiter.FormatError("user-1", errMaxMetadataPerMetricLimitExceeded) - assert.EqualError(t, actual, "per-metric metadata limit of 3 exceeded, please contact administrator to raise it (local limit: 0 global limit: 3 actual local limit: 3)") + actual = limiter.FormatError("user-1", errMaxMetadataPerMetricLimitExceeded, lbls) + assert.EqualError(t, actual, "per-metric metadata limit of 3 exceeded for metric testMetric, please contact administrator to raise it (local limit: 0 global limit: 3 actual local limit: 3)") input := errors.New("unknown error") - actual = limiter.FormatError("user-1", input) + actual = limiter.FormatError("user-1", input, lbls) assert.Equal(t, input, actual) } diff --git a/pkg/ingester/user_metrics_metadata.go b/pkg/ingester/user_metrics_metadata.go index 96adfaa9ba..dcb5fd6bbf 100644 --- a/pkg/ingester/user_metrics_metadata.go +++ b/pkg/ingester/user_metrics_metadata.go @@ -45,7 +45,7 @@ func (mm *userMetricsMetadata) add(metric string, metadata *cortexpb.MetricMetad // Verify that the user can create more metric metadata given we don't have a set for that metric name. if err := mm.limiter.AssertMaxMetricsWithMetadataPerUser(mm.userID, len(mm.metricToMetadata)); err != nil { mm.validateMetrics.DiscardedMetadata.WithLabelValues(mm.userID, perUserMetadataLimit).Inc() - return makeLimitError(perUserMetadataLimit, mm.limiter.FormatError(mm.userID, err)) + return makeLimitError(perUserMetadataLimit, mm.limiter.FormatError(mm.userID, err, labels.FromStrings(labels.MetricName, metric))) } set = metricMetadataSet{} mm.metricToMetadata[metric] = set @@ -53,7 +53,7 @@ func (mm *userMetricsMetadata) add(metric string, metadata *cortexpb.MetricMetad if err := mm.limiter.AssertMaxMetadataPerMetric(mm.userID, len(set)); err != nil { mm.validateMetrics.DiscardedMetadata.WithLabelValues(mm.userID, perMetricMetadataLimit).Inc() - return makeMetricLimitError(perMetricMetadataLimit, labels.FromStrings(labels.MetricName, metric), mm.limiter.FormatError(mm.userID, err)) + return makeMetricLimitError(perMetricMetadataLimit, labels.FromStrings(labels.MetricName, metric), mm.limiter.FormatError(mm.userID, err, labels.FromStrings(labels.MetricName, metric))) } // if we have seen this metadata before, it is a no-op and we don't need to change our metrics.