From 132ccdea79f9f9991b59b378bcc2cd8f328f58b1 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Wed, 18 Dec 2024 14:39:06 -0800 Subject: [PATCH] Create new `cortex_querier_codec_response_size` histogram to track the size of the encoded Query responses (#6444) * Create new histogram to track the size of the encoded responses from QF to Querier Signed-off-by: alanprot * Changelog Signed-off-by: alanprot --------- Signed-off-by: alanprot --- CHANGELOG.md | 1 + pkg/api/handlers.go | 10 +++-- pkg/querier/codec/instrumented_codec.go | 57 ++++++++++++++++++++++++ pkg/querier/codec/protobuf_codec_test.go | 23 +++++++++- 4 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 pkg/querier/codec/instrumented_codec.go diff --git a/CHANGELOG.md b/CHANGELOG.md index c9f26389e5..bf025c719d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ * [ENHANCEMENT] Ingester: Make sure unregistered ingester joining the ring after WAL replay. #6277 * [ENHANCEMENT] Distributor: Add a new `-distributor.num-push-workers` flag to use a goroutine worker pool when sending data from distributor to ingesters. #6406 * [ENHANCEMENT] Ingester: If a limit per label set entry doesn't have any label, use it as the default partition to catch all series that doesn't match any other label sets entries. #6435 +* [ENHANCEMENT] Querier: Add new `cortex_querier_codec_response_size` metric to track the size of the encoded query responses from queriers. #6444 * [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224 * [BUGFIX] Ruler: Allow rule evaluation to complete during shutdown. #6326 * [BUGFIX] Ring: update ring with new ip address when instance is lost, rejoins, but heartbeat is disabled. #6271 diff --git a/pkg/api/handlers.go b/pkg/api/handlers.go index af9d1d539c..d931d4a40d 100644 --- a/pkg/api/handlers.go +++ b/pkg/api/handlers.go @@ -231,11 +231,15 @@ func NewQuerierHandler( nil, false, ) + // Let's clear all codecs to create the instrumented ones + api.ClearCodecs() + cm := codec.NewInstrumentedCodecMetrics(reg) - // JSON codec is already installed. Install Protobuf codec to give the option for using either. - api.InstallCodec(codec.ProtobufCodec{CortexInternal: false}) + api.InstallCodec(codec.NewInstrumentedCodec(v1.JSONCodec{}, cm)) + // Install Protobuf codec to give the option for using either. + api.InstallCodec(codec.NewInstrumentedCodec(codec.ProtobufCodec{CortexInternal: false}, cm)) // Protobuf codec for Cortex internal requests. This should be used by Cortex Ruler only for remote evaluation. - api.InstallCodec(codec.ProtobufCodec{CortexInternal: true}) + api.InstallCodec(codec.NewInstrumentedCodec(codec.ProtobufCodec{CortexInternal: true}, cm)) router := mux.NewRouter() diff --git a/pkg/querier/codec/instrumented_codec.go b/pkg/querier/codec/instrumented_codec.go new file mode 100644 index 0000000000..d9c502130f --- /dev/null +++ b/pkg/querier/codec/instrumented_codec.go @@ -0,0 +1,57 @@ +package codec + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + v1 "github.com/prometheus/prometheus/web/api/v1" + "github.com/weaveworks/common/middleware" +) + +type InstrumentedCodecMetrics struct { + responseSizeHistogram *prometheus.HistogramVec +} + +func NewInstrumentedCodecMetrics(reg prometheus.Registerer) *InstrumentedCodecMetrics { + return &InstrumentedCodecMetrics{ + responseSizeHistogram: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "cortex", + Name: "querier_codec_response_size", + Help: "Size of the encoded prometheus response from the queriers.", + Buckets: middleware.BodySizeBuckets, + NativeHistogramBucketFactor: 1.1, + NativeHistogramMaxBucketNumber: 100, + NativeHistogramMinResetDuration: time.Hour, + }, []string{"content_type"}), + } +} + +type InstrumentedCodec struct { + uc v1.Codec + + metrics *InstrumentedCodecMetrics +} + +func (c *InstrumentedCodec) ContentType() v1.MIMEType { + return c.uc.ContentType() +} + +func (c *InstrumentedCodec) CanEncode(resp *v1.Response) bool { + return c.uc.CanEncode(resp) +} + +func (c *InstrumentedCodec) Encode(resp *v1.Response) ([]byte, error) { + b, err := c.uc.Encode(resp) + if err == nil { + c.metrics.responseSizeHistogram.WithLabelValues(c.uc.ContentType().String()).Observe(float64(len((b)))) + } + return b, err +} + +func NewInstrumentedCodec(uc v1.Codec, m *InstrumentedCodecMetrics) v1.Codec { + return &InstrumentedCodec{ + uc: uc, + metrics: m, + } +} diff --git a/pkg/querier/codec/protobuf_codec_test.go b/pkg/querier/codec/protobuf_codec_test.go index 310e49b392..3b32fc1208 100644 --- a/pkg/querier/codec/protobuf_codec_test.go +++ b/pkg/querier/codec/protobuf_codec_test.go @@ -1,9 +1,13 @@ package codec import ( + "bytes" + "fmt" "testing" "github.com/gogo/protobuf/proto" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -437,7 +441,9 @@ func TestProtobufCodec_Encode(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - codec := ProtobufCodec{CortexInternal: test.cortexInternal} + reg := prometheus.NewPedanticRegistry() + cm := NewInstrumentedCodecMetrics(reg) + codec := NewInstrumentedCodec(ProtobufCodec{CortexInternal: test.cortexInternal}, cm) body, err := codec.Encode(&v1.Response{ Status: tripperware.StatusSuccess, Data: test.data, @@ -446,6 +452,21 @@ func TestProtobufCodec_Encode(t *testing.T) { b, err := proto.Marshal(test.expected) require.NoError(t, err) require.Equal(t, string(b), string(body)) + require.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(fmt.Sprintf(` + # HELP cortex_querier_codec_response_size Size of the encoded prometheus response from the queriers. + # TYPE cortex_querier_codec_response_size histogram + cortex_querier_codec_response_size_bucket{content_type="`+codec.ContentType().String()+`",le="1.048576e+06"} 1 + cortex_querier_codec_response_size_bucket{content_type="`+codec.ContentType().String()+`",le="2.62144e+06"} 1 + cortex_querier_codec_response_size_bucket{content_type="`+codec.ContentType().String()+`",le="5.24288e+06"} 1 + cortex_querier_codec_response_size_bucket{content_type="`+codec.ContentType().String()+`",le="1.048576e+07"} 1 + cortex_querier_codec_response_size_bucket{content_type="`+codec.ContentType().String()+`",le="2.62144e+07"} 1 + cortex_querier_codec_response_size_bucket{content_type="`+codec.ContentType().String()+`",le="5.24288e+07"} 1 + cortex_querier_codec_response_size_bucket{content_type="`+codec.ContentType().String()+`",le="1.048576e+08"} 1 + cortex_querier_codec_response_size_bucket{content_type="`+codec.ContentType().String()+`",le="2.62144e+08"} 1 + cortex_querier_codec_response_size_bucket{content_type="`+codec.ContentType().String()+`",le="+Inf"} 1 + cortex_querier_codec_response_size_sum{content_type="`+codec.ContentType().String()+`"} %v + cortex_querier_codec_response_size_count{content_type="`+codec.ContentType().String()+`"} 1 + `, len(body))), "cortex_querier_codec_response_size")) }) } }