Skip to content

Commit

Permalink
Update thanos to latest main to pull lazy posting improvements (#6411)
Browse files Browse the repository at this point in the history
* update thanos to latest main to pull lazy posting improvements

Signed-off-by: Ben Ye <[email protected]>

* fix lint

Signed-off-by: Ben Ye <[email protected]>

---------

Signed-off-by: Ben Ye <[email protected]>
  • Loading branch information
yeya24 authored Dec 10, 2024
1 parent 1177a67 commit 68012ec
Show file tree
Hide file tree
Showing 12 changed files with 155 additions and 40 deletions.
8 changes: 8 additions & 0 deletions docs/blocks-storage/querier.md
Original file line number Diff line number Diff line change
Expand Up @@ -1415,6 +1415,14 @@ blocks_storage:
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-postings-enabled
[lazy_expanded_postings_enabled: <boolean> | default = false]

# Mark posting group as lazy if it fetches more keys than R * max series the
# query should fetch. With R set to 100, a posting group which fetches 100K
# keys will be marked as lazy if the current query only fetches 1000 series.
# This config is only valid if lazy expanded posting is enabled. 0 disables
# the limit.
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-posting-group-max-key-series-ratio
[lazy_expanded_posting_group_max_key_series_ratio: <float> | default = 100]

# Controls how many series to fetch per batch in Store Gateway. Default
# value is 10000.
# CLI flag: -blocks-storage.bucket-store.series-batch-size
Expand Down
8 changes: 8 additions & 0 deletions docs/blocks-storage/store-gateway.md
Original file line number Diff line number Diff line change
Expand Up @@ -1519,6 +1519,14 @@ blocks_storage:
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-postings-enabled
[lazy_expanded_postings_enabled: <boolean> | default = false]

# Mark posting group as lazy if it fetches more keys than R * max series the
# query should fetch. With R set to 100, a posting group which fetches 100K
# keys will be marked as lazy if the current query only fetches 1000 series.
# This config is only valid if lazy expanded posting is enabled. 0 disables
# the limit.
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-posting-group-max-key-series-ratio
[lazy_expanded_posting_group_max_key_series_ratio: <float> | default = 100]

# Controls how many series to fetch per batch in Store Gateway. Default
# value is 10000.
# CLI flag: -blocks-storage.bucket-store.series-batch-size
Expand Down
8 changes: 8 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -1949,6 +1949,14 @@ bucket_store:
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-postings-enabled
[lazy_expanded_postings_enabled: <boolean> | default = false]

# Mark posting group as lazy if it fetches more keys than R * max series the
# query should fetch. With R set to 100, a posting group which fetches 100K
# keys will be marked as lazy if the current query only fetches 1000 series.
# This config is only valid if lazy expanded posting is enabled. 0 disables
# the limit.
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-posting-group-max-key-series-ratio
[lazy_expanded_posting_group_max_key_series_ratio: <float> | default = 100]

# Controls how many series to fetch per batch in Store Gateway. Default value
# is 10000.
# CLI flag: -blocks-storage.bucket-store.series-batch-size
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ require (
github.com/stretchr/testify v1.10.0
github.com/thanos-io/objstore v0.0.0-20241111205755-d1dd89d41f97
github.com/thanos-io/promql-engine v0.0.0-20241203103240-2f49f80c7c68
github.com/thanos-io/thanos v0.37.2-0.20241205123958-d0d93dbf3efc
github.com/thanos-io/thanos v0.37.2-0.20241210071311-51c7dcd8c278
github.com/uber/jaeger-client-go v2.30.0+incompatible
github.com/weaveworks/common v0.0.0-20230728070032-dd9e68f319d5
go.etcd.io/etcd/api/v3 v3.5.17
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1665,8 +1665,8 @@ github.com/thanos-io/objstore v0.0.0-20241111205755-d1dd89d41f97 h1:VjG0mwhN1Dkn
github.com/thanos-io/objstore v0.0.0-20241111205755-d1dd89d41f97/go.mod h1:vyzFrBXgP+fGNG2FopEGWOO/zrIuoy7zt3LpLeezRsw=
github.com/thanos-io/promql-engine v0.0.0-20241203103240-2f49f80c7c68 h1:cChM/FbpXeYmrSmXO1/MmmSlONviLVxWAWCB0/g4JrY=
github.com/thanos-io/promql-engine v0.0.0-20241203103240-2f49f80c7c68/go.mod h1:wx0JlRZtsB2S10JYUgeg5GqLfMxw31SzArP+28yyE00=
github.com/thanos-io/thanos v0.37.2-0.20241205123958-d0d93dbf3efc h1:LMpGIErJWqv+9FmCHAcl9t+6VL8gn6lptIKDgglbNnU=
github.com/thanos-io/thanos v0.37.2-0.20241205123958-d0d93dbf3efc/go.mod h1:5Ni7Uc1Bc8UCGOYmZ/2f/LVvDkZKNDdqDJZqjDFG+rI=
github.com/thanos-io/thanos v0.37.2-0.20241210071311-51c7dcd8c278 h1:5MYGbe7gYtPE/DYReOxrevi++3+mgwz5ud9ji/lwXrg=
github.com/thanos-io/thanos v0.37.2-0.20241210071311-51c7dcd8c278/go.mod h1:5Ni7Uc1Bc8UCGOYmZ/2f/LVvDkZKNDdqDJZqjDFG+rI=
github.com/tjhop/slog-gokit v0.1.2 h1:pmQI4SvU9h4gA0vIQsdhJQSqQg4mOmsPykG2/PM3j1I=
github.com/tjhop/slog-gokit v0.1.2/go.mod h1:8fhlcp8C8ELbg3GCyKv06tgt4B5sDq2P1r2DQAu1HuM=
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
Expand Down
14 changes: 11 additions & 3 deletions pkg/storage/tsdb/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ var (
errEmptyBlockranges = errors.New("empty block ranges for TSDB")
errUnSupportedWALCompressionType = errors.New("unsupported WAL compression type, valid types are (zstd, snappy and '')")

ErrInvalidBucketIndexBlockDiscoveryStrategy = errors.New("bucket index block discovery strategy can only be enabled when bucket index is enabled")
ErrBlockDiscoveryStrategy = errors.New("invalid block discovery strategy")
ErrInvalidTokenBucketBytesLimiterMode = errors.New("invalid token bucket bytes limiter mode")
ErrInvalidBucketIndexBlockDiscoveryStrategy = errors.New("bucket index block discovery strategy can only be enabled when bucket index is enabled")
ErrBlockDiscoveryStrategy = errors.New("invalid block discovery strategy")
ErrInvalidTokenBucketBytesLimiterMode = errors.New("invalid token bucket bytes limiter mode")
ErrInvalidLazyExpandedPostingGroupMaxKeySeriesRatio = errors.New("lazy expanded posting group max key series ratio needs to be equal or greater than 0")
)

// BlocksStorageConfig holds the config information for the blocks storage.
Expand Down Expand Up @@ -291,6 +292,9 @@ type BucketStoreConfig struct {
// Controls whether lazy expanded posting optimization is enabled or not.
LazyExpandedPostingsEnabled bool `yaml:"lazy_expanded_postings_enabled"`

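// Ratio R used to decide whether an expanded posting group is marked as lazy: a posting group is marked
// as lazy if it fetches more keys than R * max series the query should fetch. 0 disables the limit.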
LazyExpandedPostingGroupMaxKeySeriesRatio float64 `yaml:"lazy_expanded_posting_group_max_key_series_ratio"`

// Controls the partitioner, used to aggregate multiple GET object API requests.
// The config option is hidden until experimental.
PartitionerMaxGapBytes uint64 `yaml:"partitioner_max_gap_bytes" doc:"hidden"`
Expand Down Expand Up @@ -356,6 +360,7 @@ func (cfg *BucketStoreConfig) RegisterFlags(f *flag.FlagSet) {
f.Uint64Var(&cfg.EstimatedMaxSeriesSizeBytes, "blocks-storage.bucket-store.estimated-max-series-size-bytes", store.EstimatedMaxSeriesSize, "Estimated max series size in bytes. Setting a large value might result in over fetching data while a small value might result in data refetch. Default value is 64KB.")
f.Uint64Var(&cfg.EstimatedMaxChunkSizeBytes, "blocks-storage.bucket-store.estimated-max-chunk-size-bytes", store.EstimatedMaxChunkSize, "Estimated max chunk size in bytes. Setting a large value might result in over fetching data while a small value might result in data refetch. Default value is 16KiB.")
f.BoolVar(&cfg.LazyExpandedPostingsEnabled, "blocks-storage.bucket-store.lazy-expanded-postings-enabled", false, "If true, Store Gateway will estimate postings size and try to lazily expand postings if it downloads less data than expanding all postings.")
f.Float64Var(&cfg.LazyExpandedPostingGroupMaxKeySeriesRatio, "blocks-storage.bucket-store.lazy-expanded-posting-group-max-key-series-ratio", 100, "Mark posting group as lazy if it fetches more keys than R * max series the query should fetch. With R set to 100, a posting group which fetches 100K keys will be marked as lazy if the current query only fetches 1000 series. This config is only valid if lazy expanded posting is enabled. 0 disables the limit.")
f.IntVar(&cfg.SeriesBatchSize, "blocks-storage.bucket-store.series-batch-size", store.SeriesBatchSize, "Controls how many series to fetch per batch in Store Gateway. Default value is 10000.")
f.StringVar(&cfg.BlockDiscoveryStrategy, "blocks-storage.bucket-store.block-discovery-strategy", string(ConcurrentDiscovery), "One of "+strings.Join(supportedBlockDiscoveryStrategies, ", ")+". When set to concurrent, stores will concurrently issue one call per directory to discover active blocks in the bucket. The recursive strategy iterates through all objects in the bucket, recursively traversing into each directory. This avoids N+1 calls at the expense of having slower bucket iterations. bucket_index strategy can be used in Compactor only and utilizes the existing bucket index to fetch block IDs to sync. This avoids iterating the bucket but can be impacted by delays of cleaner creating bucket index.")
f.StringVar(&cfg.TokenBucketBytesLimiter.Mode, "blocks-storage.bucket-store.token-bucket-bytes-limiter.mode", string(TokenBucketBytesLimiterDisabled), fmt.Sprintf("Token bucket bytes limiter mode. Supported values are: %s", strings.Join(supportedTokenBucketBytesLimiterModes, ", ")))
Expand Down Expand Up @@ -390,6 +395,9 @@ func (cfg *BucketStoreConfig) Validate() error {
if !util.StringsContain(supportedTokenBucketBytesLimiterModes, cfg.TokenBucketBytesLimiter.Mode) {
return ErrInvalidTokenBucketBytesLimiterMode
}
if cfg.LazyExpandedPostingGroupMaxKeySeriesRatio < 0 {
return ErrInvalidLazyExpandedPostingGroupMaxKeySeriesRatio
}
return nil
}

Expand Down
7 changes: 7 additions & 0 deletions pkg/storegateway/bucket_store_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type BucketStoreMetrics struct {
chunkFetchDurationSum *prometheus.Desc

lazyExpandedPostingsCount *prometheus.Desc
lazyExpandedPostingGroups *prometheus.Desc
lazyExpandedPostingSizeBytes *prometheus.Desc
lazyExpandedPostingSeriesOverfetchedSizeBytes *prometheus.Desc

Expand Down Expand Up @@ -209,6 +210,10 @@ func NewBucketStoreMetrics() *BucketStoreMetrics {
"cortex_bucket_store_lazy_expanded_postings_total",
"Total number of lazy expanded postings when fetching block series.",
nil, nil),
lazyExpandedPostingGroups: prometheus.NewDesc(
"cortex_bucket_store_lazy_expanded_posting_groups_total",
"Total number of posting groups that are marked as lazy and corresponding reason.",
[]string{"reason"}, nil),
lazyExpandedPostingSizeBytes: prometheus.NewDesc(
"cortex_bucket_store_lazy_expanded_posting_size_bytes_total",
"Total number of lazy posting group size in bytes.",
Expand Down Expand Up @@ -269,6 +274,7 @@ func (m *BucketStoreMetrics) Describe(out chan<- *prometheus.Desc) {
out <- m.indexHeaderLazyLoadDuration

out <- m.lazyExpandedPostingsCount
out <- m.lazyExpandedPostingGroups
out <- m.lazyExpandedPostingSizeBytes
out <- m.lazyExpandedPostingSeriesOverfetchedSizeBytes
}
Expand Down Expand Up @@ -319,6 +325,7 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfHistograms(out, m.indexHeaderLazyLoadDuration, "thanos_bucket_store_indexheader_lazy_load_duration_seconds")

data.SendSumOfCounters(out, m.lazyExpandedPostingsCount, "thanos_bucket_store_lazy_expanded_postings_total")
data.SendSumOfCountersWithLabels(out, m.lazyExpandedPostingGroups, "thanos_bucket_store_lazy_expanded_posting_groups_total", "reason")
data.SendSumOfCounters(out, m.lazyExpandedPostingSizeBytes, "thanos_bucket_store_lazy_expanded_posting_size_bytes_total")
data.SendSumOfCounters(out, m.lazyExpandedPostingSeriesOverfetchedSizeBytes, "thanos_bucket_store_lazy_expanded_posting_series_overfetched_size_bytes_total")
}
12 changes: 12 additions & 0 deletions pkg/storegateway/bucket_store_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,10 @@ func TestBucketStoreMetrics(t *testing.T) {
# HELP cortex_bucket_store_indexheader_lazy_unload_total Total number of index-header lazy unload operations.
# TYPE cortex_bucket_store_indexheader_lazy_unload_total counter
cortex_bucket_store_indexheader_lazy_unload_total 1.396178e+06
# HELP cortex_bucket_store_lazy_expanded_posting_groups_total Total number of posting groups that are marked as lazy and corresponding reason.
# TYPE cortex_bucket_store_lazy_expanded_posting_groups_total counter
cortex_bucket_store_lazy_expanded_posting_groups_total{reason="keys_limit"} 202671
cortex_bucket_store_lazy_expanded_posting_groups_total{reason="postings_size"} 225190
# HELP cortex_bucket_store_lazy_expanded_posting_series_overfetched_size_bytes_total Total number of series size in bytes overfetched due to posting lazy expansion.
# TYPE cortex_bucket_store_lazy_expanded_posting_series_overfetched_size_bytes_total counter
cortex_bucket_store_lazy_expanded_posting_series_overfetched_size_bytes_total 180152
Expand Down Expand Up @@ -687,6 +691,8 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
m.lazyExpandedPostingsCount.Add(6 * base)
m.lazyExpandedPostingSizeBytes.Add(7 * base)
m.lazyExpandedPostingSeriesOverfetchedSizeBytes.Add(8 * base)
m.lazyExpandedPostingGroups.WithLabelValues("keys_limit").Add(9 * base)
m.lazyExpandedPostingGroups.WithLabelValues("postings_size").Add(10 * base)

return reg
}
Expand Down Expand Up @@ -733,6 +739,7 @@ type mockedBucketStoreMetrics struct {
indexHeaderLazyLoadDuration prometheus.Histogram

lazyExpandedPostingsCount prometheus.Counter
lazyExpandedPostingGroups *prometheus.CounterVec
lazyExpandedPostingSizeBytes prometheus.Counter
lazyExpandedPostingSeriesOverfetchedSizeBytes prometheus.Counter
}
Expand Down Expand Up @@ -917,6 +924,11 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
Help: "Total number of times when lazy expanded posting optimization applies.",
})

m.lazyExpandedPostingGroups = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
Name: "thanos_bucket_store_lazy_expanded_posting_groups_total",
Help: "Total number of posting groups that are marked as lazy and corresponding reason.",
}, []string{"reason"})

m.lazyExpandedPostingSizeBytes = promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "thanos_bucket_store_lazy_expanded_posting_size_bytes_total",
Help: "Total number of lazy posting group size in bytes.",
Expand Down
1 change: 1 addition & 0 deletions pkg/storegateway/bucket_stores.go
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,7 @@ func (u *BucketStores) getOrCreateStore(userID string) (*store.BucketStore, erro
return u.cfg.BucketStore.EstimatedMaxSeriesSizeBytes
}),
store.WithLazyExpandedPostings(u.cfg.BucketStore.LazyExpandedPostingsEnabled),
store.WithPostingGroupMaxKeySeriesRatio(u.cfg.BucketStore.LazyExpandedPostingGroupMaxKeySeriesRatio),
store.WithDontResort(true), // Cortex doesn't need to resort series in store gateway.
}
if u.logLevel.String() == "debug" {
Expand Down
Loading

0 comments on commit 68012ec

Please sign in to comment.