From 2d918f389463a225f81e90dcb93389ba73142126 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> Date: Wed, 16 Oct 2024 23:34:44 -0700 Subject: [PATCH 1/6] Post release doc and code update (#6275) Signed-off-by: Friedrich Gonzalez --- RELEASE.md | 4 ++-- docs/getting-started/.env | 4 ++-- integration/backward_compatibility_test.go | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 078bfa688c..fb9b03bd65 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -110,8 +110,8 @@ To publish a stable release: 1. Do not change the release branch directly; make a PR to the release-X.Y branch with VERSION and any CHANGELOG changes. 1. Ensure the `VERSION` file has **no** `-rc.X` suffix 1. Update the Cortex version in the following locations: - - Kubernetes manifests located at `k8s/` - - Documentation located at `docs/` + - `docs/getting-started/.env` + - Bump version in cortex-helm-chart via PR, for example https://github.com/cortexproject/cortex-helm-chart/pull/501 1. After merging your PR to release branch, `git tag` the new release (see [How to tag a release](#how-to-tag-a-release)) from release branch. 1. Wait until CI pipeline succeeded (once a tag is created, the release process through GitHub actions will be triggered for this tag) 1. 
Create a release in GitHub diff --git a/docs/getting-started/.env b/docs/getting-started/.env index 58aa8c3d91..ab10e56ed3 100644 --- a/docs/getting-started/.env +++ b/docs/getting-started/.env @@ -1,4 +1,4 @@ -CORTEX_VERSION=v1.17.1 +CORTEX_VERSION=v1.18.1 GRAFANA_VERSION=10.4.2 PROMETHEUS_VERSION=v2.51.2 -SEAWEEDFS_VERSION=3.67 \ No newline at end of file +SEAWEEDFS_VERSION=3.67 diff --git a/integration/backward_compatibility_test.go b/integration/backward_compatibility_test.go index e209346528..e2f1e2d791 100644 --- a/integration/backward_compatibility_test.go +++ b/integration/backward_compatibility_test.go @@ -74,6 +74,7 @@ var ( "quay.io/cortexproject/cortex:v1.17.0": nil, "quay.io/cortexproject/cortex:v1.17.1": nil, "quay.io/cortexproject/cortex:v1.18.0": nil, + "quay.io/cortexproject/cortex:v1.18.1": nil, } ) From 441ed1da6fe524a707b04b5e081fca13ef049f04 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> Date: Thu, 17 Oct 2024 00:55:51 -0700 Subject: [PATCH 2/6] Backport go upgrade to patch CVEs (#6264) (#6273) * Backport go upgrade to patch CVEs * Update VERSION file --------- Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 4 ++++ VERSION | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03a2ddf904..9dceb75c3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,10 @@ * [ENHANCEMENT] Upgrade build image and Go version to 1.23.2. #6261 #6262 * [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224 +## 1.18.1 2024-10-14 + +* [BUGFIX] Backporting upgrade to go 1.22.7 to patch CVE-2024-34155, CVE-2024-34156, CVE-2024-34158 #6217 #6264 + ## 1.18.0 2024-09-03 * [CHANGE] Ingester: Remove `-querier.query-store-for-labels-enabled` flag. Querying long-term store for labels is always enabled. 
#5984 diff --git a/VERSION b/VERSION index 84cc529467..ec6d649be6 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.18.0 +1.18.1 From e455bda6afe2c233d31c889b85b3ac57439167c1 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Thu, 17 Oct 2024 12:30:50 -0700 Subject: [PATCH 3/6] Reusing the grpc client to perform healthcheck (#6260) Signed-off-by: alanprot --- pkg/util/grpcclient/health_check.go | 101 ++++++++++++++++------- pkg/util/grpcclient/health_check_test.go | 5 +- 2 files changed, 74 insertions(+), 32 deletions(-) diff --git a/pkg/util/grpcclient/health_check.go b/pkg/util/grpcclient/health_check.go index 9cbcc064d5..331d8d110a 100644 --- a/pkg/util/grpcclient/health_check.go +++ b/pkg/util/grpcclient/health_check.go @@ -2,7 +2,6 @@ package grpcclient import ( "context" - "errors" "flag" "fmt" "io" @@ -11,41 +10,49 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/gogo/status" "github.com/weaveworks/common/user" "go.uber.org/atomic" "google.golang.org/grpc" + "google.golang.org/grpc/codes" "google.golang.org/grpc/health/grpc_health_v1" "github.com/cortexproject/cortex/pkg/util/services" ) var ( - unhealthyErr = errors.New("instance marked as unhealthy") + unhealthyErr = status.Error(codes.Unavailable, "instance marked as unhealthy") ) type HealthCheckConfig struct { *HealthCheckInterceptors `yaml:"-"` - UnhealthyThreshold int `yaml:"unhealthy_threshold"` + UnhealthyThreshold int64 `yaml:"unhealthy_threshold"` Interval time.Duration `yaml:"interval"` Timeout time.Duration `yaml:"timeout"` } // RegisterFlagsWithPrefix for Config. func (cfg *HealthCheckConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - f.IntVar(&cfg.UnhealthyThreshold, prefix+".unhealthy-threshold", 0, "The number of consecutive failed health checks required before considering a target unhealthy.
0 means disabled.") + f.Int64Var(&cfg.UnhealthyThreshold, prefix+".unhealthy-threshold", 0, "The number of consecutive failed health checks required before considering a target unhealthy. 0 means disabled.") f.DurationVar(&cfg.Timeout, prefix+".timeout", 1*time.Second, "The amount of time during which no response from a target means a failed health check.") f.DurationVar(&cfg.Interval, prefix+".interval", 5*time.Second, "The approximate amount of time between health checks of an individual target.") } -type healthCheckEntry struct { - address string - clientConfig *ConfigWithHealthCheck +type healthCheckClient struct { + grpc_health_v1.HealthClient + io.Closer +} - sync.RWMutex - unhealthyCount int +type healthCheckEntry struct { + address string + clientConfig *ConfigWithHealthCheck lastCheckTime atomic.Time lastTickTime atomic.Time + unhealthyCount atomic.Int64 + + healthCheckClientMutex sync.RWMutex + healthCheckClient *healthCheckClient } type HealthCheckInterceptors struct { @@ -75,18 +82,14 @@ func NewHealthCheckInterceptors(logger log.Logger) *HealthCheckInterceptors { } func (e *healthCheckEntry) isHealthy() bool { - e.RLock() - defer e.RUnlock() - return e.unhealthyCount < e.clientConfig.HealthCheckConfig.UnhealthyThreshold + return e.unhealthyCount.Load() < e.clientConfig.HealthCheckConfig.UnhealthyThreshold } func (e *healthCheckEntry) recordHealth(err error) error { - e.Lock() - defer e.Unlock() if err != nil { - e.unhealthyCount++ + e.unhealthyCount.Inc() } else { - e.unhealthyCount = 0 + e.unhealthyCount.Store(0) } return err @@ -96,6 +99,51 @@ func (e *healthCheckEntry) tick() { e.lastTickTime.Store(time.Now()) } +func (e *healthCheckEntry) close() error { + e.healthCheckClientMutex.Lock() + defer e.healthCheckClientMutex.Unlock() + + if e.healthCheckClient != nil { + err := e.healthCheckClient.Close() + e.healthCheckClient = nil + return err + } + + return nil +} + +func (e *healthCheckEntry) getClient(factory func(cc *grpc.ClientConn) 
(grpc_health_v1.HealthClient, io.Closer)) (*healthCheckClient, error) { + e.healthCheckClientMutex.RLock() + c := e.healthCheckClient + e.healthCheckClientMutex.RUnlock() + + if c != nil { + return c, nil + } + + e.healthCheckClientMutex.Lock() + defer e.healthCheckClientMutex.Unlock() + + if e.healthCheckClient == nil { + dialOpts, err := e.clientConfig.Config.DialOption(nil, nil) + if err != nil { + return nil, err + } + conn, err := grpc.NewClient(e.address, dialOpts...) + if err != nil { + return nil, err + } + + client, closer := factory(conn) + e.healthCheckClient = &healthCheckClient{ + HealthClient: client, + Closer: closer, + } + } + + return e.healthCheckClient, nil +} + func (h *HealthCheckInterceptors) registeredInstances() []*healthCheckEntry { h.RLock() defer h.RUnlock() @@ -112,6 +160,9 @@ func (h *HealthCheckInterceptors) iteration(ctx context.Context) error { for _, instance := range h.registeredInstances() { if time.Since(instance.lastTickTime.Load()) >= h.instanceGcTimeout { h.Lock() + if err := instance.close(); err != nil { + level.Warn(h.logger).Log("msg", "Error closing health check", "err", err) + } delete(h.activeInstances, instance.address) h.Unlock() continue @@ -124,25 +175,13 @@ func (h *HealthCheckInterceptors) iteration(ctx context.Context) error { instance.lastCheckTime.Store(time.Now()) go func(i *healthCheckEntry) { - dialOpts, err := i.clientConfig.Config.DialOption(nil, nil) - if err != nil { - level.Error(h.logger).Log("msg", "error creating dialOpts to perform healthcheck", "address", i.address, "err", err) - return - } - conn, err := grpc.NewClient(i.address, dialOpts...) 
+ client, err := i.getClient(h.healthClientFactory) + if err != nil { - level.Error(h.logger).Log("msg", "error creating client to perform healthcheck", "address", i.address, "err", err) + level.Error(h.logger).Log("msg", "error creating healthcheck client to perform healthcheck", "address", i.address, "err", err) return } - client, closer := h.healthClientFactory(conn) - - defer func() { - if err := closer.Close(); err != nil { - level.Warn(h.logger).Log("msg", "error closing connection", "address", i.address, "err", err) - } - }() - if err := i.recordHealth(healthCheck(client, i.clientConfig.HealthCheckConfig.Timeout)); !i.isHealthy() { level.Warn(h.logger).Log("msg", "instance marked as unhealthy", "address", i.address, "err", err) } diff --git a/pkg/util/grpcclient/health_check_test.go b/pkg/util/grpcclient/health_check_test.go index 6491878506..7d2b37c37c 100644 --- a/pkg/util/grpcclient/health_check_test.go +++ b/pkg/util/grpcclient/health_check_test.go @@ -11,8 +11,10 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/atomic" "google.golang.org/grpc" + "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/health/grpc_health_v1" + "google.golang.org/grpc/status" utillog "github.com/cortexproject/cortex/pkg/util/log" "github.com/cortexproject/cortex/pkg/util/services" @@ -136,7 +138,8 @@ func TestNewHealthCheckInterceptors(t *testing.T) { require.False(t, hMock.open.Load()) cortex_testutil.Poll(t, time.Second, true, func() interface{} { - return errors.Is(ui(context.Background(), "", struct{}{}, struct{}{}, ccUnhealthy, invoker), unhealthyErr) + err := ui(context.Background(), "", struct{}{}, struct{}{}, ccUnhealthy, invoker) + return errors.Is(err, unhealthyErr) || status.Code(err) == codes.Unavailable }) // Other instances should remain healthy From 1e5b01fd60dbeb72419de8e698f96d28d1fb41d6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 
17 Oct 2024 12:56:54 -0700 Subject: [PATCH 4/6] Bump github.com/efficientgo/core from 1.0.0-rc.2 to 1.0.0-rc.3 (#6203) Bumps [github.com/efficientgo/core](https://github.com/efficientgo/core) from 1.0.0-rc.2 to 1.0.0-rc.3. - [Release notes](https://github.com/efficientgo/core/releases) - [Commits](https://github.com/efficientgo/core/compare/v1.0.0-rc.2...v1.0.0-rc.3) --- updated-dependencies: - dependency-name: github.com/efficientgo/core dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> From 3865567471f9241ef5670ea81b7d544a0a2e3977 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Thu, 17 Oct 2024 13:46:00 -0700 Subject: [PATCH 5/6] expose flag for max store gateway consistency check attempts (#6276) --- CHANGELOG.md | 1 + docs/blocks-storage/querier.md | 6 +++ docs/configuration/config-file-reference.md | 6 +++ pkg/querier/blocks_store_queryable.go | 55 +++++++++++---------- pkg/querier/blocks_store_queryable_test.go | 11 ++++- pkg/querier/querier.go | 9 ++++ 6 files changed, 62 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9dceb75c3c..82f0a9367e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ * [ENHANCEMENT] Ingester: Add matchers to ingester LabelNames() and LabelNamesStream() RPC. #6209 * [ENHANCEMENT] Ingester/Store Gateway Clients: Introduce an experimental HealthCheck handler to quickly fail requests directed to unhealthy targets. #6225 #6257 * [ENHANCEMENT] Upgrade build image and Go version to 1.23.2. #6261 #6262 +* [ENHANCEMENT] Querier/Ruler: Expose `store_gateway_consistency_check_max_attempts` for max retries when querying store gateway in consistency check. 
#6276 * [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224 ## 1.18.1 2024-10-14 diff --git a/docs/blocks-storage/querier.md b/docs/blocks-storage/querier.md index 4d9a0af9e6..360775fe7e 100644 --- a/docs/blocks-storage/querier.md +++ b/docs/blocks-storage/querier.md @@ -226,6 +226,12 @@ querier: # CLI flag: -querier.store-gateway-query-stats-enabled [store_gateway_query_stats: | default = true] + # The maximum number of times we attempt fetching missing blocks from + # different store-gateways. If no more store-gateways are left (ie. due to + # lower replication factor) than we'll end the retries earlier + # CLI flag: -querier.store-gateway-consistency-check-max-attempts + [store_gateway_consistency_check_max_attempts: | default = 3] + # When distributor's sharding strategy is shuffle-sharding and this setting is # > 0, queriers fetch in-memory series from the minimum set of required # ingesters, selecting only ingesters which may have received series since diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 2a601053c2..a841909e5c 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -3872,6 +3872,12 @@ store_gateway_client: # CLI flag: -querier.store-gateway-query-stats-enabled [store_gateway_query_stats: | default = true] +# The maximum number of times we attempt fetching missing blocks from different +# store-gateways. If no more store-gateways are left (ie. 
due to lower +# replication factor) than we'll end the retries earlier +# CLI flag: -querier.store-gateway-consistency-check-max-attempts +[store_gateway_consistency_check_max_attempts: | default = 3] + # When distributor's sharding strategy is shuffle-sharding and this setting is > # 0, queriers fetch in-memory series from the minimum set of required ingesters, # selecting only ingesters which may have received series since 'now - lookback diff --git a/pkg/querier/blocks_store_queryable.go b/pkg/querier/blocks_store_queryable.go index 6d09193edc..9c08b40ee1 100644 --- a/pkg/querier/blocks_store_queryable.go +++ b/pkg/querier/blocks_store_queryable.go @@ -141,7 +141,8 @@ type BlocksStoreQueryable struct { metrics *blocksStoreQueryableMetrics limits BlocksStoreLimits - storeGatewayQueryStatsEnabled bool + storeGatewayQueryStatsEnabled bool + storeGatewayConsistencyCheckMaxAttempts int // Subservices manager. subservices *services.Manager @@ -153,8 +154,7 @@ func NewBlocksStoreQueryable( finder BlocksFinder, consistency *BlocksConsistencyChecker, limits BlocksStoreLimits, - queryStoreAfter time.Duration, - storeGatewayQueryStatsEnabled bool, + config Config, logger log.Logger, reg prometheus.Registerer, ) (*BlocksStoreQueryable, error) { @@ -164,16 +164,17 @@ func NewBlocksStoreQueryable( } q := &BlocksStoreQueryable{ - stores: stores, - finder: finder, - consistency: consistency, - queryStoreAfter: queryStoreAfter, - logger: logger, - subservices: manager, - subservicesWatcher: services.NewFailureWatcher(), - metrics: newBlocksStoreQueryableMetrics(reg), - limits: limits, - storeGatewayQueryStatsEnabled: storeGatewayQueryStatsEnabled, + stores: stores, + finder: finder, + consistency: consistency, + queryStoreAfter: config.QueryStoreAfter, + logger: logger, + subservices: manager, + subservicesWatcher: services.NewFailureWatcher(), + metrics: newBlocksStoreQueryableMetrics(reg), + limits: limits, + storeGatewayQueryStatsEnabled: config.StoreGatewayQueryStatsEnabled, 
+ storeGatewayConsistencyCheckMaxAttempts: config.StoreGatewayConsistencyCheckMaxAttempts, } q.Service = services.NewBasicService(q.starting, q.running, q.stopping) @@ -264,7 +265,7 @@ func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegatewa reg, ) - return NewBlocksStoreQueryable(stores, finder, consistency, limits, querierCfg.QueryStoreAfter, querierCfg.StoreGatewayQueryStatsEnabled, logger, reg) + return NewBlocksStoreQueryable(stores, finder, consistency, limits, querierCfg, logger, reg) } func (q *BlocksStoreQueryable) starting(ctx context.Context) error { @@ -299,16 +300,17 @@ func (q *BlocksStoreQueryable) Querier(mint, maxt int64) (storage.Querier, error } return &blocksStoreQuerier{ - minT: mint, - maxT: maxt, - finder: q.finder, - stores: q.stores, - metrics: q.metrics, - limits: q.limits, - consistency: q.consistency, - logger: q.logger, - queryStoreAfter: q.queryStoreAfter, - storeGatewayQueryStatsEnabled: q.storeGatewayQueryStatsEnabled, + minT: mint, + maxT: maxt, + finder: q.finder, + stores: q.stores, + metrics: q.metrics, + limits: q.limits, + consistency: q.consistency, + logger: q.logger, + queryStoreAfter: q.queryStoreAfter, + storeGatewayQueryStatsEnabled: q.storeGatewayQueryStatsEnabled, + storeGatewayConsistencyCheckMaxAttempts: q.storeGatewayConsistencyCheckMaxAttempts, }, nil } @@ -328,6 +330,9 @@ type blocksStoreQuerier struct { // If enabled, query stats of store gateway requests will be logged // using `info` level. storeGatewayQueryStatsEnabled bool + + // The maximum number of times we attempt fetching missing blocks from different Store Gateways. + storeGatewayConsistencyCheckMaxAttempts int } // Select implements storage.Querier interface. 
@@ -534,7 +539,7 @@ func (q *blocksStoreQuerier) queryWithConsistencyCheck(ctx context.Context, logg retryableError error ) - for attempt := 1; attempt <= maxFetchSeriesAttempts; attempt++ { + for attempt := 1; attempt <= q.storeGatewayConsistencyCheckMaxAttempts; attempt++ { // Find the set of store-gateway instances having the blocks. The exclude parameter is the // map of blocks queried so far, with the list of store-gateway addresses for each block. clients, err := q.stores.GetClientsFor(userID, remainingBlocks, attemptedBlocks, attemptedBlocksZones) diff --git a/pkg/querier/blocks_store_queryable_test.go b/pkg/querier/blocks_store_queryable_test.go index 5cbb2d2020..390f27107a 100644 --- a/pkg/querier/blocks_store_queryable_test.go +++ b/pkg/querier/blocks_store_queryable_test.go @@ -1552,6 +1552,8 @@ func TestBlocksStoreQuerier_Select(t *testing.T) { logger: log.NewNopLogger(), metrics: newBlocksStoreQueryableMetrics(reg), limits: testData.limits, + + storeGatewayConsistencyCheckMaxAttempts: 3, } matchers := []*labels.Matcher{ @@ -2148,6 +2150,8 @@ func TestBlocksStoreQuerier_Labels(t *testing.T) { logger: log.NewNopLogger(), metrics: newBlocksStoreQueryableMetrics(reg), limits: &blocksStoreLimitsMock{}, + + storeGatewayConsistencyCheckMaxAttempts: 3, } if testFunc == "LabelNames" { @@ -2371,7 +2375,12 @@ func TestBlocksStoreQuerier_PromQLExecution(t *testing.T) { } // Instance the querier that will be executed to run the query. 
- queryable, err := NewBlocksStoreQueryable(stores, finder, NewBlocksConsistencyChecker(0, 0, logger, nil), &blocksStoreLimitsMock{}, 0, false, logger, nil) + cfg := Config{ + QueryStoreAfter: 0, + StoreGatewayQueryStatsEnabled: false, + StoreGatewayConsistencyCheckMaxAttempts: 3, + } + queryable, err := NewBlocksStoreQueryable(stores, finder, NewBlocksConsistencyChecker(0, 0, logger, nil), &blocksStoreLimitsMock{}, cfg, logger, nil) require.NoError(t, err) require.NoError(t, services.StartAndAwaitRunning(context.Background(), queryable)) defer services.StopAndAwaitTerminated(context.Background(), queryable) // nolint:errcheck diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index 77a04b2093..34d1f8b19e 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -79,6 +79,9 @@ type Config struct { StoreGatewayClient ClientConfig `yaml:"store_gateway_client"` StoreGatewayQueryStatsEnabled bool `yaml:"store_gateway_query_stats"` + // The maximum number of times we attempt fetching missing blocks from different Store Gateways. + StoreGatewayConsistencyCheckMaxAttempts int `yaml:"store_gateway_consistency_check_max_attempts"` + ShuffleShardingIngestersLookbackPeriod time.Duration `yaml:"shuffle_sharding_ingesters_lookback_period"` // Experimental. Use https://github.com/thanos-io/promql-engine rather than @@ -94,6 +97,7 @@ var ( errShuffleShardingLookbackLessThanQueryStoreAfter = errors.New("the shuffle-sharding lookback period should be greater or equal than the configured 'query store after'") errEmptyTimeRange = errors.New("empty time range") errUnsupportedResponseCompression = errors.New("unsupported response compression. Supported compression 'gzip' and '' (disable compression)") + errInvalidConsistencyCheckAttempts = errors.New("store gateway consistency check max attempts should be greater or equal than 1") ) // RegisterFlags adds the flags required to config this to the given FlagSet. 
@@ -122,6 +126,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.StringVar(&cfg.ActiveQueryTrackerDir, "querier.active-query-tracker-dir", "./active-query-tracker", "Active query tracker monitors active queries, and writes them to the file in given directory. If Cortex discovers any queries in this log during startup, it will log them to the log file. Setting to empty value disables active query tracker, which also disables -querier.max-concurrent option.") f.StringVar(&cfg.StoreGatewayAddresses, "querier.store-gateway-addresses", "", "Comma separated list of store-gateway addresses in DNS Service Discovery format. This option should be set when using the blocks storage and the store-gateway sharding is disabled (when enabled, the store-gateway instances form a ring and addresses are picked from the ring).") f.BoolVar(&cfg.StoreGatewayQueryStatsEnabled, "querier.store-gateway-query-stats-enabled", true, "If enabled, store gateway query stats will be logged using `info` log level.") + f.IntVar(&cfg.StoreGatewayConsistencyCheckMaxAttempts, "querier.store-gateway-consistency-check-max-attempts", maxFetchSeriesAttempts, "The maximum number of times we attempt fetching missing blocks from different store-gateways. If no more store-gateways are left (ie. due to lower replication factor) than we'll end the retries earlier") f.DurationVar(&cfg.LookbackDelta, "querier.lookback-delta", 5*time.Minute, "Time since the last sample after which a time series is considered stale and ignored by expression evaluations.") f.DurationVar(&cfg.ShuffleShardingIngestersLookbackPeriod, "querier.shuffle-sharding-ingesters-lookback-period", 0, "When distributor's sharding strategy is shuffle-sharding and this setting is > 0, queriers fetch in-memory series from the minimum set of required ingesters, selecting only ingesters which may have received series since 'now - lookback period'. 
The lookback period should be greater or equal than the configured 'query store after' and 'query ingesters within'. If this setting is 0, queriers always query all ingesters (ingesters shuffle sharding on read path is disabled).") f.BoolVar(&cfg.ThanosEngine, "querier.thanos-engine", false, "Experimental. Use Thanos promql engine https://github.com/thanos-io/promql-engine rather than the Prometheus promql engine.") @@ -148,6 +153,10 @@ func (cfg *Config) Validate() error { } } + if cfg.StoreGatewayConsistencyCheckMaxAttempts < 1 { + return errInvalidConsistencyCheckAttempts + } + return nil } From e070ec6b347d41dabf32eb48d7ef3ebe81df8bb2 Mon Sep 17 00:00:00 2001 From: SungJin1212 Date: Sat, 19 Oct 2024 10:13:56 +0900 Subject: [PATCH 6/6] Add alertmanager config to single process documents (#6274) --- .../single-process-config-blocks-gossip-1.yaml | 9 +++++++++ .../single-process-config-blocks-gossip-2.yaml | 9 +++++++++ .../single-process-config-blocks-local.yaml | 9 +++++++++ docs/configuration/single-process-config-blocks-tls.yaml | 9 +++++++++ docs/configuration/single-process-config-blocks.yaml | 9 +++++++++ 5 files changed, 45 insertions(+) diff --git a/docs/configuration/single-process-config-blocks-gossip-1.yaml b/docs/configuration/single-process-config-blocks-gossip-1.yaml index 5d53411921..7c7b3b515a 100644 --- a/docs/configuration/single-process-config-blocks-gossip-1.yaml +++ b/docs/configuration/single-process-config-blocks-gossip-1.yaml @@ -96,3 +96,12 @@ ruler_storage: backend: local local: directory: /tmp/cortex/rules + +alertmanager: + external_url: http://localhost/alertmanager + +alertmanager_storage: + backend: local + local: + # Make sure this path exists + path: /tmp/cortex/alerts diff --git a/docs/configuration/single-process-config-blocks-gossip-2.yaml b/docs/configuration/single-process-config-blocks-gossip-2.yaml index 419e70e9df..54dbf79548 100644 --- a/docs/configuration/single-process-config-blocks-gossip-2.yaml +++
b/docs/configuration/single-process-config-blocks-gossip-2.yaml @@ -95,3 +95,12 @@ ruler_storage: backend: local local: directory: /tmp/cortex/rules + +alertmanager: + external_url: http://localhost/alertmanager + +alertmanager_storage: + backend: local + local: + # Make sure this path exists + path: /tmp/cortex/alerts diff --git a/docs/configuration/single-process-config-blocks-local.yaml b/docs/configuration/single-process-config-blocks-local.yaml index a5eb711d97..c6b97ae0ed 100644 --- a/docs/configuration/single-process-config-blocks-local.yaml +++ b/docs/configuration/single-process-config-blocks-local.yaml @@ -88,3 +88,12 @@ ruler_storage: backend: local local: directory: /tmp/cortex/rules + +alertmanager: + external_url: http://localhost/alertmanager + +alertmanager_storage: + backend: local + local: + # Make sure this path exists + path: /tmp/cortex/alerts diff --git a/docs/configuration/single-process-config-blocks-tls.yaml b/docs/configuration/single-process-config-blocks-tls.yaml index ca468192c9..352bf7c8a0 100644 --- a/docs/configuration/single-process-config-blocks-tls.yaml +++ b/docs/configuration/single-process-config-blocks-tls.yaml @@ -102,3 +102,12 @@ ruler_storage: backend: local local: directory: /tmp/cortex/rules + +alertmanager: + external_url: http://localhost/alertmanager + +alertmanager_storage: + backend: local + local: + # Make sure this path exists + path: /tmp/cortex/alerts diff --git a/docs/configuration/single-process-config-blocks.yaml b/docs/configuration/single-process-config-blocks.yaml index 25d699a24f..c03d69a5e7 100644 --- a/docs/configuration/single-process-config-blocks.yaml +++ b/docs/configuration/single-process-config-blocks.yaml @@ -88,3 +88,12 @@ ruler_storage: backend: local local: directory: /tmp/cortex/rules + +alertmanager: + external_url: http://localhost/alertmanager + +alertmanager_storage: + backend: local + local: + # Make sure this path exists + path: /tmp/cortex/alerts