From 18d8cbcadad2a315e42bf32e86375d484ed97b56 Mon Sep 17 00:00:00 2001 From: Alex Le Date: Tue, 6 Aug 2024 14:23:06 -0700 Subject: [PATCH 1/6] Implement partition compaction grouper Signed-off-by: Alex Le --- docs/blocks-storage/compactor.md | 18 +- docs/configuration/config-file-reference.md | 37 +- pkg/compactor/compactor.go | 50 +- pkg/compactor/compactor_metrics.go | 30 + pkg/compactor/compactor_metrics_test.go | 8 + pkg/compactor/partition_compaction_grouper.go | 895 ++++++- .../partition_compaction_grouper_test.go | 2139 +++++++++++++++++ pkg/compactor/partition_visit_marker.go | 96 + pkg/compactor/partitioned_group_info.go | 307 +++ pkg/compactor/partitioned_group_info_test.go | 882 +++++++ pkg/compactor/shuffle_sharding_grouper.go | 15 +- pkg/storage/tsdb/meta_extensions.go | 71 + pkg/storage/tsdb/meta_extensions_test.go | 182 ++ pkg/util/validation/limits.go | 32 +- 14 files changed, 4718 insertions(+), 44 deletions(-) create mode 100644 pkg/compactor/partition_compaction_grouper_test.go create mode 100644 pkg/compactor/partition_visit_marker.go create mode 100644 pkg/compactor/partitioned_group_info.go create mode 100644 pkg/compactor/partitioned_group_info_test.go create mode 100644 pkg/storage/tsdb/meta_extensions.go create mode 100644 pkg/storage/tsdb/meta_extensions_test.go diff --git a/docs/blocks-storage/compactor.md b/docs/blocks-storage/compactor.md index 030035f2ac..05abf73eed 100644 --- a/docs/blocks-storage/compactor.md +++ b/docs/blocks-storage/compactor.md @@ -286,18 +286,18 @@ compactor: [wait_active_instance_timeout: | default = 10m] # The compaction strategy to use. Supported values are: default, partitioning. - # CLI flag: -compactor.compaction-mode - [compaction_mode: | default = "default"] + # CLI flag: -compactor.compaction-strategy + [compaction_strategy: | default = "default"] - # How long block visit marker file should be considered as expired and able to - # be picked up by compactor again. 
- # CLI flag: -compactor.block-visit-marker-timeout - [block_visit_marker_timeout: | default = 5m] + # How long compaction visit marker file should be considered as expired and + # able to be picked up by compactor again. + # CLI flag: -compactor.compaction-visit-marker-timeout + [compaction_visit_marker_timeout: | default = 1m30s] - # How frequently block visit marker file should be updated duration + # How frequently compaction visit marker file should be updated duration # compaction. - # CLI flag: -compactor.block-visit-marker-file-update-interval - [block_visit_marker_file_update_interval: | default = 1m] + # CLI flag: -compactor.compaction-visit-marker-file-update-interval + [compaction_visit_marker_file_update_interval: | default = 1m] # How long cleaner visit marker file should be considered as expired and able # to be picked up by cleaner again. The value should be smaller than diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 0144dbaf31..40078bc469 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -2217,17 +2217,18 @@ sharding_ring: [wait_active_instance_timeout: | default = 10m] # The compaction strategy to use. Supported values are: default, partitioning. -# CLI flag: -compactor.compaction-mode -[compaction_mode: | default = "default"] +# CLI flag: -compactor.compaction-strategy +[compaction_strategy: | default = "default"] -# How long block visit marker file should be considered as expired and able to -# be picked up by compactor again. -# CLI flag: -compactor.block-visit-marker-timeout -[block_visit_marker_timeout: | default = 5m] +# How long compaction visit marker file should be considered as expired and able +# to be picked up by compactor again. 
+# CLI flag: -compactor.compaction-visit-marker-timeout +[compaction_visit_marker_timeout: | default = 1m30s] -# How frequently block visit marker file should be updated duration compaction. -# CLI flag: -compactor.block-visit-marker-file-update-interval -[block_visit_marker_file_update_interval: | default = 1m] +# How frequently compaction visit marker file should be updated duration +# compaction. +# CLI flag: -compactor.compaction-visit-marker-file-update-interval +[compaction_visit_marker_file_update_interval: | default = 1m] # How long cleaner visit marker file should be considered as expired and able to # be picked up by cleaner again. The value should be smaller than @@ -3385,6 +3386,24 @@ query_rejection: # CLI flag: -compactor.tenant-shard-size [compactor_tenant_shard_size: | default = 0] +# Index size limit in bytes for each compaction partition. 0 means no limit +# CLI flag: -compactor.partition-index-size-limit-in-bytes +[compactor_partition_index_size_limit_in_bytes: | default = 0] + +# Time series count limit for each compaction partition. 0 means no limit +# CLI flag: -compactor.partition-series-count-limit +[compactor_partition_series_count_limit: | default = 0] + +# Index size limit in bytes for each level 1 compaction partition. 0 means no +# limit +# CLI flag: -compactor.partition-level1-index-size-limit-in-bytes +[compactor_partition_level1_index_size_limit_in_bytes: | default = 0] + +# Time series count limit for each level 1 compaction partition. 0 means no +# limit +# CLI flag: -compactor.partition-level1-series-count-limit +[compactor_partition_level1_series_count_limit: | default = 0] + # S3 server-side encryption type. Required to enable server-side encryption # overrides for a specific tenant. If not set, the default S3 client settings # are used. 
diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 817f93572b..52659dc188 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -81,7 +81,28 @@ var ( ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, blockVisitMarkerReadFailed prometheus.Counter, blockVisitMarkerWriteFailed prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, ring *ring.Ring, ringLifecycle *ring.Lifecycler, limits Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter) compact.Grouper { if cfg.CompactionStrategy == util.CompactionStrategyPartitioning { - return NewPartitionCompactionGrouper(ctx, logger, bkt) + return NewPartitionCompactionGrouper( + ctx, + logger, + bkt, + cfg.AcceptMalformedIndex, + true, // Enable vertical compaction + blocksMarkedForNoCompaction, + syncerMetrics, + compactorMetrics, + metadata.NoneFunc, + cfg, + ring, + ringLifecycle.Addr, + ringLifecycle.ID, + limits, + userID, + cfg.BlockFilesConcurrency, + cfg.BlocksFetchConcurrency, + cfg.CompactionConcurrency, + true, + cfg.CompactionVisitMarkerTimeout, + noCompactionMarkFilter.NoCompactMarkedBlocks) } else { return NewShuffleShardingGrouper( ctx, @@ -102,7 +123,7 @@ var ( cfg.BlockFilesConcurrency, cfg.BlocksFetchConcurrency, cfg.CompactionConcurrency, - cfg.BlockVisitMarkerTimeout, + cfg.CompactionVisitMarkerTimeout, blockVisitMarkerReadFailed, blockVisitMarkerWriteFailed, noCompactionMarkFilter.NoCompactMarkedBlocks) @@ -133,7 +154,7 @@ var ( if cfg.CompactionStrategy == util.CompactionStrategyPartitioning { return NewPartitionCompactionPlanner(ctx, bkt, logger) } else { - return NewShuffleShardingPlanner(ctx, bkt, logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter.NoCompactMarkedBlocks, ringLifecycle.ID, cfg.BlockVisitMarkerTimeout, cfg.BlockVisitMarkerFileUpdateInterval, 
blockVisitMarkerReadFailed, blockVisitMarkerWriteFailed) + return NewShuffleShardingPlanner(ctx, bkt, logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter.NoCompactMarkedBlocks, ringLifecycle.ID, cfg.CompactionVisitMarkerTimeout, cfg.CompactionVisitMarkerFileUpdateInterval, blockVisitMarkerReadFailed, blockVisitMarkerWriteFailed) } } return compactor, plannerFactory, nil @@ -182,6 +203,10 @@ type PlannerFactory func( // Limits defines limits used by the Compactor. type Limits interface { CompactorTenantShardSize(userID string) int + CompactorPartitionIndexSizeLimitInBytes(userID string) int64 + CompactorPartitionSeriesCountLimit(userID string) int64 + CompactorPartitionLevel1IndexSizeLimitInBytes(userID string) int64 + CompactorPartitionLevel1SeriesCountLimit(userID string) int64 } // Config holds the Compactor config. @@ -213,8 +238,8 @@ type Config struct { ShardingStrategy string `yaml:"sharding_strategy"` ShardingRing RingConfig `yaml:"sharding_ring"` - // Compaction mode. - CompactionStrategy string `yaml:"compaction_mode"` + // Compaction strategy. 
+ CompactionStrategy string `yaml:"compaction_strategy"` // No need to add options to customize the retry backoff, // given the defaults should be fine, but allow to override @@ -226,9 +251,9 @@ type Config struct { BlocksGrouperFactory BlocksGrouperFactory `yaml:"-"` BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"` - // Block visit marker file config - BlockVisitMarkerTimeout time.Duration `yaml:"block_visit_marker_timeout"` - BlockVisitMarkerFileUpdateInterval time.Duration `yaml:"block_visit_marker_file_update_interval"` + // Compaction visit marker file config + CompactionVisitMarkerTimeout time.Duration `yaml:"compaction_visit_marker_timeout"` + CompactionVisitMarkerFileUpdateInterval time.Duration `yaml:"compaction_visit_marker_file_update_interval"` // Cleaner visit marker file config CleanerVisitMarkerTimeout time.Duration `yaml:"cleaner_visit_marker_timeout"` @@ -258,7 +283,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.IntVar(&cfg.CleanupConcurrency, "compactor.cleanup-concurrency", 20, "Max number of tenants for which blocks cleanup and maintenance should run concurrently.") f.BoolVar(&cfg.ShardingEnabled, "compactor.sharding-enabled", false, "Shard tenants across multiple compactor instances. Sharding is required if you run multiple compactor instances, in order to coordinate compactions and avoid race conditions leading to the same tenant blocks simultaneously compacted by different instances.") f.StringVar(&cfg.ShardingStrategy, "compactor.sharding-strategy", util.ShardingStrategyDefault, fmt.Sprintf("The sharding strategy to use. Supported values are: %s.", strings.Join(supportedShardingStrategies, ", "))) - f.StringVar(&cfg.CompactionStrategy, "compactor.compaction-mode", util.CompactionStrategyDefault, fmt.Sprintf("The compaction strategy to use. 
Supported values are: %s.", strings.Join(supportedCompactionStrategies, ", "))) + f.StringVar(&cfg.CompactionStrategy, "compactor.compaction-strategy", util.CompactionStrategyDefault, fmt.Sprintf("The compaction strategy to use. Supported values are: %s.", strings.Join(supportedCompactionStrategies, ", "))) f.DurationVar(&cfg.DeletionDelay, "compactor.deletion-delay", 12*time.Hour, "Time before a block marked for deletion is deleted from bucket. "+ "If not 0, blocks will be marked for deletion and compactor component will permanently delete blocks marked for deletion from the bucket. "+ "If 0, blocks will be deleted straight away. Note that deleting blocks immediately can cause query failures.") @@ -271,8 +296,8 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.Var(&cfg.EnabledTenants, "compactor.enabled-tenants", "Comma separated list of tenants that can be compacted. If specified, only these tenants will be compacted by compactor, otherwise all tenants can be compacted. Subject to sharding.") f.Var(&cfg.DisabledTenants, "compactor.disabled-tenants", "Comma separated list of tenants that cannot be compacted by this compactor. 
If specified, and compactor would normally pick given tenant for compaction (via -compactor.enabled-tenants or sharding), it will be ignored instead.") - f.DurationVar(&cfg.BlockVisitMarkerTimeout, "compactor.block-visit-marker-timeout", 5*time.Minute, "How long block visit marker file should be considered as expired and able to be picked up by compactor again.") - f.DurationVar(&cfg.BlockVisitMarkerFileUpdateInterval, "compactor.block-visit-marker-file-update-interval", 1*time.Minute, "How frequently block visit marker file should be updated duration compaction.") + f.DurationVar(&cfg.CompactionVisitMarkerTimeout, "compactor.compaction-visit-marker-timeout", 90*time.Second, "How long compaction visit marker file should be considered as expired and able to be picked up by compactor again.") + f.DurationVar(&cfg.CompactionVisitMarkerFileUpdateInterval, "compactor.compaction-visit-marker-file-update-interval", 1*time.Minute, "How frequently compaction visit marker file should be updated duration compaction.") f.DurationVar(&cfg.CleanerVisitMarkerTimeout, "compactor.cleaner-visit-marker-timeout", 10*time.Minute, "How long cleaner visit marker file should be considered as expired and able to be picked up by cleaner again. 
The value should be smaller than -compactor.cleanup-interval") f.DurationVar(&cfg.CleanerVisitMarkerFileUpdateInterval, "compactor.cleaner-visit-marker-file-update-interval", 5*time.Minute, "How frequently cleaner visit marker file should be updated when cleaning user.") @@ -305,7 +330,7 @@ func (cfg *Config) Validate(limits validation.Limits) error { } } - // Make sure a valid compaction mode is being used + // Make sure a valid compaction strategy is being used if !util.StringsContain(supportedCompactionStrategies, cfg.CompactionStrategy) { return errInvalidCompactionStrategy } @@ -761,6 +786,7 @@ func (c *Compactor) compactUsers(ctx context.Context) { continue } else if markedForDeletion { c.CompactionRunSkippedTenants.Inc() + c.compactorMetrics.deleteMetricsForDeletedTenant(userID) level.Debug(c.logger).Log("msg", "skipping user because it is marked for deletion", "user", userID) continue } diff --git a/pkg/compactor/compactor_metrics.go b/pkg/compactor/compactor_metrics.go index bdd3fefef0..c47bb2bde3 100644 --- a/pkg/compactor/compactor_metrics.go +++ b/pkg/compactor/compactor_metrics.go @@ -38,6 +38,7 @@ type compactorMetrics struct { verticalCompactions *prometheus.CounterVec remainingPlannedCompactions *prometheus.GaugeVec compactionErrorsCount *prometheus.CounterVec + partitionCount *prometheus.GaugeVec } const ( @@ -169,6 +170,10 @@ func newCompactorMetricsWithLabels(reg prometheus.Registerer, commonLabels []str Name: "cortex_compactor_compaction_error_total", Help: "Total number of errors from compactions.", }, append(commonLabels, compactionErrorTypesLabelName)) + m.partitionCount = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_compact_group_partition_count", + Help: "Number of partitions.", + }, compactionLabels) return &m } @@ -207,3 +212,28 @@ func (m *compactorMetrics) getCommonLabelValues(userID string) []string { } return labelValues } + +func (m *compactorMetrics) initMetricWithCompactionLabelValues(labelValue ...string) { 
+ if len(m.compactionLabels) != len(commonLabels)+len(compactionLabels) { + return + } + + m.compactions.WithLabelValues(labelValue...) + m.compactionPlanned.WithLabelValues(labelValue...) + m.compactionRunsStarted.WithLabelValues(labelValue...) + m.compactionRunsCompleted.WithLabelValues(labelValue...) + m.compactionFailures.WithLabelValues(labelValue...) + m.verticalCompactions.WithLabelValues(labelValue...) + m.partitionCount.WithLabelValues(labelValue...) +} + +func (m *compactorMetrics) deleteMetricsForDeletedTenant(userID string) { + m.syncerBlocksMarkedForDeletion.DeleteLabelValues(userID) + m.compactions.DeleteLabelValues(userID) + m.compactionPlanned.DeleteLabelValues(userID) + m.compactionRunsStarted.DeleteLabelValues(userID) + m.compactionRunsCompleted.DeleteLabelValues(userID) + m.compactionFailures.DeleteLabelValues(userID) + m.verticalCompactions.DeleteLabelValues(userID) + m.partitionCount.DeleteLabelValues(userID) +} diff --git a/pkg/compactor/compactor_metrics_test.go b/pkg/compactor/compactor_metrics_test.go index da4bb82025..b95eeabe73 100644 --- a/pkg/compactor/compactor_metrics_test.go +++ b/pkg/compactor/compactor_metrics_test.go @@ -130,6 +130,11 @@ func TestSyncerMetrics(t *testing.T) { cortex_compactor_compaction_error_total{type="unauthorized",user="aaa"} 477730 cortex_compactor_compaction_error_total{type="unauthorized",user="bbb"} 488840 cortex_compactor_compaction_error_total{type="unauthorized",user="ccc"} 499950 + # HELP cortex_compact_group_partition_count Number of partitions. 
+ # TYPE cortex_compact_group_partition_count gauge + cortex_compact_group_partition_count{user="aaa"} 511060 + cortex_compact_group_partition_count{user="bbb"} 522170 + cortex_compact_group_partition_count{user="ccc"} 533280 `)) require.NoError(t, err) @@ -183,4 +188,7 @@ func generateTestData(cm *compactorMetrics, base float64) { cm.compactionErrorsCount.WithLabelValues("aaa", unauthorizedError).Add(43 * base) cm.compactionErrorsCount.WithLabelValues("bbb", unauthorizedError).Add(44 * base) cm.compactionErrorsCount.WithLabelValues("ccc", unauthorizedError).Add(45 * base) + cm.partitionCount.WithLabelValues("aaa").Add(46 * base) + cm.partitionCount.WithLabelValues("bbb").Add(47 * base) + cm.partitionCount.WithLabelValues("ccc").Add(48 * base) } diff --git a/pkg/compactor/partition_compaction_grouper.go b/pkg/compactor/partition_compaction_grouper.go index c3687f7e6a..7eb48ab723 100644 --- a/pkg/compactor/partition_compaction_grouper.go +++ b/pkg/compactor/partition_compaction_grouper.go @@ -2,37 +2,918 @@ package compactor import ( "context" + "fmt" + "math" + "math/rand" + "sort" + "strings" + "time" "github.com/go-kit/log" + "github.com/go-kit/log/level" "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/model/labels" "github.com/thanos-io/objstore" + thanosblock "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/compact" + + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/storage/tsdb" +) + +var ( + DUMMY_BLOCK_ID = ulid.ULID{} ) type PartitionCompactionGrouper struct { - ctx context.Context - logger log.Logger - bkt objstore.InstrumentedBucket + ctx context.Context + logger log.Logger + bkt objstore.InstrumentedBucket + acceptMalformedIndex bool + enableVerticalCompaction bool + blocksMarkedForNoCompact prometheus.Counter + hashFunc metadata.HashFunc + syncerMetrics 
*compact.SyncerMetrics + compactorMetrics *compactorMetrics + compactorCfg Config + limits Limits + userID string + blockFilesConcurrency int + blocksFetchConcurrency int + compactionConcurrency int + + doRandomPick bool + + ring ring.ReadRing + ringLifecyclerAddr string + ringLifecyclerID string + + noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark + partitionVisitMarkerTimeout time.Duration } func NewPartitionCompactionGrouper( ctx context.Context, logger log.Logger, bkt objstore.InstrumentedBucket, + acceptMalformedIndex bool, + enableVerticalCompaction bool, + blocksMarkedForNoCompact prometheus.Counter, + syncerMetrics *compact.SyncerMetrics, + compactorMetrics *compactorMetrics, + hashFunc metadata.HashFunc, + compactorCfg Config, + ring ring.ReadRing, + ringLifecyclerAddr string, + ringLifecyclerID string, + limits Limits, + userID string, + blockFilesConcurrency int, + blocksFetchConcurrency int, + compactionConcurrency int, + doRandomPick bool, + partitionVisitMarkerTimeout time.Duration, + noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark, ) *PartitionCompactionGrouper { if logger == nil { logger = log.NewNopLogger() } return &PartitionCompactionGrouper{ - ctx: ctx, - logger: logger, - bkt: bkt, + ctx: ctx, + logger: logger, + bkt: bkt, + acceptMalformedIndex: acceptMalformedIndex, + enableVerticalCompaction: enableVerticalCompaction, + blocksMarkedForNoCompact: blocksMarkedForNoCompact, + hashFunc: hashFunc, + syncerMetrics: syncerMetrics, + compactorMetrics: compactorMetrics, + compactorCfg: compactorCfg, + ring: ring, + ringLifecyclerAddr: ringLifecyclerAddr, + ringLifecyclerID: ringLifecyclerID, + limits: limits, + userID: userID, + blockFilesConcurrency: blockFilesConcurrency, + blocksFetchConcurrency: blocksFetchConcurrency, + compactionConcurrency: compactionConcurrency, + doRandomPick: doRandomPick, + partitionVisitMarkerTimeout: partitionVisitMarkerTimeout, + noCompBlocksFunc: noCompBlocksFunc, } } // Groups function 
modified from https://github.com/cortexproject/cortex/pull/2616 func (g *PartitionCompactionGrouper) Groups(blocks map[ulid.ULID]*metadata.Meta) (res []*compact.Group, err error) { - panic("PartitionCompactionGrouper not implemented") + // Check if this compactor is on the subring. + // If the compactor is not on the subring when using the userID as a identifier + // no plans generated below will be owned by the compactor so we can just return an empty array + // as there will be no planned groups + onSubring, err := g.checkSubringForCompactor() + if err != nil { + return nil, errors.Wrap(err, "unable to check sub-ring for compactor ownership") + } + if !onSubring { + level.Debug(g.logger).Log("msg", "compactor is not on the current sub-ring skipping user", "user", g.userID) + return nil, nil + } + + // Filter out no compact blocks + noCompactMarked := g.noCompBlocksFunc() + for id, b := range blocks { + if _, excluded := noCompactMarked[b.ULID]; excluded { + delete(blocks, id) + } + } + + partitionCompactionJobs, err := g.generateCompactionJobs(blocks) + if err != nil { + return nil, errors.Wrap(err, "unable to generate compaction jobs") + } + + pickedPartitionCompactionJobs := g.pickPartitionCompactionJob(partitionCompactionJobs) + + return pickedPartitionCompactionJobs, nil +} + +// Check whether this compactor exists on the subring based on user ID +func (g *PartitionCompactionGrouper) checkSubringForCompactor() (bool, error) { + subRing := g.ring.ShuffleShard(g.userID, g.limits.CompactorTenantShardSize(g.userID)) + + rs, err := subRing.GetAllHealthy(RingOp) + if err != nil { + return false, err + } + + return rs.Includes(g.ringLifecyclerAddr), nil +} + +func (g *PartitionCompactionGrouper) generateCompactionJobs(blocks map[ulid.ULID]*metadata.Meta) ([]*blocksGroupWithPartition, error) { + timeRanges := g.compactorCfg.BlockRanges.ToMilliseconds() + + groups := g.groupBlocks(blocks, timeRanges) + + existingPartitionedGroups, err := 
g.loadExistingPartitionedGroups() + if err != nil { + return nil, err + } + for _, p := range existingPartitionedGroups { + var blockIDs []string + for _, b := range p.getAllBlocks() { + blockIDs = append(blockIDs, b.String()) + } + level.Info(g.logger).Log("msg", "existing partitioned group", "partitioned_group_id", p.PartitionedGroupID, "partition_count", p.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + + allPartitionedGroup, err := g.generatePartitionedGroups(blocks, groups, existingPartitionedGroups, timeRanges) + if err != nil { + return nil, err + } + g.sortPartitionedGroups(allPartitionedGroup) + for _, p := range allPartitionedGroup { + var blockIDs []string + for _, b := range p.getAllBlocks() { + blockIDs = append(blockIDs, b.String()) + } + level.Info(g.logger).Log("msg", "partitioned group ready for compaction", "partitioned_group_id", p.PartitionedGroupID, "partition_count", p.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + + partitionCompactionJobs := g.generatePartitionCompactionJobs(blocks, allPartitionedGroup, g.doRandomPick) + for _, p := range partitionCompactionJobs { + var blockIDs []string + for _, b := range p.blocks { + blockIDs = append(blockIDs, b.ULID.String()) + } + level.Info(g.logger).Log("msg", "partitioned compaction job", "partitioned_group_id", p.partitionedGroupInfo.PartitionedGroupID, "partition_id", p.partition.PartitionID, "partition_count", p.partitionedGroupInfo.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + return partitionCompactionJobs, nil +} + +func (g *PartitionCompactionGrouper) loadExistingPartitionedGroups() (map[uint32]*PartitionedGroupInfo, error) { + partitionedGroups := make(map[uint32]*PartitionedGroupInfo) + err := 
// loadExistingPartitionedGroups reads every partitioned group info file found
// under PartitionedGroupDirectory in the bucket and returns them keyed by
// PartitionedGroupID. Entries whose path contains PartitionVisitMarkerDirectory
// are skipped. The first read failure aborts the iteration and is returned
// wrapped.
func (g *PartitionCompactionGrouper) loadExistingPartitionedGroups() (map[uint32]*PartitionedGroupInfo, error) {
	partitionedGroups := make(map[uint32]*PartitionedGroupInfo)
	err := g.bkt.Iter(g.ctx, PartitionedGroupDirectory, func(file string) error {
		// Visit marker entries are not group info files; skip them.
		if !strings.Contains(file, PartitionVisitMarkerDirectory) {
			partitionedGroup, err := ReadPartitionedGroupInfoFile(g.ctx, g.bkt, g.logger, file)
			if err != nil {
				return err
			}
			partitionedGroups[partitionedGroup.PartitionedGroupID] = partitionedGroup
		}
		return nil
	})
	if err != nil {
		return nil, errors.Wrap(err, "unable to load existing partitioned groups")
	}
	return partitionedGroups, nil
}

// groupBlocks buckets the blocks by their Thanos group key, splits every
// bucket into groups aligned to the given time ranges, and returns all
// resulting groups in the order defined by sortBlockGroups.
func (g *PartitionCompactionGrouper) groupBlocks(blocks map[ulid.ULID]*metadata.Meta, timeRanges []int64) []blocksGroupWithPartition {
	// First of all we have to group blocks using the Thanos default
	// grouping (based on downsample resolution + external labels).
	mainGroups := map[string][]*metadata.Meta{}
	for _, b := range blocks {
		key := b.Thanos.GroupKey()
		mainGroups[key] = append(mainGroups[key], b)
	}

	var groups []blocksGroupWithPartition
	for _, mainBlocks := range mainGroups {
		groups = append(groups, g.groupBlocksByCompactableRanges(mainBlocks, timeRanges)...)
	}

	g.sortBlockGroups(groups)

	return groups
}

// groupBlocksByCompactableRanges sorts the given blocks by min time (in place)
// and returns the groups produced by groupBlocksByRange for each time range,
// concatenated in the order of timeRanges. It returns nil for an empty input.
func (g *PartitionCompactionGrouper) groupBlocksByCompactableRanges(blocks []*metadata.Meta, timeRanges []int64) []blocksGroupWithPartition {
	if len(blocks) == 0 {
		return nil
	}

	// Sort blocks by min time.
	sortMetasByMinTime(blocks)

	var groups []blocksGroupWithPartition

	for _, tr := range timeRanges {
		groups = append(groups, g.groupBlocksByRange(blocks, tr)...)
	}

	return groups
}

// groupBlocksByRange walks the blocks (expected to be sorted by min time by the
// caller) and collects them into groups covering [rangeStart, rangeStart+tr),
// where rangeStart is whatever getRangeStart returns for the first block of
// the group. Only groups holding more than one block are returned.
//
// NOTE: the outer and inner loops share the index i; the inner loop advances
// it past every block it has examined, so the statement order matters.
func (g *PartitionCompactionGrouper) groupBlocksByRange(blocks []*metadata.Meta, tr int64) []blocksGroupWithPartition {
	var ret []blocksGroupWithPartition

	for i := 0; i < len(blocks); {
		var (
			group blocksGroupWithPartition
			m     = blocks[i]
		)

		group.rangeStart = getRangeStart(m, tr)
		group.rangeEnd = group.rangeStart + tr

		// Skip blocks that don't fall into the range. This can happen via mis-alignment or
		// by being the multiple of the intended range.
		if m.MaxTime > group.rangeEnd {
			i++
			continue
		}

		// Add all blocks to the current group that are within [t0, t0+tr].
		for ; i < len(blocks); i++ {
			// If the block does not start within this group, then we should break the iteration
			// and move it to the next group.
			if blocks[i].MinTime >= group.rangeEnd {
				break
			}

			// If the block doesn't fall into this group, but it started within this group then it
			// means it spans across multiple ranges and we should skip it.
			if blocks[i].MaxTime > group.rangeEnd {
				continue
			}

			group.blocks = append(group.blocks, blocks[i])
		}

		// A single block has nothing to be compacted with.
		if len(group.blocks) > 1 {
			ret = append(ret, group)
		}
	}

	return ret
}

// sortBlockGroups sorts the groups in place: shortest range first, then
// earliest range start, and finally by the group key built from the group
// hash so that the order is deterministic.
func (g *PartitionCompactionGrouper) sortBlockGroups(groups []blocksGroupWithPartition) {
	// Ensure groups are sorted by smallest range, oldest min time first. The rationale
	// is that we wanna favor smaller ranges first (ie. to deduplicate samples sooner
	// than later) and older ones are more likely to be "complete" (no missing block still
	// to be uploaded).
	sort.SliceStable(groups, func(i, j int) bool {
		iGroup := groups[i]
		jGroup := groups[j]
		iRangeStart := iGroup.rangeStart
		iRangeEnd := iGroup.rangeEnd
		jRangeStart := jGroup.rangeStart
		jRangeEnd := jGroup.rangeEnd
		iLength := iRangeEnd - iRangeStart
		jLength := jRangeEnd - jRangeStart

		if iLength != jLength {
			return iLength < jLength
		}
		if iRangeStart != jRangeStart {
			return iRangeStart < jRangeStart
		}

		// Same length and same start: fall back to the group keys as tie-breaker.
		iGroupHash := hashGroup(g.userID, iRangeStart, iRangeEnd)
		iGroupKey := createGroupKeyWithPartition(iGroupHash, iGroup)
		jGroupHash := hashGroup(g.userID, jRangeStart, jRangeEnd)
		jGroupKey := createGroupKeyWithPartition(jGroupHash, jGroup)
		// Guarantee stable sort for tests.
		return iGroupKey < jGroupKey
	})
}
+ return iGroupKey < jGroupKey + }) +} + +func (g *PartitionCompactionGrouper) generatePartitionedGroups(blocks map[ulid.ULID]*metadata.Meta, groups []blocksGroupWithPartition, existingPartitionedGroups map[uint32]*PartitionedGroupInfo, timeRanges []int64) ([]*PartitionedGroupInfo, error) { + var allPartitionedGroup []*PartitionedGroupInfo + for _, partitionedGroup := range existingPartitionedGroups { + status := partitionedGroup.getPartitionedGroupStatus(g.ctx, g.bkt, g.partitionVisitMarkerTimeout, g.logger) + if !status.IsCompleted { + allPartitionedGroup = append(allPartitionedGroup, partitionedGroup) + } + } + + timeRangeChecker := NewCompletenessChecker(blocks, groups, timeRanges) + for _, startTimeMap := range timeRangeChecker.TimeRangesStatus { + for _, status := range startTimeMap { + if !status.canTakeCompaction { + level.Info(g.logger).Log("msg", "incomplete time range", "rangeStart", status.rangeStartTime().String(), "rangeEnd", status.rangeEndTime().String(), + "timeRange", status.timeRangeDuration().String(), "previousTimeRange", status.previousTimeRangeDuration().String()) + } + } + } + + for _, group := range groups { + groupHash := hashGroup(g.userID, group.rangeStart, group.rangeEnd) + logger := log.With(g.logger, "partitioned_group_id", groupHash, "rangeStart", group.rangeStartTime().String(), "rangeEnd", group.rangeEndTime().String()) + + var blockIDs []string + for _, b := range group.blocks { + blockIDs = append(blockIDs, b.ULID.String()) + } + level.Info(logger).Log("msg", "block group", "blocks", strings.Join(blockIDs, ",")) + + level.Info(logger).Log("msg", "start generating partitioned group") + if g.shouldSkipGroup(logger, group, groupHash, existingPartitionedGroups, timeRangeChecker) { + level.Info(logger).Log("msg", "skip generating partitioned group") + continue + } + partitionedGroup, err := g.generatePartitionBlockGroup(group, groupHash) + if err != nil { + return nil, errors.Wrapf(err, "unable to generate partitioned group: %d", 
groupHash) + } + level.Info(logger).Log("msg", "generated partitioned group") + allPartitionedGroup = append(allPartitionedGroup, partitionedGroup) + } + return allPartitionedGroup, nil +} + +func (g *PartitionCompactionGrouper) shouldSkipGroup(logger log.Logger, group blocksGroupWithPartition, partitionedGroupID uint32, existingPartitionedGroups map[uint32]*PartitionedGroupInfo, timeRangeChecker TimeRangeChecker) bool { + if _, ok := existingPartitionedGroups[partitionedGroupID]; ok { + level.Info(logger).Log("msg", "skip group", "reason", "partitioned group already exists") + return true + } + tr := group.rangeEnd - group.rangeStart + if status, ok := timeRangeChecker.TimeRangesStatus[tr][group.rangeStart]; !ok { + level.Info(logger).Log("msg", "skip group", "reason", "unable to get time range status") + return true + } else if !status.canTakeCompaction { + level.Info(logger).Log("msg", "skip group", "reason", "time range cannot take compaction job") + return true + } + + // Check if all blocks in group having same partitioned group id as destination partitionedGroupID + for _, b := range group.blocks { + partitionInfo, err := tsdb.GetPartitionInfo(*b) + if err != nil || partitionInfo == nil || partitionInfo.PartitionedGroupID != partitionedGroupID { + return false + } + } + level.Info(logger).Log("msg", "skip group", "reason", "all blocks in the group have partitioned group id equals to new group partitioned_group_id") + return true +} + +func (g *PartitionCompactionGrouper) generatePartitionBlockGroup(group blocksGroupWithPartition, groupHash uint32) (*PartitionedGroupInfo, error) { + partitionedGroupInfo, err := g.partitionBlockGroup(group, groupHash) + if err != nil { + return nil, err + } + updatedPartitionedGroupInfo, err := UpdatePartitionedGroupInfo(g.ctx, g.bkt, g.logger, *partitionedGroupInfo) + if err != nil { + return nil, err + } + return updatedPartitionedGroupInfo, nil +} + +func (g *PartitionCompactionGrouper) partitionBlockGroup(group 
blocksGroupWithPartition, groupHash uint32) (*PartitionedGroupInfo, error) { + partitionCount := g.calculatePartitionCount(group, groupHash) + blocksByMinTime := g.groupBlocksByMinTime(group) + partitionedGroups, err := g.partitionBlocksGroup(partitionCount, blocksByMinTime, group.rangeStart, group.rangeEnd) + if err != nil { + return nil, err + } + + var partitions []Partition + for partitionID := 0; partitionID < partitionCount; partitionID++ { + partitionedGroup := partitionedGroups[partitionID] + var blockIDs []ulid.ULID + for _, m := range partitionedGroup.blocks { + blockIDs = append(blockIDs, m.ULID) + } + partitions = append(partitions, Partition{ + PartitionID: partitionID, + Blocks: blockIDs, + }) + } + partitionedGroupInfo := PartitionedGroupInfo{ + PartitionedGroupID: groupHash, + PartitionCount: partitionCount, + Partitions: partitions, + RangeStart: group.rangeStart, + RangeEnd: group.rangeEnd, + Version: PartitionedGroupInfoVersion1, + } + return &partitionedGroupInfo, nil +} + +func (g *PartitionCompactionGrouper) calculatePartitionCount(group blocksGroupWithPartition, groupHash uint32) int { + indexSizeLimit := g.limits.CompactorPartitionIndexSizeLimitInBytes(g.userID) + seriesCountLimit := g.limits.CompactorPartitionSeriesCountLimit(g.userID) + smallestRange := g.compactorCfg.BlockRanges.ToMilliseconds()[0] + groupRange := group.rangeLength() + if smallestRange >= groupRange { + level.Info(g.logger).Log("msg", "use level 1 block limits", "partitioned_group_id", groupHash, "smallestRange", smallestRange, "groupRange", groupRange) + indexSizeLimit = g.limits.CompactorPartitionLevel1IndexSizeLimitInBytes(g.userID) + seriesCountLimit = g.limits.CompactorPartitionLevel1SeriesCountLimit(g.userID) + } + + totalIndexSizeInBytes := int64(0) + totalSeriesCount := int64(0) + for _, block := range group.blocks { + blockFiles := block.Thanos.Files + totalSeriesCount += int64(block.Stats.NumSeries) + var indexFile *metadata.File + for _, file := range 
blockFiles { + if file.RelPath == thanosblock.IndexFilename { + indexFile = &file + } + } + if indexFile == nil { + level.Debug(g.logger).Log("msg", "unable to find index file in metadata", "block", block.ULID) + break + } + indexSize := indexFile.SizeBytes + totalIndexSizeInBytes += indexSize + } + partitionNumberBasedOnIndex := 1 + if indexSizeLimit > 0 && totalIndexSizeInBytes > indexSizeLimit { + partitionNumberBasedOnIndex = g.findNearestPartitionNumber(float64(totalIndexSizeInBytes), float64(indexSizeLimit)) + } + partitionNumberBasedOnSeries := 1 + if seriesCountLimit > 0 && totalSeriesCount > seriesCountLimit { + partitionNumberBasedOnSeries = g.findNearestPartitionNumber(float64(totalSeriesCount), float64(seriesCountLimit)) + } + partitionNumber := partitionNumberBasedOnIndex + if partitionNumberBasedOnSeries > partitionNumberBasedOnIndex { + partitionNumber = partitionNumberBasedOnSeries + } + level.Info(g.logger).Log("msg", "calculated partition number for group", "partitioned_group_id", groupHash, "partition_number", partitionNumber, "total_index_size", totalIndexSizeInBytes, "index_size_limit", indexSizeLimit, "total_series_count", totalSeriesCount, "series_count_limit", seriesCountLimit, "group", group.String()) + return partitionNumber +} + +func (g *PartitionCompactionGrouper) findNearestPartitionNumber(size float64, limit float64) int { + return int(math.Pow(2, math.Ceil(math.Log2(size/limit)))) +} + +func (g *PartitionCompactionGrouper) groupBlocksByMinTime(group blocksGroupWithPartition) map[int64][]*metadata.Meta { + blocksByMinTime := make(map[int64][]*metadata.Meta) + for _, block := range group.blocks { + blockRange := block.MaxTime - block.MinTime + minTime := block.MinTime + for _, tr := range g.compactorCfg.BlockRanges.ToMilliseconds() { + if blockRange <= tr { + minTime = tr * (block.MinTime / tr) + break + } + } + blocksByMinTime[minTime] = append(blocksByMinTime[minTime], block) + } + return blocksByMinTime +} + +func (g 
*PartitionCompactionGrouper) partitionBlocksGroup(partitionCount int, blocksByMinTime map[int64][]*metadata.Meta, rangeStart int64, rangeEnd int64) (map[int]blocksGroupWithPartition, error) { + partitionedGroups := make(map[int]blocksGroupWithPartition) + addToPartitionedGroups := func(blocks []*metadata.Meta, partitionID int) { + if _, ok := partitionedGroups[partitionID]; !ok { + partitionedGroups[partitionID] = blocksGroupWithPartition{ + rangeStart: rangeStart, + rangeEnd: rangeEnd, + blocks: []*metadata.Meta{}, + } + } + partitionedGroup := partitionedGroups[partitionID] + partitionedGroup.blocks = append(partitionedGroup.blocks, blocks...) + partitionedGroups[partitionID] = partitionedGroup + } + + for _, blocksInSameTimeInterval := range blocksByMinTime { + for _, block := range blocksInSameTimeInterval { + partitionInfo, err := tsdb.GetPartitionInfo(*block) + if err != nil { + return nil, err + } + if partitionInfo == nil || partitionInfo.PartitionCount < 1 { + // For legacy blocks with level > 1, treat PartitionID is always 0. + // So it can be included in every partition. + defaultPartitionInfo := tsdb.DefaultPartitionInfo + partitionInfo = &defaultPartitionInfo + } + if partitionInfo.PartitionCount < partitionCount { + for partitionID := partitionInfo.PartitionID; partitionID < partitionCount; partitionID += partitionInfo.PartitionCount { + addToPartitionedGroups([]*metadata.Meta{block}, partitionID) + } + } else if partitionInfo.PartitionCount == partitionCount { + addToPartitionedGroups([]*metadata.Meta{block}, partitionInfo.PartitionID) + } else { + addToPartitionedGroups([]*metadata.Meta{block}, partitionInfo.PartitionID%partitionCount) + } + } + } + return partitionedGroups, nil +} + +func (g *PartitionCompactionGrouper) sortPartitionedGroups(partitionedGroups []*PartitionedGroupInfo) { + // Ensure groups are sorted by smallest range, oldest min time first. The rationale + // is that we wanna favor smaller ranges first (ie. 
to deduplicate samples sooner + // than later) and older ones are more likely to be "complete" (no missing block still + // to be uploaded). + sort.SliceStable(partitionedGroups, func(i, j int) bool { + iGroup := partitionedGroups[i] + jGroup := partitionedGroups[j] + iRangeStart := iGroup.RangeStart + iRangeEnd := iGroup.RangeEnd + jRangeStart := jGroup.RangeStart + jRangeEnd := jGroup.RangeEnd + iLength := iRangeEnd - iRangeStart + jLength := jRangeEnd - jRangeStart + + if iLength != jLength { + return iLength < jLength + } + if iRangeStart != jRangeStart { + return iRangeStart < jRangeStart + } + // Guarantee stable sort for tests. + return iGroup.PartitionedGroupID < jGroup.PartitionedGroupID + }) +} + +func (g *PartitionCompactionGrouper) generatePartitionCompactionJobs(blocks map[ulid.ULID]*metadata.Meta, partitionedGroups []*PartitionedGroupInfo, doRandomPick bool) []*blocksGroupWithPartition { + var partitionedBlockGroups []*blocksGroupWithPartition + for _, partitionedGroupInfo := range partitionedGroups { + partitionedGroupID := partitionedGroupInfo.PartitionedGroupID + partitionAdded := 0 + var partitionIDs []int + if doRandomPick { + // Randomly pick partitions from partitioned group to avoid all compactors + // trying to get same partition at same time. 
+ r := rand.New(rand.NewSource(time.Now().UnixMicro() + int64(hashString(g.ringLifecyclerID)))) + partitionIDs = r.Perm(len(partitionedGroupInfo.Partitions)) + } else { + for i := 0; i < partitionedGroupInfo.PartitionCount; i++ { + partitionIDs = append(partitionIDs, i) + } + } + for _, i := range partitionIDs { + partition := partitionedGroupInfo.Partitions[i] + if len(partition.Blocks) == 1 { + partition.Blocks = append(partition.Blocks, DUMMY_BLOCK_ID) + level.Info(g.logger).Log("msg", "handled single block in partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID) + } else if len(partition.Blocks) < 1 { + if err := g.handleEmptyPartition(partitionedGroupInfo, partition); err != nil { + level.Warn(g.logger).Log("msg", "failed to handle empty partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID, "err", err) + } + continue + } + partitionedGroup, err := createBlocksGroup(blocks, partition.Blocks, partitionedGroupInfo.RangeStart, partitionedGroupInfo.RangeEnd) + if err != nil { + continue + } + partitionedGroup.groupHash = partitionedGroupID + partitionedGroup.partitionedGroupInfo = partitionedGroupInfo + partitionedGroup.partition = partition + partitionedBlockGroups = append(partitionedBlockGroups, partitionedGroup) + partitionAdded++ + } + } + return partitionedBlockGroups +} + +func (g *PartitionCompactionGrouper) handleEmptyPartition(partitionedGroupInfo *PartitionedGroupInfo, partition Partition) error { + if len(partition.Blocks) > 0 { + return nil + } + + level.Info(g.logger).Log("msg", "handling empty block partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID) + partitionVisitMarker := &PartitionVisitMarker{ + 
PartitionedGroupID: partitionedGroupInfo.PartitionedGroupID, + PartitionID: partition.PartitionID, + Version: PartitionVisitMarkerVersion1, + } + visitMarkerManager := NewVisitMarkerManager(g.bkt, g.logger, g.ringLifecyclerID, partitionVisitMarker) + visitMarkerManager.MarkWithStatus(g.ctx, Completed) + + level.Info(g.logger).Log("msg", "handled empty block in partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID) + return nil +} + +func (g *PartitionCompactionGrouper) pickPartitionCompactionJob(partitionCompactionJobs []*blocksGroupWithPartition) []*compact.Group { + var outGroups []*compact.Group + for _, partitionedGroup := range partitionCompactionJobs { + groupHash := partitionedGroup.groupHash + partitionedGroupID := partitionedGroup.partitionedGroupInfo.PartitionedGroupID + partitionCount := partitionedGroup.partitionedGroupInfo.PartitionCount + partitionID := partitionedGroup.partition.PartitionID + partitionedGroupLogger := log.With(g.logger, "rangeStart", partitionedGroup.rangeStartTime().String(), "rangeEnd", partitionedGroup.rangeEndTime().String(), "rangeDuration", partitionedGroup.rangeDuration().String(), "partitioned_group_id", partitionedGroupID, "partition_id", partitionID, "partition_count", partitionCount, "group_hash", groupHash) + partitionVisitMarker := NewPartitionVisitMarker(g.ringLifecyclerID, partitionedGroupID, partitionID) + visitMarkerManager := NewVisitMarkerManager(g.bkt, g.logger, g.ringLifecyclerID, partitionVisitMarker) + if isVisited, err := g.isGroupVisited(partitionID, visitMarkerManager); err != nil { + level.Warn(partitionedGroupLogger).Log("msg", "unable to check if partition is visited", "err", err, "group", partitionedGroup.String()) + continue + } else if isVisited { + level.Info(partitionedGroupLogger).Log("msg", "skipping group because partition is visited") + continue + } + partitionedGroupKey := 
createGroupKeyWithPartitionID(groupHash, partitionID, *partitionedGroup) + + level.Info(partitionedGroupLogger).Log("msg", "found compactable group for user", "group", partitionedGroup.String()) + begin := time.Now() + + visitMarkerManager.MarkWithStatus(g.ctx, Pending) + level.Info(partitionedGroupLogger).Log("msg", "marked partition visited in group", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds(), "group", partitionedGroup.String()) + + resolution := partitionedGroup.blocks[0].Thanos.Downsample.Resolution + externalLabels := labels.FromMap(partitionedGroup.blocks[0].Thanos.Labels) + timeRange := partitionedGroup.rangeEnd - partitionedGroup.rangeStart + metricLabelValues := []string{ + g.userID, + fmt.Sprintf("%d", timeRange), + } + g.compactorMetrics.initMetricWithCompactionLabelValues(metricLabelValues...) + g.compactorMetrics.partitionCount.WithLabelValues(metricLabelValues...).Set(float64(partitionCount)) + thanosGroup, err := compact.NewGroup( + log.With(partitionedGroupLogger, "groupKey", partitionedGroupKey, "externalLabels", externalLabels, "downsampleResolution", resolution), + g.bkt, + partitionedGroupKey, + externalLabels, + resolution, + g.acceptMalformedIndex, + true, // Enable vertical compaction. 
+ g.compactorMetrics.compactions.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionRunsStarted.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionRunsCompleted.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionFailures.WithLabelValues(metricLabelValues...), + g.compactorMetrics.verticalCompactions.WithLabelValues(metricLabelValues...), + g.syncerMetrics.GarbageCollectedBlocks, + g.syncerMetrics.BlocksMarkedForDeletion, + g.blocksMarkedForNoCompact, + g.hashFunc, + g.blockFilesConcurrency, + g.blocksFetchConcurrency, + ) + if err != nil { + level.Error(partitionedGroupLogger).Log("msg", "failed to create partitioned group", "blocks", partitionedGroup.partition.Blocks) + } + + for _, m := range partitionedGroup.blocks { + if err := thanosGroup.AppendMeta(m); err != nil { + level.Error(partitionedGroupLogger).Log("msg", "failed to add block to partitioned group", "block", m.ULID, "err", err) + } + } + thanosGroup.SetExtensions(&tsdb.CortexMetaExtensions{ + PartitionInfo: &tsdb.PartitionInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: partitionCount, + PartitionID: partitionID, + PartitionedGroupCreationTime: partitionedGroup.partitionedGroupInfo.CreationTime, + }, + TimeRange: timeRange, + }) + + outGroups = append(outGroups, thanosGroup) + level.Debug(partitionedGroupLogger).Log("msg", "added partition to compaction groups") + if len(outGroups) >= g.compactionConcurrency { + break + } + } + + level.Info(g.logger).Log("msg", fmt.Sprintf("total groups for compaction: %d", len(outGroups))) + + for _, p := range outGroups { + partitionInfo, err := tsdb.ConvertToPartitionInfo(p.Extensions()) + if err == nil && partitionInfo != nil { + level.Info(g.logger).Log("msg", "picked compaction job", "partitioned_group_id", partitionInfo.PartitionedGroupID, "partition_count", partitionInfo.PartitionCount) + } + } + return outGroups +} + +func (g *PartitionCompactionGrouper) isGroupVisited(partitionID int, 
visitMarkerManager *VisitMarkerManager) (bool, error) { + partitionVisitMarker := &PartitionVisitMarker{} + err := visitMarkerManager.ReadVisitMarker(g.ctx, partitionVisitMarker) + if err != nil { + if errors.Is(err, errorVisitMarkerNotFound) { + level.Warn(g.logger).Log("msg", "no visit marker file for partition", "partition_visit_marker_file", visitMarkerManager.visitMarker.GetVisitMarkerFilePath()) + return false, nil + } + level.Error(g.logger).Log("msg", "unable to read partition visit marker file", "partition_visit_marker_file", visitMarkerManager.visitMarker.GetVisitMarkerFilePath(), "err", err) + return true, err + } + if partitionVisitMarker.GetStatus() == Completed { + level.Info(g.logger).Log("msg", "partition visit marker with partition ID is completed", "partition_visit_marker", partitionVisitMarker.String()) + return true, nil + } + if partitionVisitMarker.IsVisited(g.partitionVisitMarkerTimeout, partitionID) { + level.Info(g.logger).Log("msg", "visited partition with partition ID", "partition_visit_marker", partitionVisitMarker.String()) + return true, nil + } + return false, nil +} + +type TimeRangeChecker struct { + // This is a map of timeRange to a map of rangeStart to timeRangeStatus + TimeRangesStatus map[int64]map[int64]*timeRangeStatus +} + +func NewCompletenessChecker(blocks map[ulid.ULID]*metadata.Meta, groups []blocksGroupWithPartition, timeRanges []int64) TimeRangeChecker { + timeRangeToBlockMap := make(map[int64][]*metadata.Meta) + for _, b := range blocks { + timeRange := int64(0) + if b.Compaction.Level > 1 { + ext, err := tsdb.GetCortexMetaExtensionsFromMeta(*b) + if err == nil && ext != nil && ext.TimeRange > 0 { + timeRange = ext.TimeRange + } else { + // fallback logic to guess block time range based + // on MaxTime and MinTime + blockRange := b.MaxTime - b.MinTime + for _, tr := range timeRanges { + rangeStart := getRangeStart(b, tr) + rangeEnd := rangeStart + tr + if tr >= blockRange && rangeEnd >= b.MaxTime { + timeRange = tr + 
break + } + } + } + } + timeRangeToBlockMap[timeRange] = append(timeRangeToBlockMap[timeRange], b) + } + timeRangesStatus := make(map[int64]map[int64]*timeRangeStatus) + for _, g := range groups { + tr := g.rangeEnd - g.rangeStart + if _, ok := timeRangesStatus[tr]; !ok { + timeRangesStatus[tr] = make(map[int64]*timeRangeStatus) + } + timeRangesStatus[tr][g.rangeStart] = &timeRangeStatus{ + timeRange: tr, + rangeStart: g.rangeStart, + rangeEnd: g.rangeEnd, + numActiveBlocks: 0, + canTakeCompaction: false, + } + } + for tr, blks := range timeRangeToBlockMap { + if _, ok := timeRangesStatus[tr]; !ok { + timeRangesStatus[tr] = make(map[int64]*timeRangeStatus) + } + for _, b := range blks { + actualTr := tr + if tr == 0 { + actualTr = timeRanges[0] + } + rangeStart := getRangeStart(b, actualTr) + if _, ok := timeRangesStatus[tr][rangeStart]; !ok { + timeRangesStatus[tr][rangeStart] = &timeRangeStatus{ + timeRange: tr, + rangeStart: rangeStart, + rangeEnd: rangeStart + actualTr, + numActiveBlocks: 0, + canTakeCompaction: false, + } + } + timeRangesStatus[tr][rangeStart].addBlock(1) + } + } + previousTimeRanges := []int64{0} + for _, tr := range timeRanges { + timeRangeLoop: + for rangeStart, status := range timeRangesStatus[tr] { + previousTrBlocks := 0 + for _, previousTr := range previousTimeRanges { + allPreviousTimeRanges := getAllPreviousTimeRanges(tr, rangeStart, previousTr, timeRanges[0]) + for _, previousRangeStart := range allPreviousTimeRanges { + if previousTrStatus, ok := timeRangesStatus[previousTr][previousRangeStart]; ok { + if previousTrStatus.canTakeCompaction { + status.canTakeCompaction = false + continue timeRangeLoop + } + previousTrBlocks += previousTrStatus.numActiveBlocks + } + } + } + status.canTakeCompaction = !(previousTrBlocks == 0 || (previousTrBlocks == 1 && status.numActiveBlocks == 0)) + } + previousTimeRanges = append(previousTimeRanges, tr) + } + return TimeRangeChecker{TimeRangesStatus: timeRangesStatus} +} + +// 
// getAllPreviousTimeRanges returns the start times of all windows of length
// previousTr that fit entirely inside the window
// [rangeStart, rangeStart+currentTr), in ascending order.
//
// A previousTr of 0 stands for "level 1 blocks" and is replaced by smallestTr.
// If the resulting step is not positive no window can be enumerated and nil is
// returned (instead of looping forever). nil is also returned when no window
// fits.
func getAllPreviousTimeRanges(currentTr int64, rangeStart int64, previousTr int64, smallestTr int64) []int64 {
	if previousTr == 0 {
		previousTr = smallestTr
	}
	if previousTr <= 0 {
		return nil
	}

	var result []int64
	if n := currentTr / previousTr; n > 0 {
		// Exactly n windows fit; allocate once.
		result = make([]int64, 0, n)
	}
	rangeEnd := rangeStart + currentTr
	for start := rangeStart; start+previousTr <= rangeEnd; start += previousTr {
		result = append(result, start)
	}
	return result
}
+ blocks []*metadata.Meta + groupHash uint32 + partitionedGroupInfo *PartitionedGroupInfo + partition Partition +} + +func (g blocksGroupWithPartition) rangeDuration() time.Duration { + return g.rangeEndTime().Sub(g.rangeStartTime()) +} + +func createGroupKeyWithPartition(groupHash uint32, group blocksGroupWithPartition) string { + return fmt.Sprintf("%v%s", groupHash, group.blocks[0].Thanos.GroupKey()) +} + +func createGroupKeyWithPartitionID(groupHash uint32, partitionID int, group blocksGroupWithPartition) string { + return fmt.Sprintf("%v%d%s", groupHash, partitionID, group.blocks[0].Thanos.GroupKey()) +} + +func createBlocksGroup(blocks map[ulid.ULID]*metadata.Meta, blockIDs []ulid.ULID, rangeStart int64, rangeEnd int64) (*blocksGroupWithPartition, error) { + var group blocksGroupWithPartition + group.rangeStart = rangeStart + group.rangeEnd = rangeEnd + var nonDummyBlock *metadata.Meta + for _, blockID := range blockIDs { + if blockID == DUMMY_BLOCK_ID { + continue + } + m, ok := blocks[blockID] + if !ok { + return nil, fmt.Errorf("block not found: %s", blockID) + } + nonDummyBlock = m + group.blocks = append(group.blocks, m) + } + for _, blockID := range blockIDs { + if blockID == DUMMY_BLOCK_ID { + dummyMeta := *nonDummyBlock + dummyMeta.ULID = DUMMY_BLOCK_ID + group.blocks = append(group.blocks, &dummyMeta) + } + } + return &group, nil } diff --git a/pkg/compactor/partition_compaction_grouper_test.go b/pkg/compactor/partition_compaction_grouper_test.go new file mode 100644 index 0000000000..e910bd8126 --- /dev/null +++ b/pkg/compactor/partition_compaction_grouper_test.go @@ -0,0 +1,2139 @@ +package compactor + +import ( + "context" + "encoding/json" + "fmt" + "path" + "testing" + "time" + + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/tsdb" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + thanosblock 
"github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/storage/bucket" + cortextsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" + "github.com/cortexproject/cortex/pkg/util/validation" +) + +var ( + M = time.Minute.Milliseconds() + H = time.Hour.Milliseconds() +) + +func TestPartitionCompactionGrouper_GenerateCompactionJobs(t *testing.T) { + block1 := ulid.MustNew(1, nil) + block2 := ulid.MustNew(2, nil) + block3 := ulid.MustNew(3, nil) + block4 := ulid.MustNew(4, nil) + block5 := ulid.MustNew(5, nil) + block6 := ulid.MustNew(6, nil) + block7 := ulid.MustNew(7, nil) + + testCompactorID := "test-compactor" + //otherCompactorID := "other-compactor" + + userID := "test-user" + partitionedGroupID_0_2 := hashGroup(userID, 0*H, 2*H) + partitionedGroupID_0_12 := hashGroup(userID, 0*H, 12*H) + partitionedGroupID_0_24 := hashGroup(userID, 0*H, 24*H) + + tests := map[string]generateCompactionJobsTestCase{ + "only level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 
1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 1 blocks, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 1 blocks, 
there are existing partitioned group files for all blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2}}, + }}, + {rangeStart: 2 * H, rangeEnd: 4 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block3, block4}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + 
Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "only level 2 blocks, there is existing 
partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, 
+ }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "only level 2 blocks from same time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 
0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 10 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks, there is partitioned group file for level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 
0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 10 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block4, block5}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 10 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks in different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: 
metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks in different time range, there are partitioned group files for all groups": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: 
&metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3}}, + }}, + {rangeStart: 12 * H, rangeEnd: 14 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block4, block5}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, 
block5}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks along with level 3 blocks from some of partitions, level 1 blocks in different time range, there are partitioned group files for all groups": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: 
&metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 2}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 4, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block3}}, + {PartitionID: 1, Blocks: []ulid.ULID{block2, block3}}, + {PartitionID: 2, Blocks: []ulid.ULID{block1, block3}}, + {PartitionID: 3, Blocks: []ulid.ULID{block2, block3}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 2: {partitionID: 2, compactorID: testCompactorID, isExpired: false, status: Completed}, + }}, + {rangeStart: 22 * H, rangeEnd: 24 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block6, block7}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 22 * H, rangeEnd: 24 * H}, + {blocks: []ulid.ULID{block1, block3}, partitionCount: 4, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block3}, partitionCount: 4, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block1, 
block3}, partitionCount: 4, partitionID: 2, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block3}, partitionCount: 4, partitionID: 3, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks in first 12h are all complete, level 2 blocks in second 12h have not started compaction, there is no partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 6 * H, MaxTime: 8 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + 
BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3, block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks are all complete, there is no partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 
1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 14 * H, MaxTime: 16 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: 
[]mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 24 * H}, + }, + }, + "level 2 blocks are complete only in second half of 12h, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * 
time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + {PartitionID: 1, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 1: {partitionID: 1, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 3 blocks are complete, there are some level 2 blocks not 
deleted, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, 
+ hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{ + // nothing should be grouped. 
cleaner should mark all level 2 blocks for deletion + // and delete partitioned group file since level 2 to level 3 compaction is complete + }, + }, + "recompact one level 1 block with level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks in same and different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: 
tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks all in different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + 
BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks and level 3 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * 
time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact two level 1 block with level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: 
tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with one level 3 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: 
metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 1 block in same time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: 
cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact two level 1 block in different time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with one level 4 block": { + ranges: []time.Duration{2 * 
time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + 
expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 1 blocks in different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 2 block with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, 
MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from different time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: 
cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from same time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * 
time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 2 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, 
Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 2 blocks from different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: 
&cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from same time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: 
[]expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 3 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 3 blocks from different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: 
cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 3 blocks from same time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 
* time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "blocks with partition info should be assigned to correct partition": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + 
hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 1}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 2}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 3}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: 
&metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block3, block5, block7}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block4, block6, block7}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "one of the partitions got only one block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * 
H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}, Stats: tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, DUMMY_BLOCK_ID}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "not all level 2 blocks are in bucket index": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: 
[]mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + {PartitionID: 1, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 1: {partitionID: 1, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{}, + }, + "not all level 2 blocks are in bucket index and there are late level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + {PartitionID: 1, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: 
{partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 1: {partitionID: 1, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{}, + }, + "level 2 blocks all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{}, + }, + "level 3 blocks all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_24, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: 
&metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_24, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{}, + }, + "level 2 blocks not all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_2, PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: 
[]mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with data only in part of time range across smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 10*H + 49*M, MaxTime: 16 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with time range in meta and data only in part of time range in same smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 1 * H, MaxTime: 10 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{TimeRange: 24 * H, PartitionInfo: 
&cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with no time range in meta and data only in part of time range in same smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 1 * H, MaxTime: 10 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + } + + for testName, testCase := range tests { + t.Run(testName, func(t *testing.T) { + compactorCfg := &Config{ + BlockRanges: testCase.ranges, + } + + limits := &validation.Limits{ + CompactorPartitionSeriesCountLimit: 4, + CompactorPartitionLevel1SeriesCountLimit: 4, + } + overrides, err := validation.NewOverrides(*limits, nil) + require.NoError(t, err) + + // Setup mocking of the ring so that the grouper will own all the shards + rs := ring.ReplicationSet{ + Instances: []ring.InstanceDesc{ + {Addr: "test-addr"}, + }, + } + subring := &RingMock{} + subring.On("GetAllHealthy", mock.Anything).Return(rs, nil) + subring.On("Get", mock.Anything, mock.Anything, 
mock.Anything, mock.Anything, mock.Anything).Return(rs, nil) + + ring := &RingMock{} + ring.On("ShuffleShard", mock.Anything, mock.Anything).Return(subring, nil) + + registerer := prometheus.NewPedanticRegistry() + + metrics := newCompactorMetrics(registerer) + + noCompactFilter := testCase.getNoCompactFilter() + + bkt := &bucket.ClientMock{} + visitMarkerTimeout := 5 * time.Minute + testCase.setupBucketStore(t, bkt, userID, visitMarkerTimeout) + bkt.MockUpload(mock.Anything, nil) + bkt.MockGet(mock.Anything, "", nil) + bkt.MockIter(mock.Anything, nil, nil) + + for _, b := range testCase.blocks { + b.fixPartitionInfo(t, userID) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + g := NewPartitionCompactionGrouper( + ctx, + nil, + objstore.WithNoopInstr(bkt), + false, // Do not accept malformed indexes + true, // Enable vertical compaction + nil, + metrics.getSyncerMetrics(userID), + metrics, + metadata.NoneFunc, + *compactorCfg, + ring, + "test-addr", + testCompactorID, + overrides, + userID, + 10, + 3, + 1, + false, + visitMarkerTimeout, + noCompactFilter, + ) + actual, err := g.generateCompactionJobs(testCase.getBlocks()) + require.NoError(t, err) + require.Len(t, actual, len(testCase.expected)) + + for idx, expectedGroup := range testCase.expected { + actualGroup := actual[idx] + actualBlocks := actualGroup.blocks + require.Equal(t, expectedGroup.rangeStart, actualGroup.partitionedGroupInfo.RangeStart) + require.Equal(t, expectedGroup.rangeEnd, actualGroup.partitionedGroupInfo.RangeEnd) + require.Equal(t, expectedGroup.partitionCount, actualGroup.partitionedGroupInfo.PartitionCount) + require.Equal(t, expectedGroup.partitionID, actualGroup.partition.PartitionID) + require.Len(t, actualBlocks, len(expectedGroup.blocks)) + for _, b := range actualBlocks { + require.Contains(t, expectedGroup.blocks, b.ULID) + } + } + }) + } +} + +type generateCompactionJobsTestCase struct { + ranges []time.Duration + blocks map[ulid.ULID]mockBlock + 
existingPartitionedGroups []mockExistingPartitionedGroup + expected []expectedCompactionJob +} + +func (g *generateCompactionJobsTestCase) setupBucketStore(t *testing.T, bkt *bucket.ClientMock, userID string, visitMarkerTimeout time.Duration) { + var existingPartitionedGroupFiles []string + for _, existingPartitionedGroup := range g.existingPartitionedGroups { + partitionedGroupFilePath := existingPartitionedGroup.setupBucketStore(t, bkt, userID, visitMarkerTimeout) + existingPartitionedGroupFiles = append(existingPartitionedGroupFiles, partitionedGroupFilePath) + } + bkt.MockIter(PartitionedGroupDirectory, existingPartitionedGroupFiles, nil) +} + +func (g *generateCompactionJobsTestCase) getNoCompactFilter() func() map[ulid.ULID]*metadata.NoCompactMark { + noCompactBlocks := make(map[ulid.ULID]*metadata.NoCompactMark) + for id, b := range g.blocks { + if b.hasNoCompactMark { + noCompactBlocks[id] = &metadata.NoCompactMark{ + ID: id, + NoCompactTime: time.Now().Add(-1 * time.Hour).Unix(), + } + } + } + return func() map[ulid.ULID]*metadata.NoCompactMark { + return noCompactBlocks + } +} + +func (g *generateCompactionJobsTestCase) getBlocks() map[ulid.ULID]*metadata.Meta { + blocks := make(map[ulid.ULID]*metadata.Meta) + for id, b := range g.blocks { + blocks[id] = b.meta + } + return blocks +} + +type mockExistingPartitionedGroup struct { + partitionedGroupID uint32 + rangeStart int64 + rangeEnd int64 + partitionCount int + partitions []Partition + partitionVisitMarkers map[int]mockPartitionVisitMarker +} + +func (p *mockExistingPartitionedGroup) updatePartitionedGroupID(userID string) { + p.partitionedGroupID = hashGroup(userID, p.rangeStart, p.rangeEnd) +} + +func (p *mockExistingPartitionedGroup) setupBucketStore(t *testing.T, bkt *bucket.ClientMock, userID string, visitMarkerTimeout time.Duration) string { + p.updatePartitionedGroupID(userID) + partitionedGroupFilePath := path.Join(PartitionedGroupDirectory, fmt.Sprintf("%d.json", p.partitionedGroupID)) + for 
_, partition := range p.partitions { + partitionID := partition.PartitionID + if _, ok := p.partitionVisitMarkers[partitionID]; !ok { + continue + } + visitMarker := p.partitionVisitMarkers[partitionID] + partitionVisitMarkerFilePath := path.Join(PartitionedGroupDirectory, PartitionVisitMarkerDirectory, + fmt.Sprintf("%d/%s%d-%s", p.partitionedGroupID, PartitionVisitMarkerFilePrefix, partitionID, PartitionVisitMarkerFileSuffix)) + visitTime := time.Now() + if visitMarker.isExpired { + visitTime = time.Now().Add(-2 * visitMarkerTimeout) + } + partitionVisitMarker := PartitionVisitMarker{ + CompactorID: visitMarker.compactorID, + Status: visitMarker.status, + PartitionedGroupID: p.partitionedGroupID, + PartitionID: partitionID, + VisitTime: visitTime.UnixMilli(), + Version: PartitionVisitMarkerVersion1, + } + partitionVisitMarkerContent, err := json.Marshal(partitionVisitMarker) + require.NoError(t, err) + bkt.MockGet(partitionVisitMarkerFilePath, string(partitionVisitMarkerContent), nil) + } + partitionedGroup := PartitionedGroupInfo{ + PartitionedGroupID: p.partitionedGroupID, + PartitionCount: p.partitionCount, + Partitions: p.partitions, + RangeStart: p.rangeStart, + RangeEnd: p.rangeEnd, + CreationTime: time.Now().Add(-1 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + } + partitionedGroupContent, err := json.Marshal(partitionedGroup) + require.NoError(t, err) + bkt.MockGet(partitionedGroupFilePath, string(partitionedGroupContent), nil) + return partitionedGroupFilePath +} + +type mockBlock struct { + meta *metadata.Meta + timeRange time.Duration + hasNoCompactMark bool +} + +func (b *mockBlock) fixPartitionInfo(t *testing.T, userID string) { + extensions, err := cortextsdb.GetCortexMetaExtensionsFromMeta(*b.meta) + require.NoError(t, err) + if extensions != nil { + rangeStart := getRangeStart(b.meta, b.timeRange.Milliseconds()) + rangeEnd := rangeStart + b.timeRange.Milliseconds() + if extensions.PartitionInfo.PartitionedGroupID == 0 { + 
extensions.PartitionInfo.PartitionedGroupID = hashGroup(userID, rangeStart, rangeEnd) + } + b.meta.Thanos.Extensions = extensions + } +} + +type mockPartitionVisitMarker struct { + partitionID int + compactorID string + isExpired bool + status VisitStatus +} + +type expectedCompactionJob struct { + blocks []ulid.ULID + partitionCount int + partitionID int + rangeStart int64 + rangeEnd int64 +} diff --git a/pkg/compactor/partition_visit_marker.go b/pkg/compactor/partition_visit_marker.go new file mode 100644 index 0000000000..e12f1107c8 --- /dev/null +++ b/pkg/compactor/partition_visit_marker.go @@ -0,0 +1,96 @@ +package compactor + +import ( + "fmt" + "path" + "strings" + "time" + + "github.com/pkg/errors" +) + +const ( + // PartitionVisitMarkerDirectory is the name of directory where all visit markers are saved. + PartitionVisitMarkerDirectory = "visit-marks" + // PartitionVisitMarkerFileSuffix is the known suffix of json filename for representing the most recent compactor visit. + PartitionVisitMarkerFileSuffix = "visit-mark.json" + // PartitionVisitMarkerFilePrefix is the known prefix of json filename for representing the most recent compactor visit. + PartitionVisitMarkerFilePrefix = "partition-" + // PartitionVisitMarkerVersion1 is the current supported version of visit-mark file. + PartitionVisitMarkerVersion1 = 1 +) + +var ( + errorNotPartitionVisitMarker = errors.New("file is not partition visit marker") +) + +type PartitionVisitMarker struct { + CompactorID string `json:"compactorID"` + Status VisitStatus `json:"status"` + PartitionedGroupID uint32 `json:"partitionedGroupID"` + PartitionID int `json:"partitionID"` + // VisitTime is a unix timestamp of when the partition was visited (mark updated). + VisitTime int64 `json:"visitTime"` + // Version of the file. 
+ Version int `json:"version"` +} + +func NewPartitionVisitMarker(compactorID string, partitionedGroupID uint32, partitionID int) *PartitionVisitMarker { + return &PartitionVisitMarker{ + CompactorID: compactorID, + PartitionedGroupID: partitionedGroupID, + PartitionID: partitionID, + } +} + +func (b *PartitionVisitMarker) IsExpired(partitionVisitMarkerTimeout time.Duration) bool { + return !time.Now().Before(time.Unix(b.VisitTime, 0).Add(partitionVisitMarkerTimeout)) +} + +func (b *PartitionVisitMarker) IsVisited(partitionVisitMarkerTimeout time.Duration, partitionID int) bool { + return b.GetStatus() == Completed || (partitionID == b.PartitionID && !b.IsExpired(partitionVisitMarkerTimeout)) +} + +func (b *PartitionVisitMarker) IsPendingByCompactor(partitionVisitMarkerTimeout time.Duration, partitionID int, compactorID string) bool { + return b.CompactorID == compactorID && partitionID == b.PartitionID && b.GetStatus() == Pending && !b.IsExpired(partitionVisitMarkerTimeout) +} + +func (b *PartitionVisitMarker) GetStatus() VisitStatus { + return b.Status +} + +func (b *PartitionVisitMarker) GetVisitMarkerFilePath() string { + return GetPartitionVisitMarkerFilePath(b.PartitionedGroupID, b.PartitionID) +} + +func (b *PartitionVisitMarker) UpdateStatus(ownerIdentifier string, status VisitStatus) { + b.CompactorID = ownerIdentifier + b.Status = status + b.VisitTime = time.Now().Unix() +} + +func (b *PartitionVisitMarker) String() string { + return fmt.Sprintf("visit_marker_partitioned_group_id=%d visit_marker_partition_id=%d visit_marker_compactor_id=%s visit_marker_status=%s visit_marker_visit_time=%s", + b.PartitionedGroupID, + b.PartitionID, + b.CompactorID, + b.Status, + time.Unix(b.VisitTime, 0).String(), + ) +} + +func GetPartitionVisitMarkerFilePath(partitionedGroupID uint32, partitionID int) string { + return path.Join(GetPartitionVisitMarkerDirectoryPath(partitionedGroupID), fmt.Sprintf("%s%d-%s", PartitionVisitMarkerFilePrefix, partitionID, 
PartitionVisitMarkerFileSuffix)) +} + +func GetPartitionVisitMarkerDirectoryPath(partitionedGroupID uint32) string { + return path.Join(PartitionedGroupDirectory, PartitionVisitMarkerDirectory, fmt.Sprintf("%d", partitionedGroupID)) +} + +func IsPartitionVisitMarker(path string) bool { + return strings.HasSuffix(path, PartitionVisitMarkerFileSuffix) +} + +func IsNotPartitionVisitMarkerError(err error) bool { + return errors.Is(err, errorNotPartitionVisitMarker) +} diff --git a/pkg/compactor/partitioned_group_info.go b/pkg/compactor/partitioned_group_info.go new file mode 100644 index 0000000000..b06e854d9c --- /dev/null +++ b/pkg/compactor/partitioned_group_info.go @@ -0,0 +1,307 @@ +package compactor + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "path" + "strings" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/util/runutil" +) + +const ( + PartitionedGroupDirectory = "partitioned-groups" + PartitionedGroupInfoVersion1 = 1 +) + +var ( + ErrorPartitionedGroupInfoNotFound = errors.New("partitioned group info not found") + ErrorUnmarshalPartitionedGroupInfo = errors.New("unmarshal partitioned group info JSON") +) + +type Partition struct { + PartitionID int `json:"partitionID"` + Blocks []ulid.ULID `json:"blocks"` +} + +type PartitionedGroupStatus struct { + PartitionedGroupID uint32 + CanDelete bool + IsCompleted bool + DeleteVisitMarker bool + PendingPartitions int + InProgressPartitions int + PendingOrFailedPartitions []Partition +} + +func (s PartitionedGroupStatus) String() string { + var partitions []string + for _, p := range s.PendingOrFailedPartitions { + partitions = append(partitions, fmt.Sprintf("%d", p.PartitionID)) + } + return fmt.Sprintf(`{"partitioned_group_id": %d, "can_delete": %t, "is_complete": %t, "delete_visit_marker": %t, 
"pending_partitions": %d, "in_progress_partitions": %d, "pending_or_failed_partitions": [%s]}`, + s.PartitionedGroupID, s.CanDelete, s.IsCompleted, s.DeleteVisitMarker, s.PendingPartitions, s.InProgressPartitions, strings.Join(partitions, ",")) +} + +type PartitionedGroupInfo struct { + PartitionedGroupID uint32 `json:"partitionedGroupID"` + PartitionCount int `json:"partitionCount"` + Partitions []Partition `json:"partitions"` + RangeStart int64 `json:"rangeStart"` + RangeEnd int64 `json:"rangeEnd"` + CreationTime int64 `json:"creationTime"` + // Version of the file. + Version int `json:"version"` +} + +func (p *PartitionedGroupInfo) rangeStartTime() time.Time { + return time.Unix(0, p.RangeStart*int64(time.Millisecond)).UTC() +} + +func (p *PartitionedGroupInfo) rangeEndTime() time.Time { + return time.Unix(0, p.RangeEnd*int64(time.Millisecond)).UTC() +} + +func (p *PartitionedGroupInfo) getPartitionIDsByBlock(blockID ulid.ULID) []int { + var partitionIDs []int +partitionLoop: + for _, partition := range p.Partitions { + for _, block := range partition.Blocks { + if block == blockID { + partitionIDs = append(partitionIDs, partition.PartitionID) + continue partitionLoop + } + } + } + return partitionIDs +} + +func (p *PartitionedGroupInfo) getAllBlocks() []ulid.ULID { + uniqueBlocks := make(map[ulid.ULID]struct{}) + for _, partition := range p.Partitions { + for _, block := range partition.Blocks { + uniqueBlocks[block] = struct{}{} + } + } + blocks := make([]ulid.ULID, len(uniqueBlocks)) + i := 0 + for block := range uniqueBlocks { + blocks[i] = block + i++ + } + return blocks +} + +func (p *PartitionedGroupInfo) getPartitionedGroupStatus( + ctx context.Context, + userBucket objstore.InstrumentedBucket, + partitionVisitMarkerTimeout time.Duration, + userLogger log.Logger, +) PartitionedGroupStatus { + status := PartitionedGroupStatus{ + PartitionedGroupID: p.PartitionedGroupID, + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + 
PendingPartitions: 0, + InProgressPartitions: 0, + PendingOrFailedPartitions: []Partition{}, + } + allPartitionCompleted := true + hasInProgressPartitions := false + for _, partition := range p.Partitions { + partitionVisitMarker := &PartitionVisitMarker{ + PartitionedGroupID: p.PartitionedGroupID, + PartitionID: partition.PartitionID, + } + visitMarkerManager := NewVisitMarkerManager(userBucket, userLogger, "PartitionedGroupInfo.getPartitionedGroupStatus", partitionVisitMarker) + partitionVisitMarkerExists := true + if err := visitMarkerManager.ReadVisitMarker(ctx, partitionVisitMarker); err != nil { + if errors.Is(err, errorVisitMarkerNotFound) { + partitionVisitMarkerExists = false + } else { + level.Warn(userLogger).Log("msg", "unable to read partition visit marker", "path", partitionVisitMarker.GetVisitMarkerFilePath(), "err", err) + return status + } + } + + if !partitionVisitMarkerExists { + status.PendingPartitions++ + allPartitionCompleted = false + status.PendingOrFailedPartitions = append(status.PendingOrFailedPartitions, partition) + continue + } + + if partitionVisitMarker.VisitTime < p.CreationTime { + status.DeleteVisitMarker = true + allPartitionCompleted = false + continue + } + + if partitionVisitMarker.GetStatus() == Completed { + continue + } + + if (partitionVisitMarker.GetStatus() == Pending || partitionVisitMarker.GetStatus() == InProgress) && !partitionVisitMarker.IsExpired(partitionVisitMarkerTimeout) { + status.InProgressPartitions++ + hasInProgressPartitions = true + allPartitionCompleted = false + continue + } + + if partitionVisitMarker.GetStatus() == Failed { + status.PendingPartitions++ + } else { + status.PendingPartitions++ + } + allPartitionCompleted = false + status.PendingOrFailedPartitions = append(status.PendingOrFailedPartitions, partition) + } + + if hasInProgressPartitions { + return status + } + + status.IsCompleted = allPartitionCompleted + + if allPartitionCompleted { + status.CanDelete = true + status.DeleteVisitMarker = 
true + return status + } + + checkedBlocks := make(map[ulid.ULID]struct{}) + for _, partition := range status.PendingOrFailedPartitions { + for _, blockID := range partition.Blocks { + if _, ok := checkedBlocks[blockID]; ok { + continue + } + if !p.doesBlockExist(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is physically deleted", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + if p.isBlockDeleted(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is marked for deletion", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + if p.isBlockNoCompact(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is marked for no compact", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + checkedBlocks[blockID] = struct{}{} + } + } + return status +} + +func (p *PartitionedGroupInfo) doesBlockExist(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + metaExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.MetaFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of meta.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return true + } + return metaExists +} + +func (p *PartitionedGroupInfo) isBlockDeleted(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + deletionMarkerExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.DeletionMarkFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of deletion-mark.json for block", "partitioned_group_id", 
p.PartitionedGroupID, "block", blockID.String()) + return false + } + return deletionMarkerExists +} + +func (p *PartitionedGroupInfo) isBlockNoCompact(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + noCompactMarkerExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.NoCompactMarkFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of no-compact-mark.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return false + } + return noCompactMarkerExists +} + +func (p *PartitionedGroupInfo) String() string { + var partitions []string + for _, partition := range p.Partitions { + partitions = append(partitions, fmt.Sprintf("(PartitionID: %d, Blocks: %s)", partition.PartitionID, partition.Blocks)) + } + return fmt.Sprintf("{PartitionedGroupID: %d, PartitionCount: %d, Partitions: %s}", p.PartitionedGroupID, p.PartitionCount, strings.Join(partitions, ", ")) +} + +func GetPartitionedGroupFile(partitionedGroupID uint32) string { + return path.Join(PartitionedGroupDirectory, fmt.Sprintf("%d.json", partitionedGroupID)) +} + +func ReadPartitionedGroupInfo(ctx context.Context, bkt objstore.InstrumentedBucketReader, logger log.Logger, partitionedGroupID uint32) (*PartitionedGroupInfo, error) { + return ReadPartitionedGroupInfoFile(ctx, bkt, logger, GetPartitionedGroupFile(partitionedGroupID)) +} + +func ReadPartitionedGroupInfoFile(ctx context.Context, bkt objstore.InstrumentedBucketReader, logger log.Logger, partitionedGroupFile string) (*PartitionedGroupInfo, error) { + partitionedGroupReader, err := bkt.ReaderWithExpectedErrs(bkt.IsObjNotFoundErr).Get(ctx, partitionedGroupFile) + if err != nil { + if bkt.IsObjNotFoundErr(err) { + return nil, errors.Wrapf(ErrorPartitionedGroupInfoNotFound, "partitioned group file: %s", partitionedGroupReader) + } + return nil, errors.Wrapf(err, "get partitioned group file: %s", 
partitionedGroupReader) + } + defer runutil.CloseWithLogOnErr(logger, partitionedGroupReader, "close partitioned group reader") + p, err := io.ReadAll(partitionedGroupReader) + if err != nil { + return nil, errors.Wrapf(err, "read partitioned group file: %s", partitionedGroupFile) + } + partitionedGroupInfo := PartitionedGroupInfo{} + if err = json.Unmarshal(p, &partitionedGroupInfo); err != nil { + return nil, errors.Wrapf(ErrorUnmarshalPartitionedGroupInfo, "partitioned group file: %s, error: %v", partitionedGroupFile, err.Error()) + } + if partitionedGroupInfo.Version != VisitMarkerVersion1 { + return nil, errors.Errorf("unexpected partitioned group file version %d, expected %d", partitionedGroupInfo.Version, VisitMarkerVersion1) + } + if partitionedGroupInfo.CreationTime <= 0 { + objAttr, err := bkt.Attributes(ctx, partitionedGroupFile) + if err != nil { + return nil, errors.Errorf("unable to get partitioned group file attributes: %s, error: %v", partitionedGroupFile, err.Error()) + } + partitionedGroupInfo.CreationTime = objAttr.LastModified.Unix() + } + return &partitionedGroupInfo, nil +} + +func UpdatePartitionedGroupInfo(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger, partitionedGroupInfo PartitionedGroupInfo) (*PartitionedGroupInfo, error) { + existingPartitionedGroup, _ := ReadPartitionedGroupInfo(ctx, bkt, logger, partitionedGroupInfo.PartitionedGroupID) + if existingPartitionedGroup != nil { + level.Warn(logger).Log("msg", "partitioned group info already exists", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID) + return existingPartitionedGroup, nil + } + if partitionedGroupInfo.CreationTime <= 0 { + partitionedGroupInfo.CreationTime = time.Now().Unix() + } + partitionedGroupFile := GetPartitionedGroupFile(partitionedGroupInfo.PartitionedGroupID) + partitionedGroupInfoContent, err := json.Marshal(partitionedGroupInfo) + if err != nil { + return nil, err + } + reader := 
bytes.NewReader(partitionedGroupInfoContent) + if err := bkt.Upload(ctx, partitionedGroupFile, reader); err != nil { + return nil, err + } + level.Info(logger).Log("msg", "created new partitioned group info", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID) + return &partitionedGroupInfo, nil +} diff --git a/pkg/compactor/partitioned_group_info_test.go b/pkg/compactor/partitioned_group_info_test.go new file mode 100644 index 0000000000..6769864660 --- /dev/null +++ b/pkg/compactor/partitioned_group_info_test.go @@ -0,0 +1,882 @@ +package compactor + +import ( + "context" + "encoding/json" + "path" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/oklog/ulid" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/storage/bucket" + cortex_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil" +) + +func TestPartitionedGroupInfo(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + rangeStart := (1 * time.Hour).Milliseconds() + rangeEnd := (2 * time.Hour).Milliseconds() + partitionedGroupID := uint32(12345) + for _, tcase := range []struct { + name string + partitionedGroupInfo PartitionedGroupInfo + }{ + { + name: "write partitioned group info 1", + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: rangeStart, + RangeEnd: rangeEnd, + Version: PartitionedGroupInfoVersion1, + }, + }, + { + name: "write partitioned group info 2", + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 3, + Partitions: []Partition{ + { + 
PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid1, + }, + }, + { + PartitionID: 2, + Blocks: []ulid.ULID{ + ulid2, + }, + }, + }, + RangeStart: rangeStart, + RangeEnd: rangeEnd, + Version: PartitionedGroupInfoVersion1, + }, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + ctx := context.Background() + testBkt, _ := cortex_testutil.PrepareFilesystemBucket(t) + bkt := objstore.WithNoopInstr(testBkt) + logger := log.NewNopLogger() + writeRes, err := UpdatePartitionedGroupInfo(ctx, bkt, logger, tcase.partitionedGroupInfo) + tcase.partitionedGroupInfo.CreationTime = writeRes.CreationTime + require.NoError(t, err) + require.Equal(t, tcase.partitionedGroupInfo, *writeRes) + readRes, err := ReadPartitionedGroupInfo(ctx, bkt, logger, tcase.partitionedGroupInfo.PartitionedGroupID) + require.NoError(t, err) + require.Equal(t, tcase.partitionedGroupInfo, *readRes) + }) + } +} + +func TestGetPartitionIDsByBlock(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + ulid3 := ulid.MustNew(3, nil) + partitionedGroupInfo := PartitionedGroupInfo{ + PartitionedGroupID: uint32(12345), + PartitionCount: 3, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + { + PartitionID: 2, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + ulid2, + ulid3, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + Version: PartitionedGroupInfoVersion1, + } + + res0 := partitionedGroupInfo.getPartitionIDsByBlock(ulid0) + require.Equal(t, 3, len(res0)) + require.Contains(t, res0, 0) + require.Contains(t, res0, 1) + require.Contains(t, res0, 2) + + res1 := partitionedGroupInfo.getPartitionIDsByBlock(ulid1) + require.Equal(t, 2, len(res1)) + require.Contains(t, res1, 0) + require.Contains(t, res1, 2) + + res2 := 
partitionedGroupInfo.getPartitionIDsByBlock(ulid2) + require.Equal(t, 2, len(res2)) + require.Contains(t, res2, 1) + require.Contains(t, res2, 2) + + res3 := partitionedGroupInfo.getPartitionIDsByBlock(ulid3) + require.Equal(t, 1, len(res3)) + require.Contains(t, res3, 2) +} + +func TestGetPartitionedGroupStatus(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + partitionedGroupID := uint32(1234) + for _, tcase := range []struct { + name string + expectedResult PartitionedGroupStatus + partitionedGroupInfo PartitionedGroupInfo + partitionVisitMarkers []PartitionVisitMarker + deletedBlock map[ulid.ULID]bool + noCompactBlock map[ulid.ULID]struct{} + }{ + { + name: "test one partition is not visited and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is pending and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + 
DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(-5 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is completed and one partition is under visiting", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: 
time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(time.Second).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: false, + }, + }, + { + name: "test one partition is pending expired", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(-5 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is complete with one block deleted and one partition is not visited with no blocks deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ 
+ ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid1: true, + }, + }, + { + name: "test one partition is complete and one partition is failed with no blocks deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Failed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is complete and one partition is failed one block deleted", + expectedResult: 
PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Failed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid2: true, + }, + }, + { + name: "test all partitions are complete", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: true, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: 
Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid2: true, + }, + }, + { + name: "test partitioned group created after visit marker", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(1 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is in progress not expired and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + 
}, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: InProgress, + VisitTime: time.Now().Add(time.Second).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is not visited and contains block with no compact mark", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + noCompactBlock: map[ulid.ULID]struct{}{ + ulid0: {}, + }, + }, + { + name: "test one partition is expired and contains block with no compact mark", + expectedResult: PartitionedGroupStatus{ + 
CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: InProgress, + VisitTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + noCompactBlock: map[ulid.ULID]struct{}{ + ulid0: {}, + }, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + bucketClient := &bucket.ClientMock{} + for _, partitionVisitMarker := range tcase.partitionVisitMarkers { + content, _ := json.Marshal(partitionVisitMarker) + bucketClient.MockGet(partitionVisitMarker.GetVisitMarkerFilePath(), string(content), nil) + } + + for _, partition := range tcase.partitionedGroupInfo.Partitions { + for _, blockID := range partition.Blocks { + metaPath := path.Join(blockID.String(), metadata.MetaFilename) + noCompactPath := path.Join(blockID.String(), metadata.NoCompactMarkFilename) + deletionMarkerPath := path.Join(blockID.String(), metadata.DeletionMarkFilename) + if hasDeletionMarker, ok := tcase.deletedBlock[blockID]; ok { + if hasDeletionMarker { + bucketClient.MockExists(metaPath, true, nil) + bucketClient.MockExists(deletionMarkerPath, true, nil) 
+ } else { + bucketClient.MockExists(metaPath, false, nil) + } + } else { + bucketClient.MockExists(metaPath, true, nil) + bucketClient.MockExists(deletionMarkerPath, false, nil) + } + if _, ok := tcase.noCompactBlock[blockID]; ok { + bucketClient.MockExists(noCompactPath, true, nil) + } else { + bucketClient.MockExists(noCompactPath, false, nil) + } + } + } + bucketClient.MockGet(mock.Anything, "", nil) + + ctx := context.Background() + logger := log.NewNopLogger() + result := tcase.partitionedGroupInfo.getPartitionedGroupStatus(ctx, bucketClient, 60*time.Second, logger) + require.Equal(t, tcase.expectedResult.CanDelete, result.CanDelete) + require.Equal(t, tcase.expectedResult.IsCompleted, result.IsCompleted) + require.Equal(t, len(tcase.expectedResult.PendingOrFailedPartitions), len(result.PendingOrFailedPartitions)) + for _, partition := range result.PendingOrFailedPartitions { + require.Contains(t, tcase.expectedResult.PendingOrFailedPartitions, partition) + } + }) + } +} diff --git a/pkg/compactor/shuffle_sharding_grouper.go b/pkg/compactor/shuffle_sharding_grouper.go index a041f55b6b..f6328b8fb5 100644 --- a/pkg/compactor/shuffle_sharding_grouper.go +++ b/pkg/compactor/shuffle_sharding_grouper.go @@ -289,15 +289,20 @@ func (g *ShuffleShardingGrouper) checkSubringForCompactor() (bool, error) { return rs.Includes(g.ringLifecyclerAddr), nil } -// Get the hash of a group based on the UserID, and the starting and ending time of the group's range. +// hashGroup Get the hash of a group based on the UserID, and the starting and ending time of the group's range. func hashGroup(userID string, rangeStart int64, rangeEnd int64) uint32 { groupString := fmt.Sprintf("%v%v%v", userID, rangeStart, rangeEnd) - groupHasher := fnv.New32a() + + return hashString(groupString) +} + +func hashString(s string) uint32 { + hasher := fnv.New32a() // Hasher never returns err. 
- _, _ = groupHasher.Write([]byte(groupString)) - groupHash := groupHasher.Sum32() + _, _ = hasher.Write([]byte(s)) + result := hasher.Sum32() - return groupHash + return result } func createGroupKey(groupHash uint32, group blocksGroup) string { diff --git a/pkg/storage/tsdb/meta_extensions.go b/pkg/storage/tsdb/meta_extensions.go new file mode 100644 index 0000000000..b6b8a7acf0 --- /dev/null +++ b/pkg/storage/tsdb/meta_extensions.go @@ -0,0 +1,71 @@ +package tsdb + +import ( + "fmt" + "strconv" + + "github.com/thanos-io/thanos/pkg/block/metadata" +) + +type CortexMetaExtensions struct { + PartitionInfo *PartitionInfo `json:"partition_info,omitempty"` + TimeRange int64 `json:"time_range,omitempty"` +} + +type PartitionInfo struct { + PartitionedGroupID uint32 `json:"partitioned_group_id"` + PartitionCount int `json:"partition_count"` + PartitionID int `json:"partition_id"` + PartitionedGroupCreationTime int64 `json:"partitioned_group_creation_time"` +} + +var ( + DefaultPartitionInfo = PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + PartitionedGroupCreationTime: 0, + } +) + +func (c *CortexMetaExtensions) TimeRangeStr() string { + return strconv.FormatInt(c.TimeRange, 10) +} + +func ConvertToCortexMetaExtensions(extensions any) (*CortexMetaExtensions, error) { + defaultPartitionInfo := DefaultPartitionInfo + cortexExtensions, err := metadata.ConvertExtensions(extensions, &CortexMetaExtensions{ + PartitionInfo: &defaultPartitionInfo, + }) + + if err != nil { + return nil, err + } + if cortexExtensions == nil { + return nil, nil + } + converted, ok := cortexExtensions.(*CortexMetaExtensions) + if !ok { + return nil, fmt.Errorf("unable to convert extensions to CortexMetaExtensions") + } + return converted, nil +} + +func ConvertToPartitionInfo(extensions any) (*PartitionInfo, error) { + cortexExtensions, err := ConvertToCortexMetaExtensions(extensions) + if err != nil { + return nil, err + } + if cortexExtensions == nil { + return 
nil, nil + } + return cortexExtensions.PartitionInfo, nil +} + +func GetCortexMetaExtensionsFromMeta(meta metadata.Meta) (*CortexMetaExtensions, error) { + return ConvertToCortexMetaExtensions(meta.Thanos.Extensions) +} + +func GetPartitionInfo(meta metadata.Meta) (*PartitionInfo, error) { + return ConvertToPartitionInfo(meta.Thanos.Extensions) +} diff --git a/pkg/storage/tsdb/meta_extensions_test.go b/pkg/storage/tsdb/meta_extensions_test.go new file mode 100644 index 0000000000..6f296eb461 --- /dev/null +++ b/pkg/storage/tsdb/meta_extensions_test.go @@ -0,0 +1,182 @@ +package tsdb + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/thanos-io/thanos/pkg/block/metadata" +) + +func TestGetPartitionedInfo(t *testing.T) { + for _, tcase := range []struct { + name string + meta metadata.Meta + expected *PartitionInfo + }{ + { + name: "partition info with all information provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + { + name: "partition info with only PartitionedGroupID provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionedGroupID: 123, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 0, + PartitionCount: 0, + }, + }, + { + name: "partition info with only PartitionID provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionID: 5, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 5, + PartitionCount: 0, + }, + }, + { + name: "partition info with only PartitionCount provided", + meta: metadata.Meta{ + Thanos: 
metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionCount: 4, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 4, + }, + }, + { + name: "meta with empty partition info provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{}, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 0, + }, + }, + { + name: "meta with nil partition info provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: nil, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta with non CortexMetaExtensions provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: struct { + dummy string + }{ + dummy: "test_dummy", + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta with invalid CortexMetaExtensions provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: struct { + PartitionInfo struct { + PartitionedGroupID uint32 `json:"partitionedGroupId"` + PartitionCount int `json:"partitionCount"` + PartitionID int `json:"partitionId"` + } `json:"partition_info,omitempty"` + }{ + PartitionInfo: struct { + PartitionedGroupID uint32 `json:"partitionedGroupId"` + PartitionCount int `json:"partitionCount"` + PartitionID int `json:"partitionId"` + }{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta does not have any extensions", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: nil, + }, + }, + expected: nil, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + 
result, err := GetPartitionInfo(tcase.meta) + assert.NoError(t, err) + if tcase.expected == nil { + assert.Nil(t, result) + } else { + assert.Equal(t, *tcase.expected, *result) + } + }) + } +} diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 87173abb13..bec831c238 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -186,8 +186,12 @@ type Limits struct { MaxDownloadedBytesPerRequest int `yaml:"max_downloaded_bytes_per_request" json:"max_downloaded_bytes_per_request"` // Compactor. - CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` - CompactorTenantShardSize int `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"` + CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` + CompactorTenantShardSize int `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"` + CompactorPartitionIndexSizeLimitInBytes int64 `yaml:"compactor_partition_index_size_limit_in_bytes" json:"compactor_partition_index_size_limit_in_bytes"` + CompactorPartitionSeriesCountLimit int64 `yaml:"compactor_partition_series_count_limit" json:"compactor_partition_series_count_limit"` + CompactorPartitionLevel1IndexSizeLimitInBytes int64 `yaml:"compactor_partition_level1_index_size_limit_in_bytes" json:"compactor_partition_level1_index_size_limit_in_bytes"` + CompactorPartitionLevel1SeriesCountLimit int64 `yaml:"compactor_partition_level1_series_count_limit" json:"compactor_partition_level1_series_count_limit"` // This config doesn't have a CLI flag registered here because they're registered in // their own original config struct. @@ -274,6 +278,10 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 
0 to disable.") f.IntVar(&l.CompactorTenantShardSize, "compactor.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by the compactor. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.") + f.Int64Var(&l.CompactorPartitionIndexSizeLimitInBytes, "compactor.partition-index-size-limit-in-bytes", 0, "Index size limit in bytes for each compaction partition. 0 means no limit") + f.Int64Var(&l.CompactorPartitionSeriesCountLimit, "compactor.partition-series-count-limit", 0, "Time series count limit for each compaction partition. 0 means no limit") + f.Int64Var(&l.CompactorPartitionLevel1IndexSizeLimitInBytes, "compactor.partition-level1-index-size-limit-in-bytes", 0, "Index size limit in bytes for each level 1 compaction partition. 0 means no limit") + f.Int64Var(&l.CompactorPartitionLevel1SeriesCountLimit, "compactor.partition-level1-series-count-limit", 0, "Time series count limit for each level 1 compaction partition. 0 means no limit") // Store-gateway. f.Float64Var(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant. If the value is < 1 the shard size will be a percentage of the total store-gateways.") @@ -769,6 +777,26 @@ func (o *Overrides) CompactorTenantShardSize(userID string) int { return o.GetOverridesForUser(userID).CompactorTenantShardSize } +// CompactorPartitionIndexSizeLimitInBytes returns the index size limit in bytes for each compaction partition of the given tenant. 0 means no limit. 
+func (o *Overrides) CompactorPartitionIndexSizeLimitInBytes(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionIndexSizeLimitInBytes +} + +// CompactorPartitionSeriesCountLimit returns the time series count limit for each compaction partition of the given tenant. 0 means no limit. +func (o *Overrides) CompactorPartitionSeriesCountLimit(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionSeriesCountLimit +} + +// CompactorPartitionLevel1IndexSizeLimitInBytes returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy. +func (o *Overrides) CompactorPartitionLevel1IndexSizeLimitInBytes(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionLevel1IndexSizeLimitInBytes +} + +// CompactorPartitionLevel1SeriesCountLimit returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy. +func (o *Overrides) CompactorPartitionLevel1SeriesCountLimit(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionLevel1SeriesCountLimit +} + // MetricRelabelConfigs returns the metric relabel configs for a given user. 
func (o *Overrides) MetricRelabelConfigs(userID string) []*relabel.Config { return o.GetOverridesForUser(userID).MetricRelabelConfigs From 04b50a3eca6ec0198fb889bcbd2645e3912310b6 Mon Sep 17 00:00:00 2001 From: Alex Le Date: Mon, 19 Aug 2024 17:55:38 -0700 Subject: [PATCH 2/6] fix comment Signed-off-by: Alex Le --- pkg/util/shard.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/util/shard.go b/pkg/util/shard.go index 5d3de01cc4..364f39656f 100644 --- a/pkg/util/shard.go +++ b/pkg/util/shard.go @@ -11,7 +11,7 @@ const ( ShardingStrategyDefault = "default" ShardingStrategyShuffle = "shuffle-sharding" - // Compaction mode + // Compaction strategies CompactionStrategyDefault = "default" CompactionStrategyPartitioning = "partitioning" ) From e4081739d1d827cbe72e46e3bd904b07db7078ed Mon Sep 17 00:00:00 2001 From: Alex Le Date: Tue, 20 Aug 2024 11:16:52 -0700 Subject: [PATCH 3/6] replace level 1 compaction limits with ingestion replication factor Signed-off-by: Alex Le --- pkg/compactor/compactor.go | 29 ++++++++++++------- pkg/compactor/compactor_test.go | 2 +- pkg/compactor/partition_compaction_grouper.go | 10 +++++-- .../partition_compaction_grouper_test.go | 4 +-- pkg/cortex/modules.go | 3 +- pkg/util/validation/limits.go | 22 +++----------- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 52659dc188..ef0338313c 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -60,7 +60,7 @@ var ( errInvalidCompactionStrategy = errors.New("invalid compaction strategy") errInvalidCompactionStrategyPartitioning = errors.New("compaction strategy partitioning can only be enabled when shuffle sharding is enabled") - DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, syncerMetrics *compact.SyncerMetrics, 
compactorMetrics *compactorMetrics, _ *ring.Ring, _ *ring.Lifecycler, _ Limits, _ string, _ *compact.GatherNoCompactionMarkFilter) compact.Grouper { + DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, _ *ring.Ring, _ *ring.Lifecycler, _ Limits, _ string, _ *compact.GatherNoCompactionMarkFilter, _ int) compact.Grouper { return compact.NewDefaultGrouperWithMetrics( logger, bkt, @@ -79,7 +79,7 @@ var ( cfg.BlocksFetchConcurrency) } - ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, blockVisitMarkerReadFailed prometheus.Counter, blockVisitMarkerWriteFailed prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, ring *ring.Ring, ringLifecycle *ring.Lifecycler, limits Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter) compact.Grouper { + ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, blockVisitMarkerReadFailed prometheus.Counter, blockVisitMarkerWriteFailed prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, ring *ring.Ring, ringLifecycle *ring.Lifecycler, limits Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, ingestionReplicationFactor int) compact.Grouper { if cfg.CompactionStrategy == util.CompactionStrategyPartitioning { return NewPartitionCompactionGrouper( ctx, @@ -102,7 +102,8 @@ var ( cfg.CompactionConcurrency, true, cfg.CompactionVisitMarkerTimeout, - noCompactionMarkFilter.NoCompactMarkedBlocks) + noCompactionMarkFilter.NoCompactMarkedBlocks, 
+ ingestionReplicationFactor) } else { return NewShuffleShardingGrouper( ctx, @@ -177,6 +178,7 @@ type BlocksGrouperFactory func( limit Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, + ingestionReplicationFactor int, ) compact.Grouper // BlocksCompactorFactory builds and returns the compactor and planner to use to compact a tenant's blocks. @@ -205,8 +207,6 @@ type Limits interface { CompactorTenantShardSize(userID string) int CompactorPartitionIndexSizeLimitInBytes(userID string) int64 CompactorPartitionSeriesCountLimit(userID string) int64 - CompactorPartitionLevel1IndexSizeLimitInBytes(userID string) int64 - CompactorPartitionLevel1SeriesCountLimit(userID string) int64 } // Config holds the Compactor config. @@ -404,10 +404,13 @@ type Compactor struct { // Thanos compactor metrics per user compactorMetrics *compactorMetrics + + // Replication factor of ingester ring + ingestionReplicationFactor int } // NewCompactor makes a new Compactor. -func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfig, logger log.Logger, registerer prometheus.Registerer, limits *validation.Overrides) (*Compactor, error) { +func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfig, logger log.Logger, registerer prometheus.Registerer, limits *validation.Overrides, ingestionReplicationFactor int) (*Compactor, error) { bucketClientFactory := func(ctx context.Context) (objstore.InstrumentedBucket, error) { return bucket.NewClient(ctx, storageCfg.Bucket, "compactor", logger, registerer) } @@ -430,7 +433,11 @@ func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfi } } - cortexCompactor, err := newCompactor(compactorCfg, storageCfg, logger, registerer, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, limits) + if ingestionReplicationFactor <= 0 { + ingestionReplicationFactor = 1 + } + + cortexCompactor, err := newCompactor(compactorCfg, storageCfg, logger, 
registerer, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, limits, ingestionReplicationFactor) if err != nil { return nil, errors.Wrap(err, "failed to create Cortex blocks compactor") } @@ -447,6 +454,7 @@ func newCompactor( blocksGrouperFactory BlocksGrouperFactory, blocksCompactorFactory BlocksCompactorFactory, limits *validation.Overrides, + ingestionReplicationFactor int, ) (*Compactor, error) { var compactorMetrics *compactorMetrics if compactorCfg.ShardingStrategy == util.ShardingStrategyShuffle { @@ -521,8 +529,9 @@ func newCompactor( Name: "cortex_compactor_block_visit_marker_write_failed", Help: "Number of block visit marker file failed to be written.", }), - limits: limits, - compactorMetrics: compactorMetrics, + limits: limits, + compactorMetrics: compactorMetrics, + ingestionReplicationFactor: ingestionReplicationFactor, } if len(compactorCfg.EnabledTenants) > 0 { @@ -954,7 +963,7 @@ func (c *Compactor) compactUser(ctx context.Context, userID string) error { compactor, err := compact.NewBucketCompactor( ulogger, syncer, - c.blocksGrouperFactory(currentCtx, c.compactorCfg, bucket, ulogger, c.BlocksMarkedForNoCompaction, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed, syncerMetrics, c.compactorMetrics, c.ring, c.ringLifecycler, c.limits, userID, noCompactMarkerFilter), + c.blocksGrouperFactory(currentCtx, c.compactorCfg, bucket, ulogger, c.BlocksMarkedForNoCompaction, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed, syncerMetrics, c.compactorMetrics, c.ring, c.ringLifecycler, c.limits, userID, noCompactMarkerFilter, c.ingestionReplicationFactor), c.blocksPlannerFactory(currentCtx, bucket, ulogger, c.compactorCfg, noCompactMarkerFilter, c.ringLifecycler, userID, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed, c.compactorMetrics), c.blocksCompactor, c.compactDirForUser(userID), diff --git a/pkg/compactor/compactor_test.go b/pkg/compactor/compactor_test.go index 908f962cf2..f610bb6e56 100644 --- 
a/pkg/compactor/compactor_test.go +++ b/pkg/compactor/compactor_test.go @@ -1606,7 +1606,7 @@ func prepare(t *testing.T, compactorCfg Config, bucketClient objstore.Instrument blocksGrouperFactory = DefaultBlocksGrouperFactory } - c, err := newCompactor(compactorCfg, storageCfg, logger, registry, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, overrides) + c, err := newCompactor(compactorCfg, storageCfg, logger, registry, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, overrides, 1) require.NoError(t, err) return c, tsdbCompactor, tsdbPlanner, logs, registry diff --git a/pkg/compactor/partition_compaction_grouper.go b/pkg/compactor/partition_compaction_grouper.go index 7eb48ab723..2650bfbd0d 100644 --- a/pkg/compactor/partition_compaction_grouper.go +++ b/pkg/compactor/partition_compaction_grouper.go @@ -53,6 +53,8 @@ type PartitionCompactionGrouper struct { noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark partitionVisitMarkerTimeout time.Duration + + ingestionReplicationFactor int } func NewPartitionCompactionGrouper( @@ -77,6 +79,7 @@ func NewPartitionCompactionGrouper( doRandomPick bool, partitionVisitMarkerTimeout time.Duration, noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark, + ingestionReplicationFactor int, ) *PartitionCompactionGrouper { if logger == nil { logger = log.NewNopLogger() @@ -104,6 +107,7 @@ func NewPartitionCompactionGrouper( doRandomPick: doRandomPick, partitionVisitMarkerTimeout: partitionVisitMarkerTimeout, noCompBlocksFunc: noCompBlocksFunc, + ingestionReplicationFactor: ingestionReplicationFactor, } } @@ -440,9 +444,9 @@ func (g *PartitionCompactionGrouper) calculatePartitionCount(group blocksGroupWi smallestRange := g.compactorCfg.BlockRanges.ToMilliseconds()[0] groupRange := group.rangeLength() if smallestRange >= groupRange { - level.Info(g.logger).Log("msg", "use level 1 block limits", "partitioned_group_id", groupHash, "smallestRange", smallestRange, "groupRange", 
groupRange) - indexSizeLimit = g.limits.CompactorPartitionLevel1IndexSizeLimitInBytes(g.userID) - seriesCountLimit = g.limits.CompactorPartitionLevel1SeriesCountLimit(g.userID) + level.Info(g.logger).Log("msg", "calculate level 1 block limits", "partitioned_group_id", groupHash, "smallest_range", smallestRange, "group_range", groupRange, "ingestion_replication_factor", g.ingestionReplicationFactor) + indexSizeLimit = indexSizeLimit * int64(g.ingestionReplicationFactor) + seriesCountLimit = seriesCountLimit * int64(g.ingestionReplicationFactor) } totalIndexSizeInBytes := int64(0) diff --git a/pkg/compactor/partition_compaction_grouper_test.go b/pkg/compactor/partition_compaction_grouper_test.go index e910bd8126..fc4df11427 100644 --- a/pkg/compactor/partition_compaction_grouper_test.go +++ b/pkg/compactor/partition_compaction_grouper_test.go @@ -1929,8 +1929,7 @@ func TestPartitionCompactionGrouper_GenerateCompactionJobs(t *testing.T) { } limits := &validation.Limits{ - CompactorPartitionSeriesCountLimit: 4, - CompactorPartitionLevel1SeriesCountLimit: 4, + CompactorPartitionSeriesCountLimit: 4, } overrides, err := validation.NewOverrides(*limits, nil) require.NoError(t, err) @@ -1989,6 +1988,7 @@ func TestPartitionCompactionGrouper_GenerateCompactionJobs(t *testing.T) { false, visitMarkerTimeout, noCompactFilter, + 1, ) actual, err := g.generateCompactionJobs(testCase.getBlocks()) require.NoError(t, err) diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go index de1f15d260..f5bf52fd60 100644 --- a/pkg/cortex/modules.go +++ b/pkg/cortex/modules.go @@ -650,8 +650,9 @@ func (t *Cortex) initAlertManager() (serv services.Service, err error) { func (t *Cortex) initCompactor() (serv services.Service, err error) { t.Cfg.Compactor.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort + ingestionReplicationFactor := t.Cfg.Ingester.LifecyclerConfig.RingConfig.ReplicationFactor - t.Compactor, err = compactor.NewCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, 
util_log.Logger, prometheus.DefaultRegisterer, t.Overrides) + t.Compactor, err = compactor.NewCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, util_log.Logger, prometheus.DefaultRegisterer, t.Overrides, ingestionReplicationFactor) if err != nil { return } diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index bec831c238..7bd1627e67 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -186,12 +186,10 @@ type Limits struct { MaxDownloadedBytesPerRequest int `yaml:"max_downloaded_bytes_per_request" json:"max_downloaded_bytes_per_request"` // Compactor. - CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` - CompactorTenantShardSize int `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"` - CompactorPartitionIndexSizeLimitInBytes int64 `yaml:"compactor_partition_index_size_limit_in_bytes" json:"compactor_partition_index_size_limit_in_bytes"` - CompactorPartitionSeriesCountLimit int64 `yaml:"compactor_partition_series_count_limit" json:"compactor_partition_series_count_limit"` - CompactorPartitionLevel1IndexSizeLimitInBytes int64 `yaml:"compactor_partition_level1_index_size_limit_in_bytes" json:"compactor_partition_level1_index_size_limit_in_bytes"` - CompactorPartitionLevel1SeriesCountLimit int64 `yaml:"compactor_partition_level1_series_count_limit" json:"compactor_partition_level1_series_count_limit"` + CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` + CompactorTenantShardSize int `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"` + CompactorPartitionIndexSizeLimitInBytes int64 `yaml:"compactor_partition_index_size_limit_in_bytes" json:"compactor_partition_index_size_limit_in_bytes"` + CompactorPartitionSeriesCountLimit int64 `yaml:"compactor_partition_series_count_limit" json:"compactor_partition_series_count_limit"` 
// This config doesn't have a CLI flag registered here because they're registered in // their own original config struct. @@ -280,8 +278,6 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.IntVar(&l.CompactorTenantShardSize, "compactor.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by the compactor. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.") f.Int64Var(&l.CompactorPartitionIndexSizeLimitInBytes, "compactor.partition-index-size-limit-in-bytes", 0, "Index size limit in bytes for each compaction partition. 0 means no limit") f.Int64Var(&l.CompactorPartitionSeriesCountLimit, "compactor.partition-series-count-limit", 0, "Time series count limit for each compaction partition. 0 means no limit") - f.Int64Var(&l.CompactorPartitionLevel1IndexSizeLimitInBytes, "compactor.partition-level1-index-size-limit-in-bytes", 0, "Index size limit in bytes for each level 1 compaction partition. 0 means no limit") - f.Int64Var(&l.CompactorPartitionLevel1SeriesCountLimit, "compactor.partition-level1-series-count-limit", 0, "Time series count limit for each level 1 compaction partition. 0 means no limit") // Store-gateway. f.Float64Var(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant. 
If the value is < 1 the shard size will be a percentage of the total store-gateways.") @@ -787,16 +783,6 @@ func (o *Overrides) CompactorPartitionSeriesCountLimit(userID string) int64 { return o.GetOverridesForUser(userID).CompactorPartitionSeriesCountLimit } -// CompactorPartitionLevel1IndexSizeLimitInBytes returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy. -func (o *Overrides) CompactorPartitionLevel1IndexSizeLimitInBytes(userID string) int64 { - return o.GetOverridesForUser(userID).CompactorPartitionLevel1IndexSizeLimitInBytes -} - -// CompactorPartitionLevel1SeriesCountLimit returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy. -func (o *Overrides) CompactorPartitionLevel1SeriesCountLimit(userID string) int64 { - return o.GetOverridesForUser(userID).CompactorPartitionLevel1SeriesCountLimit -} - // MetricRelabelConfigs returns the metric relabel configs for a given user. func (o *Overrides) MetricRelabelConfigs(userID string) []*relabel.Config { return o.GetOverridesForUser(userID).MetricRelabelConfigs From eb09a545e187b69a708dccf76ac2d4db7e533520 Mon Sep 17 00:00:00 2001 From: Alex Le Date: Tue, 20 Aug 2024 11:18:23 -0700 Subject: [PATCH 4/6] fix doc Signed-off-by: Alex Le --- docs/configuration/config-file-reference.md | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 40078bc469..50e4816455 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -3394,16 +3394,6 @@ query_rejection: # CLI flag: -compactor.partition-series-count-limit [compactor_partition_series_count_limit: | default = 0] -# Index size limit in bytes for each level 1 compaction partition. 
0 means no -# limit -# CLI flag: -compactor.partition-level1-index-size-limit-in-bytes -[compactor_partition_level1_index_size_limit_in_bytes: | default = 0] - -# Time series count limit for each level 1 compaction partition. 0 means no -# limit -# CLI flag: -compactor.partition-level1-series-count-limit -[compactor_partition_level1_series_count_limit: | default = 0] - # S3 server-side encryption type. Required to enable server-side encryption # overrides for a specific tenant. If not set, the default S3 client settings # are used. From 8f3423975c63be5761402e6e7bb375d85b865cfd Mon Sep 17 00:00:00 2001 From: Alex Le Date: Wed, 2 Oct 2024 16:23:11 -0700 Subject: [PATCH 5/6] update compaction_visit_marker_timeout default value Signed-off-by: Alex Le --- docs/blocks-storage/compactor.md | 2 +- docs/configuration/config-file-reference.md | 2 +- pkg/compactor/compactor.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/blocks-storage/compactor.md b/docs/blocks-storage/compactor.md index 05abf73eed..f5539511ca 100644 --- a/docs/blocks-storage/compactor.md +++ b/docs/blocks-storage/compactor.md @@ -292,7 +292,7 @@ compactor: # How long compaction visit marker file should be considered as expired and # able to be picked up by compactor again. # CLI flag: -compactor.compaction-visit-marker-timeout - [compaction_visit_marker_timeout: | default = 1m30s] + [compaction_visit_marker_timeout: | default = 10m] # How frequently compaction visit marker file should be updated duration # compaction. diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 50e4816455..061a86570c 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -2223,7 +2223,7 @@ sharding_ring: # How long compaction visit marker file should be considered as expired and able # to be picked up by compactor again. 
# CLI flag: -compactor.compaction-visit-marker-timeout -[compaction_visit_marker_timeout: | default = 1m30s] +[compaction_visit_marker_timeout: | default = 10m] # How frequently compaction visit marker file should be updated duration # compaction. diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index ef0338313c..ad9ae3d214 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -296,7 +296,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.Var(&cfg.EnabledTenants, "compactor.enabled-tenants", "Comma separated list of tenants that can be compacted. If specified, only these tenants will be compacted by compactor, otherwise all tenants can be compacted. Subject to sharding.") f.Var(&cfg.DisabledTenants, "compactor.disabled-tenants", "Comma separated list of tenants that cannot be compacted by this compactor. If specified, and compactor would normally pick given tenant for compaction (via -compactor.enabled-tenants or sharding), it will be ignored instead.") - f.DurationVar(&cfg.CompactionVisitMarkerTimeout, "compactor.compaction-visit-marker-timeout", 90*time.Second, "How long compaction visit marker file should be considered as expired and able to be picked up by compactor again.") + f.DurationVar(&cfg.CompactionVisitMarkerTimeout, "compactor.compaction-visit-marker-timeout", 10*time.Minute, "How long compaction visit marker file should be considered as expired and able to be picked up by compactor again.") f.DurationVar(&cfg.CompactionVisitMarkerFileUpdateInterval, "compactor.compaction-visit-marker-file-update-interval", 1*time.Minute, "How frequently compaction visit marker file should be updated duration compaction.") f.DurationVar(&cfg.CleanerVisitMarkerTimeout, "compactor.cleaner-visit-marker-timeout", 10*time.Minute, "How long cleaner visit marker file should be considered as expired and able to be picked up by cleaner again. 
The value should be smaller than -compactor.cleanup-interval") From baf29691dbe8aae700becea772cb9e89d138a432 Mon Sep 17 00:00:00 2001 From: Alex Le Date: Thu, 19 Dec 2024 21:07:44 -0800 Subject: [PATCH 6/6] update default value for compactor_partition_index_size_limit_in_bytes Signed-off-by: Alex Le --- docs/configuration/config-file-reference.md | 2 +- pkg/util/validation/limits.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index ce52ee95ae..5db4fe4b7e 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -3591,7 +3591,7 @@ query_rejection: # Index size limit in bytes for each compaction partition. 0 means no limit # CLI flag: -compactor.partition-index-size-limit-in-bytes -[compactor_partition_index_size_limit_in_bytes: | default = 0] +[compactor_partition_index_size_limit_in_bytes: | default = 68719476736] # Time series count limit for each compaction partition. 0 means no limit # CLI flag: -compactor.partition-series-count-limit diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 32a24f18bc..11458c8165 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -284,7 +284,8 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 0 to disable.") f.IntVar(&l.CompactorTenantShardSize, "compactor.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by the compactor. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.") - f.Int64Var(&l.CompactorPartitionIndexSizeLimitInBytes, "compactor.partition-index-size-limit-in-bytes", 0, "Index size limit in bytes for each compaction partition. 
0 means no limit") + // Default to 64GB because this is the hard limit of index size in Cortex + f.Int64Var(&l.CompactorPartitionIndexSizeLimitInBytes, "compactor.partition-index-size-limit-in-bytes", 68719476736, "Index size limit in bytes for each compaction partition. 0 means no limit") f.Int64Var(&l.CompactorPartitionSeriesCountLimit, "compactor.partition-series-count-limit", 0, "Time series count limit for each compaction partition. 0 means no limit") // Store-gateway.