From 45069a0f1296d5f439f9c7bc5c7a35f492b4798b Mon Sep 17 00:00:00 2001
From: Jackson Owens
Date: Mon, 22 May 2023 11:51:55 -0400
Subject: [PATCH] db: consider scheduling compaction when adding point
 tombstone compensation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, when the asynchronous table stats collector calculated a
positive PointDeletionsBytesEstimate, it did not consider scheduling
compactions unless it also calculated a positive
RangeDeletionsBytesEstimate. If the database was otherwise quiet, with
few flushes and no in-progress compactions, this could delay the
scheduling of a compaction until the next flush or ingest, despite
levels having scores ≥ 1.0. This was illustrated by CockroachDB's point
tombstone roachtest, which at times showed no running compactions
despite levels having scores as high as 50 due to high volumes of data
dropped by point tombstones.

Note that an issue still remains whereby L0 files that delete large
amounts of data do not trigger compactions out of L0 on their own. This
is a consequence of L0's different scoring heuristics, which do not
consider compensated size. See
---
 compaction_picker.go              | 13 +++----
 compaction_picker_test.go         | 36 +++++++++++++++----
 table_stats.go                    |  2 +-
 testdata/compaction_picker_scores | 58 +++++++++++++++++++++++++++++++
 4 files changed, 96 insertions(+), 13 deletions(-)

diff --git a/compaction_picker.go b/compaction_picker.go
index 82d484e132..4ea11548a5 100644
--- a/compaction_picker.go
+++ b/compaction_picker.go
@@ -674,15 +674,16 @@ type candidateLevelInfo struct {
 	file manifest.LevelFile
 }
 
+func fileCompensation(f *fileMetadata) uint64 {
+	return uint64(f.Stats.PointDeletionsBytesEstimate) + f.Stats.RangeDeletionsBytesEstimate
+}
+
 // compensatedSize returns f's file size, inflated according to compaction
 // priorities.
 func compensatedSize(f *fileMetadata) uint64 {
-	sz := f.Size
-	// Add in the estimate of disk space that may be reclaimed by compacting
-	// the file's tombstones.
-	sz += uint64(f.Stats.PointDeletionsBytesEstimate)
-	sz += f.Stats.RangeDeletionsBytesEstimate
-	return sz
+	// Add in the estimate of disk space that may be reclaimed by compacting the
+	// file's tombstones.
+	return f.Size + fileCompensation(f)
 }
 
 // compensatedSizeAnnotator implements manifest.Annotator, annotating B-Tree
diff --git a/compaction_picker_test.go b/compaction_picker_test.go
index ac7a25dcfb..db7d119ed4 100644
--- a/compaction_picker_test.go
+++ b/compaction_picker_test.go
@@ -1314,6 +1314,19 @@ func TestCompactionPickerScores(t *testing.T) {
 	var buf bytes.Buffer
 	datadriven.RunTest(t, "testdata/compaction_picker_scores", func(t *testing.T, td *datadriven.TestData) string {
 		switch td.Cmd {
+		case "define":
+			require.NoError(t, closeAllSnapshots(d))
+			require.NoError(t, d.Close())
+
+			d, err = runDBDefineCmd(td, opts)
+			if err != nil {
+				return err.Error()
+			}
+			d.mu.Lock()
+			s := d.mu.versions.currentVersion().String()
+			d.mu.Unlock()
+			return s
+
 		case "disable-table-stats":
 			d.mu.Lock()
 			d.opts.private.disableTableStats = true
@@ -1327,12 +1340,11 @@ func TestCompactionPickerScores(t *testing.T) {
 			d.mu.Unlock()
 			return ""
 
-		case "define":
-			require.NoError(t, closeAllSnapshots(d))
-			require.NoError(t, d.Close())
-
-			d, err = runDBDefineCmd(td, opts)
-			if err != nil {
+		case "ingest":
+			if err = runBuildCmd(td, d, d.opts.FS); err != nil {
+				return err.Error()
+			}
+			if err = runIngestCmd(td, d, d.opts.FS); err != nil {
 				return err.Error()
 			}
 			d.mu.Lock()
@@ -1340,7 +1352,19 @@ func TestCompactionPickerScores(t *testing.T) {
 			d.mu.Unlock()
 			return s
 
+		case "lsm":
+			return runLSMCmd(td, d)
+
 		case "scores":
+			// Wait for any running compactions to complete before calculating
+			// scores. Otherwise, the output of this command is
+			// nondeterministic.
+			d.mu.Lock()
+			for d.mu.compact.compactingCount > 0 {
+				d.mu.compact.cond.Wait()
+			}
+			d.mu.Unlock()
+
 			buf.Reset()
 			fmt.Fprintf(&buf, "L Size Score\n")
 			for l, lm := range d.Metrics().Levels {
diff --git a/table_stats.go b/table_stats.go
index 25f28e3510..ef6f0c74da 100644
--- a/table_stats.go
+++ b/table_stats.go
@@ -126,7 +126,7 @@ func (d *DB) collectTableStats() bool {
 	maybeCompact := false
 	for _, c := range collected {
 		c.fileMetadata.Stats = c.TableStats
-		maybeCompact = maybeCompact || c.TableStats.RangeDeletionsBytesEstimate > 0
+		maybeCompact = maybeCompact || fileCompensation(c.fileMetadata) > 0
 		c.fileMetadata.StatsMarkValid()
 	}
 	d.mu.tableStats.cond.Broadcast()
diff --git a/testdata/compaction_picker_scores b/testdata/compaction_picker_scores
index 68e71af1ff..45346f151b 100644
--- a/testdata/compaction_picker_scores
+++ b/testdata/compaction_picker_scores
@@ -105,3 +105,61 @@ L3        0 B      0.0
 L4        0 B      0.0
 L5      824 B      2.3
 L6      321 K        -
+
+# Run a similar test as above, but this time the table containing the DELs is
+# ingested after the database is initialized, with table stats enabled and
+# automatic compactions enabled. When the ingested sstable's stats are loaded,
+# it should trigger an automatic compaction of the ingested sstable on account
+# of the high point-deletions-bytes-estimate value.
+#
+# This is a regression test for an issue where the table stats collector
+# wouldn't attempt to schedule a compaction if a file only had compensation
+# due to point deletions and not range deletions.
+
+define lbase-max-bytes=65536 enable-table-stats=true auto-compactions=on
+L6
+  a.SET.1:
+  b.SET.1:
+  c.SET.1:
+  d.SET.1:
+  e.SET.1:
+----
+6:
+  000004:[a#1,SET-e#1,SET]
+
+ingest ext1
+del a:
+del b:
+del c:
+del d:
+del e:
+----
+5:
+  000005:[a:#10,DEL-e:#10,DEL]
+6:
+  000004:[a#1,SET-e#1,SET]
+
+wait-pending-table-stats
+000005
+----
+num-entries: 5
+num-deletions: 5
+num-range-key-sets: 0
+point-deletions-bytes-estimate: 164723
+range-deletions-bytes-estimate: 0
+
+scores
+----
+L        Size    Score
+L0        0 B      0.0
+L1        0 B      0.0
+L2        0 B      0.0
+L3        0 B      0.0
+L4        0 B      0.0
+L5        0 B      0.0
+L6      321 K        -
+
+lsm
+----
+6:
+  000006:[a#0,SET-e#0,SET]
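
The following standalone sketch is an editorial illustration, not part of the
patch. It models the changed heuristic with simplified stand-in types
(pebble's actual fileMetadata and manifest.TableStats carry more fields and
different integer types), showing why a file whose stats contain only a
point-deletions estimate now flips the collector's maybeCompact flag, where
the old check required a range-deletions estimate.

package main

import "fmt"

// tableStats is a simplified stand-in for pebble's manifest.TableStats.
type tableStats struct {
	PointDeletionsBytesEstimate uint64
	RangeDeletionsBytesEstimate uint64
}

// fileMetadata is a simplified stand-in for pebble's *fileMetadata.
type fileMetadata struct {
	Size  uint64
	Stats tableStats
}

// fileCompensation mirrors the helper added by the patch: the estimated
// bytes reclaimed by compacting the file's point and range tombstones.
func fileCompensation(f *fileMetadata) uint64 {
	return f.Stats.PointDeletionsBytesEstimate + f.Stats.RangeDeletionsBytesEstimate
}

// compensatedSize mirrors the patched compensatedSize: the physical file
// size inflated by the reclaimable-bytes estimate.
func compensatedSize(f *fileMetadata) uint64 {
	return f.Size + fileCompensation(f)
}

func main() {
	// Loosely modeled on the ingested sstable in the new test case: a tiny
	// file of point tombstones expected to reclaim ~160 KB once compacted.
	f := &fileMetadata{
		Size:  1 << 10,
		Stats: tableStats{PointDeletionsBytesEstimate: 164723},
	}

	// Before the patch, only a positive range-deletions estimate caused the
	// stats collector to consider scheduling a compaction.
	oldMaybeCompact := f.Stats.RangeDeletionsBytesEstimate > 0
	// After the patch, any positive compensation does.
	newMaybeCompact := fileCompensation(f) > 0

	fmt.Println(compensatedSize(f)) // 165747
	fmt.Println(oldMaybeCompact)    // false
	fmt.Println(newMaybeCompact)    // true
}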