diff --git a/compaction.go b/compaction.go index ba8d2cc5d3..fcc840dd39 100644 --- a/compaction.go +++ b/compaction.go @@ -2772,17 +2772,16 @@ func (d *DB) runCompaction( // The table is typically written at the maximum allowable format implied by // the current format major version of the DB. tableFormat := formatVers.MaxTableFormat() - if tableFormat > sstable.TableFormatPebblev3 { - // Since TableFormatPebblev3 does not currently subsume - // TableFormatPebblev2, this panic ensures that we have carefully thought - // through what we are doing before we introduce a format beyond - // TableFormatPebblev3. - panic("cannot handle table format beyond TableFormatPebblev3") - } + + // In format major versions with maximum table formats of Pebblev3, value + // blocks were conditional on an experimental setting. In format major + // versions with maximum table formats of Pebblev4 and higher, value blocks + // are always enabled. if tableFormat == sstable.TableFormatPebblev3 && (d.opts.Experimental.EnableValueBlocks == nil || !d.opts.Experimental.EnableValueBlocks()) { tableFormat = sstable.TableFormatPebblev2 } + writerOpts := d.opts.MakeWriterOptions(c.outputLevel.level, tableFormat) if formatVers < FormatBlockPropertyCollector { // Cannot yet write block properties. diff --git a/data_test.go b/data_test.go index 74973ee61f..ce1d06f6a4 100644 --- a/data_test.go +++ b/data_test.go @@ -606,6 +606,8 @@ func runBuildCmd(td *datadriven.TestData, d *DB, fs vfs.FS) error { tableFormat = sstable.TableFormatPebblev2 case "pebblev3": tableFormat = sstable.TableFormatPebblev3 + case "pebblev4": + tableFormat = sstable.TableFormatPebblev4 default: return errors.Errorf("unknown format string %s", cmdArg.Vals[0]) } diff --git a/format_major_version.go b/format_major_version.go index 48b3b2840f..6d96297075 100644 --- a/format_major_version.go +++ b/format_major_version.go @@ -168,8 +168,7 @@ func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat { case FormatRangeKeys, FormatMinTableFormatPebblev1, FormatPrePebblev1Marked, FormatUnusedPrePebblev1MarkedCompacted: return sstable.TableFormatPebblev2 - case FormatSSTableValueBlocks, FormatFlushableIngest, - FormatPrePebblev1MarkedCompacted: + case FormatSSTableValueBlocks, FormatFlushableIngest, FormatPrePebblev1MarkedCompacted: return sstable.TableFormatPebblev3 default: panic(fmt.Sprintf("pebble: unsupported format major version: %s", v)) diff --git a/options.go b/options.go index 4e99437c07..2bdc9d505c 100644 --- a/options.go +++ b/options.go @@ -613,8 +613,11 @@ type Options struct { PointTombstoneWeight float64 // EnableValueBlocks is used to decide whether to enable writing - // TableFormatPebblev3 sstables. WARNING: do not return true yet, since - // support for TableFormatPebblev3 is incomplete and not production ready. + // TableFormatPebblev3 sstables. This setting is only respected by a + // specific subset of format major versions: FormatSSTableValueBlocks, + // FormatFlushableIngest and FormatPrePebblev1MarkedCompacted. In lower + // format major versions, value blocks are never enabled. In higher + // format major versions, value blocks are always enabled. EnableValueBlocks func() bool // ShortAttributeExtractor is used iff EnableValueBlocks() returns true diff --git a/sstable/format.go b/sstable/format.go index f719ec0e73..91909835c2 100644 --- a/sstable/format.go +++ b/sstable/format.go @@ -23,12 +23,10 @@ const ( TableFormatRocksDBv2 TableFormatPebblev1 // Block properties. TableFormatPebblev2 // Range keys. - // TableFormatPebblev3 is not currently intended to subsume v2, as - // supporting value blocks adds a 1 byte prefix to each value. After - // thorough experimentation and some production experience, this may change. TableFormatPebblev3 // Value blocks. + TableFormatPebblev4 // Reserved. - TableFormatMax = TableFormatPebblev3 + TableFormatMax = TableFormatPebblev4 ) // ParseTableFormat parses the given magic bytes and version into its @@ -52,6 +50,8 @@ func ParseTableFormat(magic []byte, version uint32) (TableFormat, error) { return TableFormatPebblev2, nil case 3: return TableFormatPebblev3, nil + case 4: + return TableFormatPebblev4, nil default: return TableFormatUnspecified, base.CorruptionErrorf( "pebble/table: unsupported pebble format version %d", errors.Safe(version), @@ -77,6 +77,8 @@ func (f TableFormat) AsTuple() (string, uint32) { return pebbleDBMagic, 2 case TableFormatPebblev3: return pebbleDBMagic, 3 + case TableFormatPebblev4: + return pebbleDBMagic, 4 default: panic("sstable: unknown table format version tuple") } @@ -95,6 +97,8 @@ func (f TableFormat) String() string { return "(Pebble,v2)" case TableFormatPebblev3: return "(Pebble,v3)" + case TableFormatPebblev4: + return "(Pebble,v4)" default: panic("sstable: unknown table format version tuple") } diff --git a/sstable/format_test.go b/sstable/format_test.go index cd1d1735be..f5589c1239 100644 --- a/sstable/format_test.go +++ b/sstable/format_test.go @@ -49,6 +49,12 @@ func TestTableFormat_RoundTrip(t *testing.T) { version: 3, want: TableFormatPebblev3, }, + { + name: "PebbleDBv4", + magic: pebbleDBMagic, + version: 4, + want: TableFormatPebblev4, + }, // Invalid cases. { name: "Invalid RocksDB version", @@ -59,8 +65,8 @@ func TestTableFormat_RoundTrip(t *testing.T) { { name: "Invalid PebbleDB version", magic: pebbleDBMagic, - version: 4, - wantErr: "pebble/table: unsupported pebble format version 4", + version: 5, + wantErr: "pebble/table: unsupported pebble format version 5", }, { name: "Unknown magic string", diff --git a/sstable/reader.go b/sstable/reader.go index 97f027e817..0bbfd628a2 100644 --- a/sstable/reader.go +++ b/sstable/reader.go @@ -435,7 +435,7 @@ func (i *singleLevelIterator) init( return err } i.dataRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.dataRHPrealloc) - if r.tableFormat == TableFormatPebblev3 { + if r.tableFormat >= TableFormatPebblev3 { if r.Properties.NumValueBlocks > 0 { // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader // can outlive the singleLevelIterator due to be being embedded in a @@ -1874,7 +1874,7 @@ func (i *twoLevelIterator) init( return err } i.dataRH = r.readable.NewReadHandle(ctx) - if r.tableFormat == TableFormatPebblev3 { + if r.tableFormat >= TableFormatPebblev3 { if r.Properties.NumValueBlocks > 0 { i.vbReader = &valueBlockReader{ ctx: ctx, @@ -4067,7 +4067,7 @@ func (l *Layout) Describe( formatIsRestart(iter.data, iter.restarts, iter.numRestarts, iter.offset) if fmtRecord != nil { fmt.Fprintf(w, " ") - if l.Format != TableFormatPebblev3 { + if l.Format < TableFormatPebblev3 { fmtRecord(key, value.InPlaceValue()) } else { // InPlaceValue() will succeed even for data blocks where the diff --git a/sstable/suffix_rewriter.go b/sstable/suffix_rewriter.go index 39089c9884..984e8d139e 100644 --- a/sstable/suffix_rewriter.go +++ b/sstable/suffix_rewriter.go @@ -223,7 +223,7 @@ func rewriteBlocks( // in the block, which includes the 1-byte prefix. This is fine since bw // also does not know about the prefix and will preserve it in bw.add. v := val.InPlaceValue() - if invariants.Enabled && r.tableFormat == TableFormatPebblev3 && + if invariants.Enabled && r.tableFormat >= TableFormatPebblev3 && key.Kind() == InternalKeyKindSet { if len(v) < 1 { return errors.Errorf("value has no prefix") diff --git a/sstable/suffix_rewriter_test.go b/sstable/suffix_rewriter_test.go index 47141ff70b..da6a47232c 100644 --- a/sstable/suffix_rewriter_test.go +++ b/sstable/suffix_rewriter_test.go @@ -18,9 +18,7 @@ func TestRewriteSuffixProps(t *testing.T) { from, to := []byte("_212"), []byte("_646") format := TableFormatPebblev2 - if rand.Intn(2) != 0 { - format = TableFormatPebblev3 - } + format += TableFormat(rand.Intn(int(TableFormatPebblev4 - TableFormatPebblev2))) t.Logf("table format: %s\n", format.String()) wOpts := WriterOptions{ FilterPolicy: bloom.FilterPolicy(10), diff --git a/sstable/table.go b/sstable/table.go index f5e6a1c1d9..5267123029 100644 --- a/sstable/table.go +++ b/sstable/table.go @@ -428,7 +428,7 @@ func supportsTwoLevelIndex(format TableFormat) bool { switch format { case TableFormatLevelDB: return false - case TableFormatRocksDBv2, TableFormatPebblev1, TableFormatPebblev2, TableFormatPebblev3: + case TableFormatRocksDBv2, TableFormatPebblev1, TableFormatPebblev2, TableFormatPebblev3, TableFormatPebblev4: return true default: panic("sstable: unspecified table format version") diff --git a/sstable/testdata/writer_value_blocks b/sstable/testdata/writer_value_blocks index 04d664bc34..745cc97ad8 100644 --- a/sstable/testdata/writer_value_blocks +++ b/sstable/testdata/writer_value_blocks @@ -214,7 +214,7 @@ layout 1255 meta: offset=1185, length=64 1258 index: offset=264, length=77 1261 [padding] - 1295 version: 3 + 1295 version: 4 1299 magic number: 0xf09faab3f09faab3 1307 EOF @@ -337,6 +337,6 @@ layout 856 meta: offset=818, length=32 859 index: offset=71, length=22 861 [padding] - 896 version: 3 + 896 version: 4 900 magic number: 0xf09faab3f09faab3 908 EOF diff --git a/sstable/writer.go b/sstable/writer.go index aaeda43ea1..d8186881c1 100644 --- a/sstable/writer.go +++ b/sstable/writer.go @@ -2110,7 +2110,7 @@ func NewWriter(writable objstorage.Writable, o WriterOptions, extraOpts ...Write Format: o.Comparer.FormatKey, }, } - if w.tableFormat == TableFormatPebblev3 { + if w.tableFormat >= TableFormatPebblev3 { w.shortAttributeExtractor = o.ShortAttributeExtractor w.requiredInPlaceValueBound = o.RequiredInPlaceValueBound w.valueBlockWriter = newValueBlockWriter( diff --git a/table_cache.go b/table_cache.go index 0b97ce6903..9a3c5b5724 100644 --- a/table_cache.go +++ b/table_cache.go @@ -447,7 +447,7 @@ func (c *tableCacheShard) newIters( return nil, nil, err } var rp sstable.ReaderProvider - if tableFormat == sstable.TableFormatPebblev3 && v.reader.Properties.NumValueBlocks > 0 { + if tableFormat >= sstable.TableFormatPebblev3 && v.reader.Properties.NumValueBlocks > 0 { rp = &tableCacheShardReaderProvider{c: c, file: file, dbOpts: dbOpts} } diff --git a/testdata/format_major_version_pebblev1_migration b/testdata/format_major_version_pebblev1_migration index 52c356e932..c579a53050 100644 --- a/testdata/format_major_version_pebblev1_migration +++ b/testdata/format_major_version_pebblev1_migration @@ -70,6 +70,7 @@ tally-table-formats (Pebble,v1): 1 (Pebble,v2): 2 (Pebble,v3): 0 +(Pebble,v4): 0 # Upgrade the DB to FormatMinTableFormatPebblev1. @@ -166,3 +167,4 @@ tally-table-formats (Pebble,v1): 1 (Pebble,v2): 4 (Pebble,v3): 0 +(Pebble,v4): 0