From b7c0c5445b794b38ace6983d2caf4acf5c483f01 Mon Sep 17 00:00:00 2001 From: Bilal Akhtar Date: Wed, 12 Apr 2023 17:37:44 -0400 Subject: [PATCH] *: Implement iterators and seqnum substitution for foreign SSTs When sstables written to by other stores in shared storage are read, we need to collapse range keys/range dels/point keys within each SST and then return keys at fixed sequence numbers reserved for that level. This change implements that path for all sstables that are in shared storage according to objstorage.Provider but have a creator ID that doesn't match ours. --- internal/base/seqnums.go | 15 ++ internal/keyspan/iter.go | 3 + internal/keyspan/merging_iter.go | 43 ---- internal/keyspan/transformer.go | 149 ++++++++++++ internal/rangekey/coalesce.go | 70 ++++++ level_iter.go | 2 +- objstorage/objstorage.go | 5 + objstorage/objstorageprovider/shared.go | 8 + options.go | 13 + range_keys.go | 7 +- scan_internal.go | 306 ++++++++++++++++++++++-- scan_internal_test.go | 60 +++++ sstable/options.go | 6 + sstable/reader.go | 13 +- table_cache.go | 79 +++++- testdata/event_listener | 4 +- testdata/ingest | 2 +- testdata/metrics | 4 +- testdata/point_collapsing_iter | 173 ++++++++++++++ 19 files changed, 891 insertions(+), 71 deletions(-) create mode 100644 internal/keyspan/transformer.go create mode 100644 testdata/point_collapsing_iter diff --git a/internal/base/seqnums.go b/internal/base/seqnums.go index f563c1fbb9..aa38226a17 100644 --- a/internal/base/seqnums.go +++ b/internal/base/seqnums.go @@ -4,6 +4,8 @@ package base +import "fmt" + // This file defines sequence numbers that are reserved for foreign keys i.e. // internal keys coming from other Pebble instances and existing in shared // storage, as those will "slot below" any internal keys added by our own Pebble @@ -52,3 +54,16 @@ const ( // ourselves. SeqNumStart = uint64(10) ) + +// PointSeqNumForLevel returns the appropriate reserved sequence number for +// point keys in foreign sstables at the specified level. +func PointSeqNumForLevel(level int) uint64 { + switch level { + case 5: + return SeqNumL5Point + case 6: + return SeqNumL6Point + default: + panic(fmt.Sprintf("unexpected foreign sstable at level %d", level)) + } +} diff --git a/internal/keyspan/iter.go b/internal/keyspan/iter.go index 4798561ac6..59232aedd8 100644 --- a/internal/keyspan/iter.go +++ b/internal/keyspan/iter.go @@ -70,6 +70,9 @@ type SpanIterOptions struct { // RangeKeyFilters can be used to avoid scanning tables and blocks in tables // when iterating over range keys. RangeKeyFilters []base.BlockPropertyFilter + // Level specifies the level where this sstable is being read. Must be + // specified for foreign (i.e. shared not-created-by-this-instance) sstables. + Level manifest.Level } // Iter is an iterator over a set of fragmented spans. diff --git a/internal/keyspan/merging_iter.go b/internal/keyspan/merging_iter.go index 247602423e..c73ba59b32 100644 --- a/internal/keyspan/merging_iter.go +++ b/internal/keyspan/merging_iter.go @@ -25,49 +25,6 @@ import ( // MergingIter implementation, but will require a bit of plumbing to thread the // Equal function. -// Transformer defines a transformation to be applied to a Span. -type Transformer interface { - // Transform takes a Span as input and writes the transformed Span to the - // provided output *Span pointer. The output Span's Keys slice may be reused - // by Transform to reduce allocations. - Transform(cmp base.Compare, in Span, out *Span) error -} - -// The TransformerFunc type is an adapter to allow the use of ordinary functions -// as Transformers. If f is a function with the appropriate signature, -// TransformerFunc(f) is a Transformer that calls f. -type TransformerFunc func(base.Compare, Span, *Span) error - -// Transform calls f(cmp, in, out). -func (tf TransformerFunc) Transform(cmp base.Compare, in Span, out *Span) error { - return tf(cmp, in, out) -} - -var noopTransform Transformer = TransformerFunc(func(_ base.Compare, s Span, dst *Span) error { - dst.Start, dst.End = s.Start, s.End - dst.Keys = append(dst.Keys[:0], s.Keys...) - return nil -}) - -// VisibleTransform filters keys that are invisible at the provided snapshot -// sequence number. -func VisibleTransform(snapshot uint64) Transformer { - return TransformerFunc(func(_ base.Compare, s Span, dst *Span) error { - dst.Start, dst.End = s.Start, s.End - dst.Keys = dst.Keys[:0] - for _, k := range s.Keys { - // NB: The InternalKeySeqNumMax value is used for the batch snapshot - // because a batch's visible span keys are filtered when they're - // fragmented. There's no requirement to enforce visibility at - // iteration time. - if base.Visible(k.SeqNum(), snapshot, base.InternalKeySeqNumMax) { - dst.Keys = append(dst.Keys, k) - } - } - return nil - }) -} - // MergingIter merges spans across levels of the LSM, exposing an iterator over // spans that yields sets of spans fragmented at unique user key boundaries. // diff --git a/internal/keyspan/transformer.go b/internal/keyspan/transformer.go new file mode 100644 index 0000000000..e0152cf4d6 --- /dev/null +++ b/internal/keyspan/transformer.go @@ -0,0 +1,149 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import "github.com/cockroachdb/pebble/internal/base" + +// Transformer defines a transformation to be applied to a Span. +type Transformer interface { + // Transform takes a Span as input and writes the transformed Span to the + // provided output *Span pointer. The output Span's Keys slice may be reused + // by Transform to reduce allocations. + Transform(cmp base.Compare, in Span, out *Span) error +} + +// The TransformerFunc type is an adapter to allow the use of ordinary functions +// as Transformers. If f is a function with the appropriate signature, +// TransformerFunc(f) is a Transformer that calls f. +type TransformerFunc func(base.Compare, Span, *Span) error + +// Transform calls f(cmp, in, out). +func (tf TransformerFunc) Transform(cmp base.Compare, in Span, out *Span) error { + return tf(cmp, in, out) +} + +var noopTransform Transformer = TransformerFunc(func(_ base.Compare, s Span, dst *Span) error { + dst.Start, dst.End = s.Start, s.End + dst.Keys = append(dst.Keys[:0], s.Keys...) + return nil +}) + +// VisibleTransform filters keys that are invisible at the provided snapshot +// sequence number. +func VisibleTransform(snapshot uint64) Transformer { + return TransformerFunc(func(_ base.Compare, s Span, dst *Span) error { + dst.Start, dst.End = s.Start, s.End + dst.Keys = dst.Keys[:0] + for _, k := range s.Keys { + // NB: The InternalKeySeqNumMax value is used for the batch snapshot + // because a batch's visible span keys are filtered when they're + // fragmented. There's no requirement to enforce visibility at + // iteration time. + if base.Visible(k.SeqNum(), snapshot, base.InternalKeySeqNumMax) { + dst.Keys = append(dst.Keys, k) + } + } + return nil + }) +} + +// TransformerIter is a FragmentIterator that applies a Transformer on all +// returned keys. Used for when a caller needs to apply a transformer on an +// iterator but does not otherwise need the mergingiter's merging ability. +type TransformerIter struct { + FragmentIterator + + // Transformer is applied on every Span returned by this iterator. + Transformer Transformer + // Comparer in use for this keyspace. + Compare base.Compare + + span Span + err error +} + +func (t *TransformerIter) applyTransform(span *Span) *Span { + t.span = Span{ + Start: t.span.Start[:0], + End: t.span.End[:0], + Keys: t.span.Keys[:0], + } + if err := t.Transformer.Transform(t.Compare, *span, &t.span); err != nil { + t.err = err + return nil + } + return &t.span +} + +// SeekGE implements the FragmentIterator interface. +func (t *TransformerIter) SeekGE(key []byte) *Span { + span := t.FragmentIterator.SeekGE(key) + if span == nil { + return nil + } + return t.applyTransform(span) +} + +// SeekLT implements the FragmentIterator interface. +func (t *TransformerIter) SeekLT(key []byte) *Span { + span := t.FragmentIterator.SeekLT(key) + if span == nil { + return nil + } + return t.applyTransform(span) +} + +// First implements the FragmentIterator interface. +func (t *TransformerIter) First() *Span { + span := t.FragmentIterator.First() + if span == nil { + return nil + } + return t.applyTransform(span) +} + +// Last implements the FragmentIterator interface. +func (t *TransformerIter) Last() *Span { + span := t.FragmentIterator.Last() + if span == nil { + return nil + } + return t.applyTransform(span) +} + +// Next implements the FragmentIterator interface. +func (t *TransformerIter) Next() *Span { + span := t.FragmentIterator.Next() + if span == nil { + return nil + } + return t.applyTransform(span) +} + +// Prev implements the FragmentIterator interface. +func (t *TransformerIter) Prev() *Span { + span := t.FragmentIterator.Prev() + if span == nil { + return nil + } + return t.applyTransform(span) +} + +// Error implements the FragmentIterator interface. +func (t *TransformerIter) Error() error { + if t.err != nil { + return t.err + } + return t.FragmentIterator.Error() +} + +// Close implements the FragmentIterator interface. +func (t *TransformerIter) Close() error { + err := t.FragmentIterator.Close() + if err != nil { + return err + } + return t.err +} diff --git a/internal/rangekey/coalesce.go b/internal/rangekey/coalesce.go index a25b64dfc4..661ecc4e18 100644 --- a/internal/rangekey/coalesce.go +++ b/internal/rangekey/coalesce.go @@ -367,3 +367,73 @@ func coalesce( } return nil } + +// ForeignSSTTransformer implements a keyspan.Transformer for range keys in +// foreign sstables (i.e. shared sstables not created by us). It is largely +// similar to the Transform function implemented in UserIteratorConfig in that +// it calls coalesce to remove range keys shadowed by other range keys, but also +// retains the range key that does the shadowing. In addition, it outputs range +// keys with sequence numbers that match reserved sequence numbers for that +// level (i.e. SeqNumL5RangeKeySet for L5 sets, while L6 unsets/dels are elided). +type ForeignSSTTransformer struct { + Comparer *base.Comparer + Level int + sortBuf keyspan.KeysBySuffix +} + +// Transform implements the Transformer interface. +func (f *ForeignSSTTransformer) Transform( + cmp base.Compare, s keyspan.Span, dst *keyspan.Span, +) error { + // Apply shadowing of keys. + dst.Start = s.Start + dst.End = s.End + f.sortBuf = keyspan.KeysBySuffix{ + Cmp: cmp, + Keys: f.sortBuf.Keys[:0], + } + if err := coalesce(f.Comparer.Equal, &f.sortBuf, math.MaxUint64, s.Keys); err != nil { + return err + } + keys := f.sortBuf.Keys + dst.Keys = dst.Keys[:0] + for i := range keys { + switch keys[i].Kind() { + case base.InternalKeyKindRangeKeySet: + if invariants.Enabled && len(dst.Keys) > 0 && cmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 { + panic("pebble: keys unexpectedly not in ascending suffix order") + } + dst.Keys = append(dst.Keys, keys[i]) + // Only mutate the trailer in the destination. + key := &dst.Keys[len(dst.Keys)-1] + switch f.Level { + case 5: + key.Trailer = base.MakeTrailer(base.SeqNumL5RangeKeySet, base.InternalKeyKindRangeKeySet) + case 6: + key.Trailer = base.MakeTrailer(base.SeqNumL6RangeKey, base.InternalKeyKindRangeKeySet) + } + case base.InternalKeyKindRangeKeyUnset: + if invariants.Enabled && len(dst.Keys) > 0 && cmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 { + panic("pebble: keys unexpectedly not in ascending suffix order") + } + fallthrough + case base.InternalKeyKindRangeKeyDelete: + switch f.Level { + case 5: + dst.Keys = append(dst.Keys, keys[i]) + // Only mutate the trailer in the destination. + key := &dst.Keys[len(dst.Keys)-1] + key.Trailer = base.MakeTrailer(base.SeqNumL5RangeKeyUnsetDel, keys[i].Kind()) + case 6: + // Skip this key, as foreign sstable in L6 do not need to emit range key + // unsets/dels as they do not apply to any other sstables. + continue + } + default: + return base.CorruptionErrorf("pebble: unrecognized range key kind %s", keys[i].Kind()) + } + } + // coalesce results in dst.Keys being sorted by Suffix. + dst.KeysOrder = keyspan.BySuffixAsc + return nil +} diff --git a/level_iter.go b/level_iter.go index feb809d064..32cc5ef774 100644 --- a/level_iter.go +++ b/level_iter.go @@ -38,7 +38,7 @@ type tableNewIters func( func tableNewRangeDelIter(ctx context.Context, newIters tableNewIters) keyspan.TableNewSpanIter { return func(file *manifest.FileMetadata, iterOptions *keyspan.SpanIterOptions) (keyspan.FragmentIterator, error) { iter, rangeDelIter, err := newIters( - ctx, file, &IterOptions{RangeKeyFilters: iterOptions.RangeKeyFilters}, internalIterOpts{}) + ctx, file, &IterOptions{RangeKeyFilters: iterOptions.RangeKeyFilters, level: iterOptions.Level}, internalIterOpts{}) if iter != nil { _ = iter.Close() } diff --git a/objstorage/objstorage.go b/objstorage/objstorage.go index e51c2ce76d..30e41d9174 100644 --- a/objstorage/objstorage.go +++ b/objstorage/objstorage.go @@ -209,6 +209,11 @@ type Provider interface { // Cannot be called if shared storage is not configured for the provider. SetCreatorID(creatorID CreatorID) error + // IsForeign returns whether this object is owned by a different node. Return + // value undefined if creator ID is not set yet, or if this object does not + // exist in this provider. + IsForeign(meta ObjectMetadata) bool + // SharedObjectBacking encodes the shared object metadata. SharedObjectBacking(meta *ObjectMetadata) (SharedObjectBackingHandle, error) diff --git a/objstorage/objstorageprovider/shared.go b/objstorage/objstorageprovider/shared.go index 8660c160b8..d2d7c08601 100644 --- a/objstorage/objstorageprovider/shared.go +++ b/objstorage/objstorageprovider/shared.go @@ -96,6 +96,14 @@ func (p *provider) SetCreatorID(creatorID objstorage.CreatorID) error { return nil } +// IsForeign is part of the objstorage.Provider interface. +func (p *provider) IsForeign(meta objstorage.ObjectMetadata) bool { + if !p.shared.initialized.Load() { + return false + } + return meta.IsShared() && p.shared.creatorID != meta.Shared.CreatorID +} + func (p *provider) sharedCheckInitialized() error { if p.sharedStorage() == nil { return errors.Errorf("shared object support not configured") diff --git a/options.go b/options.go index 0a4ef4dab8..d43b820b99 100644 --- a/options.go +++ b/options.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/cache" "github.com/cockroachdb/pebble/internal/humanize" + "github.com/cockroachdb/pebble/internal/keyspan" "github.com/cockroachdb/pebble/internal/manifest" "github.com/cockroachdb/pebble/objstorage/shared" "github.com/cockroachdb/pebble/sstable" @@ -222,6 +223,17 @@ func (o *IterOptions) getLogger() Logger { return o.logger } +// SpanIterOptions creates a SpanIterOptions from this IterOptions. +func (o *IterOptions) SpanIterOptions(level manifest.Level) *keyspan.SpanIterOptions { + if o == nil { + return &keyspan.SpanIterOptions{Level: level} + } + return &keyspan.SpanIterOptions{ + RangeKeyFilters: o.RangeKeyFilters, + Level: level, + } +} + // scanInternalOptions is similar to IterOptions, meant for use with // scanInternalIterator. type scanInternalOptions struct { @@ -1602,6 +1614,7 @@ func (o *Options) MakeReaderOptions() sstable.ReaderOptions { readerOpts.Comparer = o.Comparer readerOpts.Filters = o.Filters if o.Merger != nil { + readerOpts.Merge = o.Merger.Merge readerOpts.MergerName = o.Merger.Name } readerOpts.LoggerAndTracer = o.LoggerAndTracer diff --git a/range_keys.go b/range_keys.go index 30549d5ca3..888fb9807c 100644 --- a/range_keys.go +++ b/range_keys.go @@ -63,8 +63,7 @@ func (i *Iterator) constructRangeKeyIter() { // around Key Trailer order. iter := current.RangeKeyLevels[0].Iter() for f := iter.Last(); f != nil; f = iter.Prev() { - spanIterOpts := &keyspan.SpanIterOptions{RangeKeyFilters: i.opts.RangeKeyFilters} - spanIter, err := i.newIterRangeKey(f, spanIterOpts) + spanIter, err := i.newIterRangeKey(f, i.opts.SpanIterOptions(manifest.Level(0))) if err != nil { i.rangeKey.iterConfig.AddLevel(&errorKeyspanIter{err: err}) continue @@ -78,8 +77,8 @@ func (i *Iterator) constructRangeKeyIter() { continue } li := i.rangeKey.iterConfig.NewLevelIter() - spanIterOpts := keyspan.SpanIterOptions{RangeKeyFilters: i.opts.RangeKeyFilters} - li.Init(spanIterOpts, i.cmp, i.newIterRangeKey, current.RangeKeyLevels[level].Iter(), + spanIterOpts := i.opts.SpanIterOptions(manifest.Level(level)) + li.Init(*spanIterOpts, i.cmp, i.newIterRangeKey, current.RangeKeyLevels[level].Iter(), manifest.Level(level), manifest.KeyTypeRange) i.rangeKey.iterConfig.AddLevel(li) } diff --git a/scan_internal.go b/scan_internal.go index ab9e0e62d6..e193a131f3 100644 --- a/scan_internal.go +++ b/scan_internal.go @@ -13,6 +13,7 @@ import ( "github.com/cockroachdb/pebble/internal/keyspan" "github.com/cockroachdb/pebble/internal/manifest" "github.com/cockroachdb/pebble/objstorage" + "github.com/cockroachdb/pebble/sstable" ) const ( @@ -92,8 +93,26 @@ type pcIterPos int const ( pcIterPosCur pcIterPos = iota pcIterPosNext + pcIterPosPrev ) +// pointCollapsingSSTIterator implements sstable.Iterator while composing +// pointCollapsingIterator. +type pointCollapsingSSTIterator struct { + pointCollapsingIterator + childIter sstable.Iterator +} + +// MaybeFilteredKeys implements the sstable.Iterator interface. +func (p *pointCollapsingSSTIterator) MaybeFilteredKeys() bool { + return p.childIter.MaybeFilteredKeys() +} + +// SetCloseHook implements the sstable.Iterator interface. +func (p *pointCollapsingSSTIterator) SetCloseHook(fn func(i sstable.Iterator) error) { + p.childIter.SetCloseHook(fn) +} + // pointCollapsingIterator is an internalIterator that collapses point keys and // returns at most one point internal key for each user key. Merges are merged, // sets are emitted as-is, and SingleDeletes are collapsed with the next point @@ -126,18 +145,33 @@ type pointCollapsingIterator struct { // the value of savedKey is undefined. iterKey *InternalKey savedKey InternalKey + // Saved key for substituting sequence numbers. Reused to avoid an allocation. + seqNumKey InternalKey + // elideRangeDeletes ignores range deletes returned by the interleaving + // iterator if true. + elideRangeDeletes bool // Value at the current iterator position, at iterKey. - value base.LazyValue + value base.LazyValue + savedValue base.LazyValue // Used for Merge keys only. valueMerger ValueMerger valueBuf []byte + // If fixedSeqNum is non-zero, all emitted points have this fixed sequence + // number. + fixedSeqNum uint64 } // SeekPrefixGE implements the InternalIterator interface. func (p *pointCollapsingIterator) SeekPrefixGE( prefix, key []byte, flags base.SeekGEFlags, ) (*base.InternalKey, base.LazyValue) { - panic("unimplemented") + p.resetKey() + p.iterKey, p.value = p.iter.SeekPrefixGE(prefix, key, flags) + p.pos = pcIterPosCur + if p.iterKey == nil { + return nil, base.LazyValue{} + } + return p.findNextEntry() } // SeekGE implements the InternalIterator interface. @@ -157,20 +191,37 @@ func (p *pointCollapsingIterator) SeekGE( func (p *pointCollapsingIterator) SeekLT( key []byte, flags base.SeekLTFlags, ) (*base.InternalKey, base.LazyValue) { - panic("unimplemented") + p.resetKey() + p.iterKey, p.value = p.iter.SeekLT(key, flags) + p.pos = pcIterPosCur + if p.iterKey == nil { + return nil, base.LazyValue{} + } + return p.findPrevEntry() } func (p *pointCollapsingIterator) resetKey() { p.savedKey.UserKey = p.savedKey.UserKey[:0] p.savedKey.Trailer = 0 + p.seqNumKey = InternalKey{} p.valueMerger = nil p.valueBuf = p.valueBuf[:0] p.iterKey = nil p.pos = pcIterPosCur } -// findNextEntry is called to return the next key. p.iter must be positioned at -// the start of the first user key we are interested in. +func (p *pointCollapsingIterator) subSeqNum(key *base.InternalKey) *base.InternalKey { + if p.fixedSeqNum == 0 || key == nil || key.Kind() == InternalKeyKindRangeDelete { + return key + } + // Reuse seqNumKey. This avoids an allocation. + p.seqNumKey.UserKey = key.UserKey + p.seqNumKey.Trailer = base.MakeTrailer(p.fixedSeqNum, key.Kind()) + return &p.seqNumKey +} + +// findNextEntry is called to return the next key. p.iter must be positioned at the +// start of the first user key we are interested in. func (p *pointCollapsingIterator) findNextEntry() (*base.InternalKey, base.LazyValue) { finishAndReturnMerge := func() (*base.InternalKey, base.LazyValue) { value, closer, err := p.valueMerger.Finish(true /* includesBase */) @@ -184,7 +235,7 @@ func (p *pointCollapsingIterator) findNextEntry() (*base.InternalKey, base.LazyV } p.valueMerger = nil newValue := base.MakeInPlaceValue(value) - return &p.savedKey, newValue + return p.subSeqNum(&p.savedKey), newValue } // saveKey sets p.iterKey (if not-nil) to &p.savedKey. We can use this equality @@ -240,7 +291,7 @@ func (p *pointCollapsingIterator) findNextEntry() (*base.InternalKey, base.LazyV // of blocks and can determine user key changes without doing key saves // or comparisons. p.pos = pcIterPosCur - return p.iterKey, p.value + return p.subSeqNum(p.iterKey), p.value case InternalKeyKindSingleDelete: // Panic, as this iterator is not expected to observe single deletes. panic("cannot process singledel key in point collapsing iterator") @@ -291,11 +342,17 @@ func (p *pointCollapsingIterator) findNextEntry() (*base.InternalKey, base.LazyV } p.iterKey, p.value = p.iter.Next() case InternalKeyKindRangeDelete: + if p.elideRangeDeletes { + // Skip this range delete, and process any point after it. + p.iterKey, p.value = p.iter.Next() + p.saveKey() + continue + } // These are interleaved by the interleaving iterator ahead of all points. // We should pass them as-is, but also account for any points ahead of // them. p.pos = pcIterPosCur - return p.iterKey, p.value + return p.subSeqNum(p.iterKey), p.value default: panic(fmt.Sprintf("unexpected kind: %d", p.iterKey.Kind())) } @@ -308,6 +365,149 @@ func (p *pointCollapsingIterator) findNextEntry() (*base.InternalKey, base.LazyV return nil, base.LazyValue{} } +// findPrevEntry finds the relevant point key to return for the previous user key +// (i.e. in reverse iteration). Requires that the iterator is already positioned +// at the first-in-reverse (i.e. rightmost / largest) internal key encountered +// for that user key. +func (p *pointCollapsingIterator) findPrevEntry() (*base.InternalKey, base.LazyValue) { + finishAndReturnMerge := func() (*base.InternalKey, base.LazyValue) { + value, closer, err := p.valueMerger.Finish(true /* includesBase */) + if err != nil { + p.err = err + return nil, base.LazyValue{} + } + p.valueBuf = append(p.valueBuf[:0], value...) + if closer != nil { + _ = closer.Close() + } + p.valueMerger = nil + p.savedValue = base.MakeInPlaceValue(p.valueBuf) + return p.subSeqNum(&p.savedKey), p.savedValue + } + + if p.iterKey == nil { + p.pos = pcIterPosCur + return nil, base.LazyValue{} + } + + // saveKey sets p.iterKey (if not-nil) to &p.savedKey. We can use this equality + // as a proxy to determine if we're at the first internal key for a user key. + p.saveKey() + for p.iterKey != nil { + // NB: p.savedKey is either the current key (iff p.iterKey == &p.savedKey), + // or the previous key. + if p.iterKey != &p.savedKey && !p.comparer.Equal(p.iterKey.UserKey, p.savedKey.UserKey) { + if p.valueMerger != nil { + if p.savedKey.Kind() != InternalKeyKindMerge { + panic(fmt.Sprintf("expected key %s to have MERGE kind", p.iterKey)) + } + p.pos = pcIterPosPrev + return finishAndReturnMerge() + } + p.pos = pcIterPosPrev + return p.subSeqNum(&p.savedKey), p.savedValue + } + if s := p.iter.Span(); s != nil && s.CoversAt(p.seqNum, p.iterKey.SeqNum()) { + // Skip this key. + p.iterKey, p.value = p.iter.Prev() + p.saveKey() + continue + } + switch p.iterKey.Kind() { + case InternalKeyKindSet, InternalKeyKindDelete, InternalKeyKindSetWithDelete: + p.saveKey() + // Copy value into p.savedValue. + value, callerOwned, err := p.value.Value(p.valueBuf[:0]) + if err != nil { + p.err = err + return nil, base.LazyValue{} + } + if !callerOwned { + p.valueBuf = append(p.valueBuf[:0], value...) + } else { + p.valueBuf = value + } + p.valueMerger = nil + p.savedValue = base.MakeInPlaceValue(p.valueBuf) + p.iterKey, p.value = p.iter.Prev() + continue + case InternalKeyKindSingleDelete: + // Panic, as this iterator is not expected to observe single deletes. + panic("cannot process singledel key in point collapsing iterator") + case InternalKeyKindMerge: + if p.valueMerger == nil { + // Set up merger. This is the first Merge key encountered. + value, _, err := p.value.Value(nil /* buf */) + if err != nil { + p.err = err + return nil, base.LazyValue{} + } + p.valueMerger, err = p.merge(p.iterKey.UserKey, value) + if err != nil { + p.err = err + return nil, base.LazyValue{} + } + if p.iterKey != &p.savedKey && (p.savedKey.Kind() == InternalKeyKindSet || p.savedKey.Kind() == InternalKeyKindSetWithDelete) { + if err := p.valueMerger.MergeOlder(p.savedValue.InPlaceValue()); err != nil { + p.err = err + return nil, base.LazyValue{} + } + } + p.saveKey() + p.savedValue = base.LazyValue{} + p.iterKey, p.value = p.iter.Prev() + continue + } + value, callerOwned, err := p.value.Value(p.valueBuf[:0]) + if err != nil { + p.err = err + return nil, base.LazyValue{} + } + if !callerOwned { + p.valueBuf = append(p.valueBuf[:0], value...) + } else { + p.valueBuf = value + } + if err := p.valueMerger.MergeNewer(value); err != nil { + p.err = err + return nil, base.LazyValue{} + } + p.iterKey, p.value = p.iter.Prev() + p.savedValue = base.LazyValue{} + continue + case InternalKeyKindRangeDelete: + if p.elideRangeDeletes { + // Skip this range delete, and process any point before it. + p.iterKey, p.value = p.iter.Prev() + continue + } + // These are interleaved by the interleaving iterator behind all points. + // There are two cases: one is where there's a key behind us that we + // should return (&p.savedKey != p.iterKey). The other is where this is the + // only rangedel key and we return it as-is. + if p.iterKey != &p.savedKey { + p.pos = pcIterPosPrev + if p.valueMerger != nil { + if p.savedKey.Kind() != InternalKeyKindMerge { + panic(fmt.Sprintf("expected key %s to have MERGE kind", p.iterKey)) + } + return finishAndReturnMerge() + } + return p.subSeqNum(&p.savedKey), p.savedValue + } + p.pos = pcIterPosCur + return p.iterKey, p.value + default: + panic(fmt.Sprintf("unexpected kind: %d", p.iterKey.Kind())) + } + } + p.pos = pcIterPosPrev + if p.valueMerger != nil { + return finishAndReturnMerge() + } + return &p.savedKey, p.savedValue +} + // First implements the InternalIterator interface. func (p *pointCollapsingIterator) First() (*base.InternalKey, base.LazyValue) { p.resetKey() @@ -321,7 +521,13 @@ func (p *pointCollapsingIterator) First() (*base.InternalKey, base.LazyValue) { // Last implements the InternalIterator interface. func (p *pointCollapsingIterator) Last() (*base.InternalKey, base.LazyValue) { - panic("unimplemented") + p.resetKey() + p.iterKey, p.value = p.iter.Last() + p.pos = pcIterPosCur + if p.iterKey == nil { + return nil, base.LazyValue{} + } + return p.findPrevEntry() } func (p *pointCollapsingIterator) saveKey() { @@ -339,9 +545,34 @@ func (p *pointCollapsingIterator) saveKey() { // Next implements the InternalIterator interface. func (p *pointCollapsingIterator) Next() (*base.InternalKey, base.LazyValue) { switch p.pos { + case pcIterPosPrev: + p.saveKey() + if p.iterKey != nil && p.iterKey.Kind() == InternalKeyKindRangeDelete && !p.elideRangeDeletes { + p.iterKey, p.value = p.iter.Next() + p.pos = pcIterPosCur + } else { + // Fast forward to the next user key. p.iterKey stays at p.savedKey for + // this loop. + key, val := p.iter.Next() + // p.iterKey.SeqNum() >= key.SeqNum() is an optimization that allows us to + // use p.iterKey.SeqNum() < key.SeqNum() as a sign that the user key has + // changed, without needing to do the full key comparison. + for p.iterKey != nil && key != nil && p.iterKey.SeqNum() >= key.SeqNum() && + p.comparer.Equal(p.iterKey.UserKey, key.UserKey) { + key, val = p.iter.Next() + } + if key == nil { + // There are no keys to return. + p.resetKey() + return nil, base.LazyValue{} + } + p.iterKey, p.value = key, val + p.pos = pcIterPosCur + } + fallthrough case pcIterPosCur: p.saveKey() - if p.iterKey != nil && p.iterKey.Kind() == InternalKeyKindRangeDelete { + if p.iterKey != nil && p.iterKey.Kind() == InternalKeyKindRangeDelete && !p.elideRangeDeletes { // Step over the interleaved range delete and process the very next // internal key, even if it's at the same user key. This is because a // point for that user key has not been returned yet. @@ -376,12 +607,56 @@ func (p *pointCollapsingIterator) Next() (*base.InternalKey, base.LazyValue) { // NextPrefix implements the InternalIterator interface. func (p *pointCollapsingIterator) NextPrefix(succKey []byte) (*base.InternalKey, base.LazyValue) { + // TODO(bilal): Implement this. It'll be similar to SeekGE, except we'll call + // the child iterator's NextPrefix, and have some special logic in case pos + // is pcIterPosNext. panic("unimplemented") } // Prev implements the InternalIterator interface. func (p *pointCollapsingIterator) Prev() (*base.InternalKey, base.LazyValue) { - panic("unimplemented") + switch p.pos { + case pcIterPosNext: + // Rewind backwards to the previous iter key. + p.saveKey() + key, val := p.iter.Prev() + for p.iterKey != nil && key != nil && p.iterKey.SeqNum() <= key.SeqNum() && + p.comparer.Equal(p.iterKey.UserKey, key.UserKey) { + if key.Kind() == InternalKeyKindRangeDelete && !p.elideRangeDeletes { + // We need to pause at this range delete and return it as-is, as "cur" + // is referencing the point key after it, not the range delete. + break + } + key, val = p.iter.Prev() + } + p.iterKey = key + p.value = val + p.pos = pcIterPosCur + fallthrough + case pcIterPosCur: + p.saveKey() + key, val := p.iter.Prev() + for p.iterKey != nil && key != nil && p.iterKey.SeqNum() <= key.SeqNum() && + p.comparer.Equal(p.iterKey.UserKey, key.UserKey) { + if key.Kind() == InternalKeyKindRangeDelete && !p.elideRangeDeletes { + // We need to pause at this range delete and return it as-is, as "cur" + // is referencing the point key after it, not the range delete. + break + } + key, val = p.iter.Prev() + } + p.iterKey = key + p.value = val + p.pos = pcIterPosCur + case pcIterPosPrev: + // Do nothing. + p.pos = pcIterPosCur + } + if p.iterKey == nil { + p.resetKey() + return nil, base.LazyValue{} + } + return p.findPrevEntry() } // Error implements the InternalIterator interface. @@ -494,7 +769,7 @@ func (d *DB) truncateSharedFile( rangeDelIter = keyspan.Truncate(cmp, rangeDelIter, lower, upper, nil, nil) defer rangeDelIter.Close() } - rangeKeyIter, err := d.tableNewRangeKeyIter(file, nil /* spanIterOptions */) + rangeKeyIter, err := d.tableNewRangeKeyIter(file, &keyspan.SpanIterOptions{Level: manifest.Level(level)}) if err != nil { return nil, false, err } @@ -798,8 +1073,7 @@ func (i *scanInternalIterator) constructRangeKeyIter() { // around Key Trailer order. iter := current.RangeKeyLevels[0].Iter() for f := iter.Last(); f != nil; f = iter.Prev() { - spanIterOpts := &keyspan.SpanIterOptions{RangeKeyFilters: i.opts.RangeKeyFilters} - spanIter, err := i.newIterRangeKey(f, spanIterOpts) + spanIter, err := i.newIterRangeKey(f, i.opts.SpanIterOptions(manifest.Level(0))) if err != nil { i.rangeKey.iterConfig.AddLevel(&errorKeyspanIter{err: err}) continue @@ -816,8 +1090,8 @@ func (i *scanInternalIterator) constructRangeKeyIter() { continue } li := i.rangeKey.iterConfig.NewLevelIter() - spanIterOpts := keyspan.SpanIterOptions{RangeKeyFilters: i.opts.RangeKeyFilters} - li.Init(spanIterOpts, i.comparer.Compare, i.newIterRangeKey, current.RangeKeyLevels[level].Iter(), + spanIterOpts := i.opts.SpanIterOptions(manifest.Level(level)) + li.Init(*spanIterOpts, i.comparer.Compare, i.newIterRangeKey, current.RangeKeyLevels[level].Iter(), manifest.Level(level), manifest.KeyTypeRange) i.rangeKey.iterConfig.AddLevel(li) } diff --git a/scan_internal_test.go b/scan_internal_test.go index 2ecbca01c6..37594c2ac3 100644 --- a/scan_internal_test.go +++ b/scan_internal_test.go @@ -7,6 +7,7 @@ package pebble import ( "context" "fmt" + "math" "strconv" "strings" "testing" @@ -267,3 +268,62 @@ func TestScanInternal(t *testing.T) { } }) } + +func TestPointCollapsingIter(t *testing.T) { + var def string + datadriven.RunTest(t, "testdata/point_collapsing_iter", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + def = d.Input + return "" + + case "iter": + var elideRangeDels bool + for i := range d.CmdArgs { + switch d.CmdArgs[i].Key { + case "elide-range-dels": + var err error + elideRangeDels, err = strconv.ParseBool(d.CmdArgs[i].Vals[0]) + if err != nil { + return err.Error() + } + } + } + f := &fakeIter{} + var spans []keyspan.Span + for _, line := range strings.Split(def, "\n") { + for _, key := range strings.Fields(line) { + j := strings.Index(key, ":") + k := base.ParseInternalKey(key[:j]) + v := []byte(key[j+1:]) + if k.Kind() == InternalKeyKindRangeDelete { + spans = append(spans, keyspan.Span{ + Start: k.UserKey, + End: v, + Keys: []keyspan.Key{{Trailer: k.Trailer}}, + KeysOrder: 0, + }) + continue + } + f.keys = append(f.keys, k) + f.vals = append(f.vals, v) + } + } + + ksIter := keyspan.NewIter(base.DefaultComparer.Compare, spans) + pcIter := &pointCollapsingIterator{ + comparer: base.DefaultComparer, + merge: base.DefaultMerger.Merge, + seqNum: math.MaxUint64, + elideRangeDeletes: elideRangeDels, + } + pcIter.iter.Init(base.DefaultComparer, f, ksIter, nil /* mask */, nil, nil) + defer pcIter.Close() + + return runInternalIterCmd(t, d, pcIter, iterCmdVerboseKey) + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} diff --git a/sstable/options.go b/sstable/options.go index 66dbdd4d0a..869e69b17c 100644 --- a/sstable/options.go +++ b/sstable/options.go @@ -107,6 +107,9 @@ type ReaderOptions struct { // The default value uses the same ordering as bytes.Compare. Comparer *Comparer + // Merge defines the Merge function in use for this keyspace. + Merge base.Merge + // Filters is a map from filter policy name to filter policy. It is used for // debugging tools which may be used on multiple databases configured with // different filter policies. It is not necessary to populate this filters @@ -126,6 +129,9 @@ func (o ReaderOptions) ensureDefaults() ReaderOptions { if o.Comparer == nil { o.Comparer = base.DefaultComparer } + if o.Merge == nil { + o.Merge = base.DefaultMerger.Merge + } if o.MergerName == "" { o.MergerName = base.DefaultMerger.Name } diff --git a/sstable/reader.go b/sstable/reader.go index 4a79d519ed..116d76b376 100644 --- a/sstable/reader.go +++ b/sstable/reader.go @@ -2709,6 +2709,17 @@ func (r *Reader) NewCompactionIter(bytesIterated *uint64, rp ReaderProvider) (It // TODO(sumeer): plumb context.Context since this path is relevant in the user-facing // iterator. Add WithContext methods since the existing ones are public. func (r *Reader) NewRawRangeDelIter() (keyspan.FragmentIterator, error) { + return r.NewFixedSeqnumRangeDelIter(r.Properties.GlobalSeqNum) +} + +// NewFixedSeqnumRangeDelIter returns an internal iterator for the contents of +// the range-del block of the table, with a custom sequence number to be used as +// the global sequence number for this block. Returns nil if the table does not +// contain any range deletions. +// +// TODO(sumeer): plumb context.Context since this path is relevant in the user-facing +// iterator. Add WithContext methods since the existing ones are public. +func (r *Reader) NewFixedSeqnumRangeDelIter(seqNum uint64) (keyspan.FragmentIterator, error) { if r.rangeDelBH.Length == 0 { return nil, nil } @@ -2717,7 +2728,7 @@ func (r *Reader) NewRawRangeDelIter() (keyspan.FragmentIterator, error) { return nil, err } i := &fragmentBlockIter{} - if err := i.blockIter.initHandle(r.Compare, h, r.Properties.GlobalSeqNum); err != nil { + if err := i.blockIter.initHandle(r.Compare, h, seqNum); err != nil { return nil, err } return i, nil diff --git a/table_cache.go b/table_cache.go index 68157e41f2..6019bfb95c 100644 --- a/table_cache.go +++ b/table_cache.go @@ -9,6 +9,7 @@ import ( "context" "fmt" "io" + "math" "runtime/debug" "runtime/pprof" "sync" @@ -21,6 +22,7 @@ import ( "github.com/cockroachdb/pebble/internal/keyspan" "github.com/cockroachdb/pebble/internal/manifest" "github.com/cockroachdb/pebble/internal/private" + "github.com/cockroachdb/pebble/internal/rangekey" "github.com/cockroachdb/pebble/objstorage" "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing" "github.com/cockroachdb/pebble/sstable" @@ -370,9 +372,34 @@ func (c *tableCacheShard) newIters( return nil, nil, err } + // Check if this file is a foreign file. + provider := dbOpts.objProvider + objMeta, err := provider.Lookup(fileTypeTable, file.FileBacking.DiskFileNum) + if err != nil { + return nil, nil, err + } + if objMeta.IsShared() && opts == nil { + panic("unexpected nil opts when reading shared file") + } + // NB: range-del iterator does not maintain a reference to the table, nor // does it need to read from it after creation. - rangeDelIter, err := v.reader.NewRawRangeDelIter() + var rangeDelIter keyspan.FragmentIterator + if provider.IsForeign(objMeta) { + switch manifest.LevelToInt(opts.level) { + case 5: + rangeDelIter, err = v.reader.NewFixedSeqnumRangeDelIter(base.SeqNumL5RangeDel) + case 6: + // Let rangeDelIter remain nil. We don't need to return rangedels from + // this file as they will not apply to any other files. For the purpose + // of collapsing rangedels within this file, we create another rangeDelIter + // below for use with the interleaving iter. + default: + panic(fmt.Sprintf("unexpected level for foreign sstable: %d", manifest.LevelToInt(opts.level))) + } + } else { + rangeDelIter, err = v.reader.NewRawRangeDelIter() + } if err != nil { c.unrefValue(v) return nil, nil, err @@ -424,6 +451,33 @@ func (c *tableCacheShard) newIters( // NB: v.closeHook takes responsibility for calling unrefValue(v) here. Take // care to avoid introducing an allocation here by adding a closure. iter.SetCloseHook(v.closeHook) + if provider.IsForeign(objMeta) { + // NB: IsForeign() guarantees IsShared, so opts must not be nil as we've + // already panicked on the nil case above. + pointKeySeqNum := base.PointSeqNumForLevel(manifest.LevelToInt(opts.level)) + pcIter := pointCollapsingIterator{ + comparer: dbOpts.opts.Comparer, + merge: dbOpts.opts.Merge, + seqNum: math.MaxUint64, + elideRangeDeletes: true, + fixedSeqNum: pointKeySeqNum, + } + // Open a second rangedel iter. This is solely for the interleaving iter to + // be able to efficiently delete covered range deletes. We don't need to fix + // the sequence number in this iter, as these range deletes will not be + // exposed to anything other than the interleaving iter and + // pointCollapsingIter. + rangeDelIter, err := v.reader.NewRawRangeDelIter() + if err != nil { + c.unrefValue(v) + return nil, nil, err + } + if rangeDelIter == nil { + rangeDelIter = emptyKeyspanIter + } + pcIter.iter.Init(dbOpts.opts.Comparer, iter, rangeDelIter, nil /* mask */, opts.LowerBound, opts.UpperBound) + iter = &pointCollapsingSSTIterator{pointCollapsingIterator: pcIter, childIter: iter} + } c.iterCount.Add(1) dbOpts.iterCount.Add(1) @@ -485,6 +539,29 @@ func (c *tableCacheShard) newRangeKeyIter( return emptyKeyspanIter, nil } + objMeta, err := dbOpts.objProvider.Lookup(fileTypeTable, file.FileBacking.DiskFileNum) + if err != nil { + return nil, err + } + if objMeta.IsShared() && opts == nil { + panic("unexpected nil opts when reading shared file") + } + if dbOpts.objProvider.IsForeign(objMeta) { + transform := &rangekey.ForeignSSTTransformer{ + Comparer: dbOpts.opts.Comparer, + Level: manifest.LevelToInt(opts.Level), + } + if iter == nil { + iter = emptyKeyspanIter + } + transformIter := &keyspan.TransformerIter{ + FragmentIterator: iter, + Transformer: transform, + Compare: dbOpts.opts.Comparer.Compare, + } + return transformIter, nil + } + return iter, nil } diff --git a/testdata/event_listener b/testdata/event_listener index 2a0c35c842..a6960af931 100644 --- a/testdata/event_listener +++ b/testdata/event_listener @@ -239,7 +239,7 @@ compact 1 2.3 K 0 B 0 (size == esti zmemtbl 0 0 B ztbl 0 0 B bcache 8 1.4 K 11.1% (score == hit-rate) - tcache 1 720 B 40.0% (score == hit-rate) + tcache 1 728 B 40.0% (score == hit-rate) snaps 0 - 0 (score == earliest seq num) titers 0 filter - - 0.0% (score == utility) @@ -318,7 +318,7 @@ compact 1 4.7 K 0 B 0 (size == esti zmemtbl 0 0 B ztbl 0 0 B bcache 16 2.9 K 14.3% (score == hit-rate) - tcache 1 720 B 50.0% (score == hit-rate) + tcache 1 728 B 50.0% (score == hit-rate) snaps 0 - 0 (score == earliest seq num) titers 0 filter - - 0.0% (score == utility) diff --git a/testdata/ingest b/testdata/ingest index 6ef4e3b218..b25433b81a 100644 --- a/testdata/ingest +++ b/testdata/ingest @@ -48,7 +48,7 @@ compact 0 0 B 0 B 0 (size == esti zmemtbl 0 0 B ztbl 0 0 B bcache 8 1.5 K 42.9% (score == hit-rate) - tcache 1 720 B 50.0% (score == hit-rate) + tcache 1 728 B 50.0% (score == hit-rate) snaps 0 - 0 (score == earliest seq num) titers 0 filter - - 0.0% (score == utility) diff --git a/testdata/metrics b/testdata/metrics index 5a7a840313..464be0f7c0 100644 --- a/testdata/metrics +++ b/testdata/metrics @@ -34,7 +34,7 @@ compact 0 0 B 0 B 0 (size == esti zmemtbl 1 256 K ztbl 0 0 B bcache 4 697 B 0.0% (score == hit-rate) - tcache 1 720 B 0.0% (score == hit-rate) + tcache 1 728 B 0.0% (score == hit-rate) snaps 0 - 0 (score == earliest seq num) titers 1 filter - - 0.0% (score == utility) @@ -145,7 +145,7 @@ compact 1 0 B 0 B 0 (size == esti zmemtbl 1 256 K ztbl 1 770 B bcache 4 697 B 42.9% (score == hit-rate) - tcache 1 720 B 66.7% (score == hit-rate) + tcache 1 728 B 66.7% (score == hit-rate) snaps 0 - 0 (score == earliest seq num) titers 1 filter - - 0.0% (score == utility) diff --git a/testdata/point_collapsing_iter b/testdata/point_collapsing_iter new file mode 100644 index 0000000000..47fcc7f51c --- /dev/null +++ b/testdata/point_collapsing_iter @@ -0,0 +1,173 @@ + +define +a.SET.5:foo +b.SET.6:foo +b.DEL.4: +c.SET.7:bar +c.SET.5:foo +---- + +iter +first +next +next +prev +next +next +next +prev +---- +a#5,1:foo +b#6,1:foo +c#7,1:bar +b#6,1:foo +c#7,1:bar +. +. +c#7,1:bar + +iter +last +prev +prev +prev +prev +prev +---- +c#7,1:bar +b#6,1:foo +a#5,1:foo +. +. +. + +# Ensure that we pause at (and return) rangedel start points correctly. + +define +a.RANGEDEL.4:b +a.SET.5:foo +b.RANGEDEL.3:c +b.SET.6:foo +b.DEL.4: +c.SET.7:bar +c.SET.5:foo +---- + +iter +seek-ge b +next +next +prev +prev +prev +---- +b#72057594037927935,15: +b#6,1:foo +c#7,1:bar +b#6,1:foo +b#72057594037927935,15: +a#5,1:foo + +iter elide-range-dels=true +seek-ge b +next +next +prev +prev +prev +---- +b#6,1:foo +c#7,1:bar +. +c#7,1:bar +b#6,1:foo +a#5,1:foo + +# Ensure that the merge stops at the rangedel for b. + +define +a.RANGEDEL.4:b +a.SET.5:foo +b.RANGEDEL.4:c +b.MERGE.8:bar +b.MERGE.6:foobaz +b.SET.3:foo +b.DEL.2: +c.SET.7:bar +c.SET.5:foo +---- + +iter +seek-ge a +next +next +next +next +---- +a#72057594037927935,15: +a#5,1:foo +b#72057594037927935,15: +b#8,2:foobazbar +c#7,1:bar + +iter +last +prev +prev +prev +prev +---- +c#7,1:bar +b#6,2:foobazbar +b#72057594037927935,15: +a#5,1:foo +a#72057594037927935,15: + +iter elide-range-dels=true +first +next +next +next +---- +a#5,1:foo +b#8,2:foobazbar +c#7,1:bar +. + +iter +seek-lt cc +prev +prev +prev +next +prev +prev +prev +next +next +---- +c#7,1:bar +b#6,2:foobazbar +b#72057594037927935,15: +a#5,1:foo +b#72057594037927935,15: +a#5,1:foo +a#72057594037927935,15: +. +a#72057594037927935,15: +a#5,1:foo + +iter elide-range-dels=true +seek-lt cc +prev +prev +next +prev +prev +---- +c#7,1:bar +b#6,2:foobazbar +a#5,1:foo +b#8,2:foobazbar +a#5,1:foo +.