From aeb26d7ee20c8552ee4b0194c9dd320e30703acd Mon Sep 17 00:00:00 2001
From: "Artur M. Wolff"
Date: Fri, 23 Sep 2022 18:12:07 +0200
Subject: [PATCH] pkg/auth/badgerauth: make readAvailableClocks faster
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change:
- enables compaction of L0 on exit to make sure reads are efficient when
  the database is opened later
- disables BlockCache, which is unnecessary overhead when compression and
  encryption are disabled
- changes the iterator in readAvailableClocks to not prefetch values
  (https://pprof.host/28/flamegraph)

Benchmark:

name                   old time/op  new time/op  delta
ReadAvailableClocks-8  55.9µs ±18%   3.3µs ± 4%  -94.08%  (p=0.008 n=5+5)

Closes #253
Change-Id: Ibee59d0130aca795bcc29129e0d9c416619bcf53
---
 pkg/auth/badgerauth/clock.go      |   3 +
 pkg/auth/badgerauth/clock_test.go | 100 ++++++++++++++++++++++++++++++
 pkg/auth/badgerauth/db.go         |   4 ++
 3 files changed, 107 insertions(+)

diff --git a/pkg/auth/badgerauth/clock.go b/pkg/auth/badgerauth/clock.go
index 721484de..4241b2e4 100644
--- a/pkg/auth/badgerauth/clock.go
+++ b/pkg/auth/badgerauth/clock.go
@@ -88,6 +88,9 @@ func readAvailableClocks(txn *badger.Txn) (map[NodeID]Clock, error) {
 	idToClock := make(map[NodeID]Clock)
 
 	opt := badger.DefaultIteratorOptions
+	// It's much faster without prefetching values, probably because we discard
+	// a lot of them anyway.
+	opt.PrefetchValues = false
 	opt.Prefix = []byte(clockPrefix)
 
 	it := txn.NewIterator(opt)
diff --git a/pkg/auth/badgerauth/clock_test.go b/pkg/auth/badgerauth/clock_test.go
index 138b4c7d..899aa2ae 100644
--- a/pkg/auth/badgerauth/clock_test.go
+++ b/pkg/auth/badgerauth/clock_test.go
@@ -5,13 +5,17 @@ package badgerauth
 
 import (
 	"testing"
+	"time"
 
 	badger "github.com/outcaste-io/badger/v3"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"go.uber.org/zap/zaptest"
 
+	"storj.io/common/testcontext"
 	"storj.io/common/testrand"
+	"storj.io/gateway-mt/pkg/auth/authdb"
+	"storj.io/gateway-mt/pkg/auth/badgerauth/pb"
 )
 
 func TestAdvanceClock(t *testing.T) {
@@ -86,3 +90,99 @@ func TestReadAvailableClocks(t *testing.T) {
 
 	assert.Equal(t, expectedClocks, actualClocks)
 }
+
+var readAvailableClocksResult map[NodeID]Clock
+
+func BenchmarkReadAvailableClocks(b *testing.B) {
+	ctx := testcontext.New(b)
+	defer ctx.Cleanup()
+
+	logger := zaptest.NewLogger(b)
+	defer ctx.Check(logger.Sync)
+
+	db, err := OpenDB(logger, Config{
+		ID:         NodeID{0},
+		FirstStart: true,
+		Path:       ctx.Dir("database"),
+	})
+	require.NoError(b, err)
+	defer ctx.Check(db.Close)
+
+	// Prepare data in similar proportions as they are in prod:
+	//
+	// Batch 1x, 2x, and 100x records for IDs 0, 1, and 2 (representing regions)
+	// into 10 TXs because they won't fit into one TX.
+	udb := db.UnderlyingDB()
+
+	for i := 0; i < 10; i++ {
+		require.NoError(b, udb.Update(func(txn *badger.Txn) error {
+			for j := 0; j < 3; j++ {
+				if err = ensureClock(txn, NodeID{byte(j)}); err != nil {
+					return err
+				}
+			}
+
+			record := &pb.Record{
+				CreatedAtUnix:        time.Now().Unix(),
+				Public:               true,
+				SatelliteAddress:     "bench",
+				MacaroonHead:         []byte{'b', 'e', 'n', 'c', 'h'},
+				ExpiresAtUnix:        time.Now().Add(time.Hour).Unix(),
+				EncryptedSecretKey:   []byte{'b', 'e', 'n', 'c', 'h'},
+				EncryptedAccessGrant: []byte{'b', 'e', 'n', 'c', 'h'},
+				State:                pb.Record_CREATED,
+			}
+
+			for j := 0; j < 200; j++ {
+				var keyHash authdb.KeyHash
+				if err = keyHash.SetBytes(testrand.RandAlphaNumeric(32)); err != nil {
+					return err
+				}
+				record.CreatedAtUnix = time.Now().Unix()
+				record.ExpiresAtUnix = time.Now().Add(time.Hour).Unix()
+				if err = InsertRecord(logger, txn, NodeID{0}, keyHash, record); err != nil {
+					return err
+				}
+			}
+			for j := 0; j < 400; j++ {
+				var keyHash authdb.KeyHash
+				if err = keyHash.SetBytes(testrand.RandAlphaNumeric(32)); err != nil {
+					return err
+				}
+				record.CreatedAtUnix = time.Now().Unix()
+				record.ExpiresAtUnix = time.Now().Add(time.Hour).Unix()
+				if err = InsertRecord(logger, txn, NodeID{1}, keyHash, record); err != nil {
+					return err
+				}
+			}
+			for j := 0; j < 20000; j++ {
+				var keyHash authdb.KeyHash
+				if err = keyHash.SetBytes(testrand.RandAlphaNumeric(32)); err != nil {
+					return err
+				}
+				record.CreatedAtUnix = time.Now().Unix()
+				record.ExpiresAtUnix = time.Now().Add(time.Hour).Unix()
+				if err = InsertRecord(logger, txn, NodeID{2}, keyHash, record); err != nil {
+					return err
+				}
+			}
+
+			return nil
+		}))
+	}
+
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		var result map[NodeID]Clock
+		// Always record the result of readAvailableClocks to prevent the
+		// compiler from eliminating the function call.
+		require.NoError(b, udb.View(func(txn *badger.Txn) error {
+			result, err = readAvailableClocks(txn)
+			return err
+		}))
+		// Always store the result in a package-level variable so the compiler
+		// cannot eliminate the benchmark itself.
+		readAvailableClocksResult = result
+	}
+}
diff --git a/pkg/auth/badgerauth/db.go b/pkg/auth/badgerauth/db.go
index 2db56eea..a7d1a723 100644
--- a/pkg/auth/badgerauth/db.go
+++ b/pkg/auth/badgerauth/db.go
@@ -63,9 +63,13 @@ func OpenDB(log *zap.Logger, config Config) (*DB, error) {
 
 	// We want to fsync after each write to ensure we don't lose data:
 	opt = opt.WithSyncWrites(true)
+	opt = opt.WithCompactL0OnClose(true)
 	// Currently, we don't want to compress because authservice is mostly
 	// deployed in environments where filesystem-level compression is on:
 	opt = opt.WithCompression(options.None)
+	// If compression and encryption are disabled, adding a cache will lead to
+	// unnecessary overhead affecting read performance. Let's disable it then:
+	opt = opt.WithBlockCacheSize(0)
 	opt = opt.WithLogger(badgerLogger{log.Sugar().Named("storage")})
 
 	var err error
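
For anyone who wants to try these knobs outside of authservice, below is a
minimal, self-contained sketch (not part of the patch) that combines the two
sides of this change: opening Badger with CompactL0OnClose, no compression,
and a disabled block cache (as in db.go), and then doing a key-only prefix
scan with PrefetchValues turned off (the same pattern readAvailableClocks now
uses). The /tmp path, the "clock/" prefix, and the seeded keys are invented
for illustration, and the options import path is assumed to mirror the
badger/v3 module path used above.

package main

import (
	"fmt"
	"log"

	badger "github.com/outcaste-io/badger/v3"
	"github.com/outcaste-io/badger/v3/options"
)

func main() {
	// Example-only path; authservice takes its database path from config.
	opt := badger.DefaultOptions("/tmp/badgerauth-example")
	// fsync after each write, as authservice already does:
	opt = opt.WithSyncWrites(true)
	// Compact L0 on Close so reads stay efficient when the DB is reopened:
	opt = opt.WithCompactL0OnClose(true)
	// Leave compression to the filesystem:
	opt = opt.WithCompression(options.None)
	// With compression and encryption off, the block cache is only overhead:
	opt = opt.WithBlockCacheSize(0)

	db, err := badger.Open(opt)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Seed a few keys under an invented "clock/" prefix standing in for the
	// real clock entries.
	err = db.Update(func(txn *badger.Txn) error {
		for i := byte(0); i < 3; i++ {
			if err := txn.Set([]byte(fmt.Sprintf("clock/%d", i)), []byte{i}); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}

	// Key-only scan: with PrefetchValues disabled the iterator doesn't read
	// values from the value log up front, which is where most of the benchmark
	// win likely comes from. Call item.ValueCopy(nil) only for the items whose
	// values are actually needed.
	err = db.View(func(txn *badger.Txn) error {
		io := badger.DefaultIteratorOptions
		io.PrefetchValues = false
		io.Prefix = []byte("clock/")

		it := txn.NewIterator(io)
		defer it.Close()
		for it.Rewind(); it.Valid(); it.Next() {
			fmt.Printf("%s\n", it.Item().Key())
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}

Prefetching generally only pays off when most values in the scanned range are
read; readAvailableClocks discards most of them, which the commit message
points to as the likely reason for the speedup.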