From 26611ea8121ccbc60ca852ef37cb58f3e891a053 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Mon, 9 Dec 2024 00:15:38 -0800 Subject: [PATCH 01/12] go.mod: update btcwallet to latest to eliminate waddrmgr deadlock --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index bbb421de40..98374d814b 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0 github.com/btcsuite/btclog v0.0.0-20241003133417-09c4e92e319c github.com/btcsuite/btclog/v2 v2.0.0 - github.com/btcsuite/btcwallet v0.16.10-0.20241113134707-b4ff60753aaa + github.com/btcsuite/btcwallet v0.16.10-0.20241127094224-93c858b2ad63 github.com/btcsuite/btcwallet/wallet/txauthor v1.3.5 github.com/btcsuite/btcwallet/wallet/txrules v1.2.2 github.com/btcsuite/btcwallet/walletdb v1.4.4 diff --git a/go.sum b/go.sum index 4c452df735..d8d3a112e7 100644 --- a/go.sum +++ b/go.sum @@ -95,8 +95,8 @@ github.com/btcsuite/btclog v0.0.0-20241003133417-09c4e92e319c/go.mod h1:w7xnGOhw github.com/btcsuite/btclog/v2 v2.0.0 h1:ZfOBItEeLWfU0voi88K72j8vtxP4/dHhxRFf2bxZkVo= github.com/btcsuite/btclog/v2 v2.0.0/go.mod h1:XItGUfVOxotJL8kkuk2Hj3EVow5KCugXl3wWfQ6K0AE= github.com/btcsuite/btcutil v0.0.0-20190425235716-9e5f4b9a998d/go.mod h1:+5NJ2+qvTyV9exUAL/rxXi3DcLg2Ts+ymUAY5y4NvMg= -github.com/btcsuite/btcwallet v0.16.10-0.20241113134707-b4ff60753aaa h1:x7vYpwkPL5zeJEWPPaRunybH9ERRMGWeNf7x/0aU/38= -github.com/btcsuite/btcwallet v0.16.10-0.20241113134707-b4ff60753aaa/go.mod h1:1HJXYbjJzgumlnxOC2+ViR1U+gnHWoOn7WeK5OfY1eU= +github.com/btcsuite/btcwallet v0.16.10-0.20241127094224-93c858b2ad63 h1:YN+PekOLlLoGxE3P5RJaGgodZD5DDJSU8eXQZVwwCxM= +github.com/btcsuite/btcwallet v0.16.10-0.20241127094224-93c858b2ad63/go.mod h1:1HJXYbjJzgumlnxOC2+ViR1U+gnHWoOn7WeK5OfY1eU= github.com/btcsuite/btcwallet/wallet/txauthor v1.3.5 h1:Rr0njWI3r341nhSPesKQ2JF+ugDSzdPoeckS75SeDZk= github.com/btcsuite/btcwallet/wallet/txauthor v1.3.5/go.mod h1:+tXJ3Ym0nlQc/iHSwW1qzjmPs3ev+UVWMbGgfV1OZqU= github.com/btcsuite/btcwallet/wallet/txrules v1.2.2 h1:YEO+Lx1ZJJAtdRrjuhXjWrYsmAk26wLTlNzxt2q0lhk= From 6af70e4bce95eab3c3c3dea2ea69d9f4b753deaf Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Wed, 30 Oct 2024 15:01:59 -0700 Subject: [PATCH 02/12] go.mod: use local kvdb to reapply removal of global postgres lock --- go.mod | 3 +++ go.sum | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 98374d814b..52aaf2b9b7 100644 --- a/go.mod +++ b/go.mod @@ -203,6 +203,9 @@ replace github.com/ulikunitz/xz => github.com/ulikunitz/xz v0.5.11 // https://deps.dev/advisory/OSV/GO-2021-0053?from=%2Fgo%2Fgithub.com%252Fgogo%252Fprotobuf%2Fv1.3.1 replace github.com/gogo/protobuf => github.com/gogo/protobuf v1.3.2 +// Use local kvdb package until new version is tagged. +replace github.com/lightningnetwork/lnd/kvdb => ./kvdb + // We want to format raw bytes as hex instead of base64. The forked version // allows us to specify that as an option. replace google.golang.org/protobuf => github.com/lightninglabs/protobuf-go-hex-display v1.30.0-hex-display diff --git a/go.sum b/go.sum index d8d3a112e7..eb81c345ac 100644 --- a/go.sum +++ b/go.sum @@ -460,8 +460,6 @@ github.com/lightningnetwork/lnd/fn/v2 v2.0.2 h1:M7o2lYrh/zCp+lntPB3WP/rWTu5U+4ss github.com/lightningnetwork/lnd/fn/v2 v2.0.2/go.mod h1:TOzwrhjB/Azw1V7aa8t21ufcQmdsQOQMDtxVOQWNl8s= github.com/lightningnetwork/lnd/healthcheck v1.2.6 h1:1sWhqr93GdkWy4+6U7JxBfcyZIE78MhIHTJZfPx7qqI= github.com/lightningnetwork/lnd/healthcheck v1.2.6/go.mod h1:Mu02um4CWY/zdTOvFje7WJgJcHyX2zq/FG3MhOAiGaQ= -github.com/lightningnetwork/lnd/kvdb v1.4.11 h1:fk1HMVFrsVK3xqU7q+JWHRgBltw/a2qIg1E3zazMb/8= -github.com/lightningnetwork/lnd/kvdb v1.4.11/go.mod h1:a5cMDKMjbJA8dD06ZqqnYkmSh5DhEbbG8C1YHM3NN+k= github.com/lightningnetwork/lnd/queue v1.1.1 h1:99ovBlpM9B0FRCGYJo6RSFDlt8/vOkQQZznVb18iNMI= github.com/lightningnetwork/lnd/queue v1.1.1/go.mod h1:7A6nC1Qrm32FHuhx/mi1cieAiBZo5O6l8IBIoQxvkz4= github.com/lightningnetwork/lnd/sqldb v1.0.5 h1:ax5vBPf44tN/uD6C5+hBPBjOJ7cRMrUL+sVOdzmLVt4= From 5a55c250d01a1379d2764f48ac17cd35d55ae14a Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Wed, 30 Oct 2024 14:40:23 -0700 Subject: [PATCH 03/12] Reapply "kvdb/postgres: remove global application level lock" This reverts commit 67419a7c0c6410761ee369c1d24aba8641b8e400. --- kvdb/postgres/db.go | 1 - kvdb/sqlbase/db.go | 8 ------- kvdb/sqlbase/readwrite_tx.go | 44 ------------------------------------ 3 files changed, 53 deletions(-) diff --git a/kvdb/postgres/db.go b/kvdb/postgres/db.go index 90ca8324a8..425ba16225 100644 --- a/kvdb/postgres/db.go +++ b/kvdb/postgres/db.go @@ -28,7 +28,6 @@ func newPostgresBackend(ctx context.Context, config *Config, prefix string) ( Schema: "public", TableNamePrefix: prefix, SQLiteCmdReplacements: sqliteCmdReplacements, - WithTxLevelLock: true, } return sqlbase.NewSqlBackend(ctx, cfg) diff --git a/kvdb/sqlbase/db.go b/kvdb/sqlbase/db.go index 221a77bfd6..6ef085712a 100644 --- a/kvdb/sqlbase/db.go +++ b/kvdb/sqlbase/db.go @@ -55,10 +55,6 @@ type Config struct { // commands. Note that the sqlite keywords to be replaced are // case-sensitive. SQLiteCmdReplacements SQLiteCmdReplacements - - // WithTxLevelLock when set will ensure that there is a transaction - // level lock. - WithTxLevelLock bool } // db holds a reference to the sql db connection. @@ -79,10 +75,6 @@ type db struct { // db is the underlying database connection instance. db *sql.DB - // lock is the global write lock that ensures single writer. This is - // only used if cfg.WithTxLevelLock is set. - lock sync.RWMutex - // table is the name of the table that contains the data for all // top-level buckets that have keys that cannot be mapped to a distinct // sql table. diff --git a/kvdb/sqlbase/readwrite_tx.go b/kvdb/sqlbase/readwrite_tx.go index ec761931ad..18a6a682c9 100644 --- a/kvdb/sqlbase/readwrite_tx.go +++ b/kvdb/sqlbase/readwrite_tx.go @@ -5,7 +5,6 @@ package sqlbase import ( "context" "database/sql" - "sync" "github.com/btcsuite/btcwallet/walletdb" ) @@ -20,28 +19,11 @@ type readWriteTx struct { // active is true if the transaction hasn't been committed yet. active bool - - // locker is a pointer to the global db lock. - locker sync.Locker } // newReadWriteTx creates an rw transaction using a connection from the // specified pool. func newReadWriteTx(db *db, readOnly bool) (*readWriteTx, error) { - locker := newNoopLocker() - if db.cfg.WithTxLevelLock { - // Obtain the global lock instance. An alternative here is to - // obtain a database lock from Postgres. Unfortunately there is - // no database-level lock in Postgres, meaning that each table - // would need to be locked individually. Perhaps an advisory - // lock could perform this function too. - locker = &db.lock - if readOnly { - locker = db.lock.RLocker() - } - } - locker.Lock() - // Start the transaction. Don't use the timeout context because it would // be applied to the transaction as a whole. If possible, mark the // transaction as read-only to make sure that potential programming @@ -54,7 +36,6 @@ func newReadWriteTx(db *db, readOnly bool) (*readWriteTx, error) { }, ) if err != nil { - locker.Unlock() return nil, err } @@ -62,7 +43,6 @@ func newReadWriteTx(db *db, readOnly bool) (*readWriteTx, error) { db: db, tx: tx, active: true, - locker: locker, }, nil } @@ -94,7 +74,6 @@ func (tx *readWriteTx) Rollback() error { // Unlock the transaction regardless of the error result. tx.active = false - tx.locker.Unlock() return err } @@ -162,7 +141,6 @@ func (tx *readWriteTx) Commit() error { // Unlock the transaction regardless of the error result. tx.active = false - tx.locker.Unlock() return err } @@ -204,25 +182,3 @@ func (tx *readWriteTx) Exec(query string, args ...interface{}) (sql.Result, return tx.tx.ExecContext(ctx, query, args...) } - -// noopLocker is an implementation of a no-op sync.Locker. -type noopLocker struct{} - -// newNoopLocker creates a new noopLocker. -func newNoopLocker() sync.Locker { - return &noopLocker{} -} - -// Lock is a noop. -// -// NOTE: this is part of the sync.Locker interface. -func (n *noopLocker) Lock() { -} - -// Unlock is a noop. -// -// NOTE: this is part of the sync.Locker interface. -func (n *noopLocker) Unlock() { -} - -var _ sync.Locker = (*noopLocker)(nil) From 82a04f064bce3dc6b705efe5457ad38490a0a2b5 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Thu, 12 Dec 2024 10:38:02 -0800 Subject: [PATCH 04/12] log: add sub-logger for kvdb/sqlbase --- log.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/log.go b/log.go index 46047fb56c..d3c93b4325 100644 --- a/log.go +++ b/log.go @@ -26,6 +26,7 @@ import ( "github.com/lightningnetwork/lnd/healthcheck" "github.com/lightningnetwork/lnd/htlcswitch" "github.com/lightningnetwork/lnd/invoices" + "github.com/lightningnetwork/lnd/kvdb/sqlbase" "github.com/lightningnetwork/lnd/lncfg" "github.com/lightningnetwork/lnd/lnrpc/autopilotrpc" "github.com/lightningnetwork/lnd/lnrpc/chainrpc" @@ -151,6 +152,7 @@ func SetupLoggers(root *build.SubLoggerManager, interceptor signal.Interceptor) AddSubLogger(root, "DISC", interceptor, discovery.UseLogger) AddSubLogger(root, "NTFN", interceptor, chainntnfs.UseLogger) AddSubLogger(root, "CHDB", interceptor, channeldb.UseLogger) + AddSubLogger(root, "SQLB", interceptor, sqlbase.UseLogger) AddSubLogger(root, "HSWC", interceptor, htlcswitch.UseLogger) AddSubLogger(root, "CNCT", interceptor, contractcourt.UseLogger) AddSubLogger(root, "UTXN", interceptor, contractcourt.UseNurseryLogger) From 26564c2468786a38d40fa4e139333e594f15b1c3 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Mon, 18 Nov 2024 15:28:20 -0800 Subject: [PATCH 05/12] itest: fix flake in multi-hop payments To make this itest work reliably with multiple parallel SQL transactions, we need to count both the settle and final HTLC events. Otherwise, sometimes the final events from earlier forwards are counted before the forward events from later forwards, causing a miscount of the settle events. If we expect both the settle and final event for each forward, we don't miscount. --- itest/lnd_multi-hop-payments_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/itest/lnd_multi-hop-payments_test.go b/itest/lnd_multi-hop-payments_test.go index e965140523..9c55d7845a 100644 --- a/itest/lnd_multi-hop-payments_test.go +++ b/itest/lnd_multi-hop-payments_test.go @@ -223,11 +223,11 @@ func testMultiHopPayments(ht *lntest.HarnessTest) { // Dave and Alice should both have forwards and settles for // their role as forwarding nodes. ht.AssertHtlcEvents( - daveEvents, numPayments, 0, numPayments, 0, + daveEvents, numPayments, 0, numPayments*2, 0, routerrpc.HtlcEvent_FORWARD, ) ht.AssertHtlcEvents( - aliceEvents, numPayments, 0, numPayments, 0, + aliceEvents, numPayments, 0, numPayments*2, 0, routerrpc.HtlcEvent_FORWARD, ) From 42afb517ec91e206a23276c0edea654b67f2f720 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Fri, 1 Nov 2024 19:50:05 -0700 Subject: [PATCH 06/12] batch: handle serialization errors correctly --- batch/batch.go | 17 +++++++++-- batch/batch_test.go | 74 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 batch/batch_test.go diff --git a/batch/batch.go b/batch/batch.go index fcc691582a..9f4842c655 100644 --- a/batch/batch.go +++ b/batch/batch.go @@ -5,6 +5,7 @@ import ( "sync" "github.com/lightningnetwork/lnd/kvdb" + "github.com/lightningnetwork/lnd/sqldb" ) // errSolo is a sentinel error indicating that the requester should re-run the @@ -55,8 +56,20 @@ func (b *batch) run() { for i, req := range b.reqs { err := req.Update(tx) if err != nil { - failIdx = i - return err + // If we get a serialization error, we + // want the underlying SQL retry + // mechanism to retry the entire batch. + // Otherwise, we can succeed in an + // sqldb retry and still re-execute the + // failing request individually. + dbErr := sqldb.MapSQLError(err) + if !sqldb.IsSerializationError(dbErr) { + failIdx = i + + return err + } + + return dbErr } } return nil diff --git a/batch/batch_test.go b/batch/batch_test.go new file mode 100644 index 0000000000..fef2c55979 --- /dev/null +++ b/batch/batch_test.go @@ -0,0 +1,74 @@ +package batch + +import ( + "errors" + "path/filepath" + "sync" + "testing" + "time" + + "github.com/btcsuite/btcwallet/walletdb" + "github.com/lightningnetwork/lnd/kvdb" + "github.com/stretchr/testify/require" +) + +func TestRetry(t *testing.T) { + dbDir := t.TempDir() + + dbName := filepath.Join(dbDir, "weks.db") + db, err := walletdb.Create( + "bdb", dbName, true, kvdb.DefaultDBTimeout, + ) + if err != nil { + t.Fatalf("unable to create walletdb: %v", err) + } + t.Cleanup(func() { + db.Close() + }) + + var ( + mu sync.Mutex + called int + ) + sched := NewTimeScheduler(db, &mu, time.Second) + + // First, we construct a request that should retry individually and + // execute it non-lazily. It should still return the error the second + // time. + req := &Request{ + Update: func(tx kvdb.RwTx) error { + called++ + + return errors.New("test") + }, + } + err = sched.Execute(req) + + // Check and reset the called counter. + mu.Lock() + require.Equal(t, 2, called) + called = 0 + mu.Unlock() + + require.ErrorContains(t, err, "test") + + // Now, we construct a request that should NOT retry because it returns + // a serialization error, which should cause the underlying postgres + // transaction to retry. Since we aren't using postgres, this will + // cause the transaction to not be retried at all. + req = &Request{ + Update: func(tx kvdb.RwTx) error { + called++ + + return errors.New("could not serialize access") + }, + } + err = sched.Execute(req) + + // Check the called counter. + mu.Lock() + require.Equal(t, 1, called) + mu.Unlock() + + require.ErrorContains(t, err, "could not serialize access") +} From fa545f14240354529b2d374d00700e5b2ea886a6 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Mon, 4 Nov 2024 19:15:35 -0800 Subject: [PATCH 07/12] graph/db: handle previously-unhandled errors --- graph/db/graph.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/graph/db/graph.go b/graph/db/graph.go index a6b23e64b3..7608964364 100644 --- a/graph/db/graph.go +++ b/graph/db/graph.go @@ -1127,7 +1127,7 @@ func (c *ChannelGraph) addChannelEdge(tx kvdb.RwTx, err := addLightningNode(tx, &node1Shell) if err != nil { return fmt.Errorf("unable to create shell node "+ - "for: %x", edge.NodeKey1Bytes) + "for: %x: %w", edge.NodeKey1Bytes, err) } case node1Err != nil: return err @@ -1143,7 +1143,7 @@ func (c *ChannelGraph) addChannelEdge(tx kvdb.RwTx, err := addLightningNode(tx, &node2Shell) if err != nil { return fmt.Errorf("unable to create shell node "+ - "for: %x", edge.NodeKey2Bytes) + "for: %x: %w", edge.NodeKey2Bytes, err) } case node2Err != nil: return err @@ -2692,8 +2692,14 @@ func (c *ChannelGraph) delChannelEdgeUnsafe(edges, edgeIndex, chanIndex, // As part of deleting the edge we also remove all disabled entries // from the edgePolicyDisabledIndex bucket. We do that for both // directions. - updateEdgePolicyDisabledIndex(edges, cid, false, false) - updateEdgePolicyDisabledIndex(edges, cid, true, false) + err = updateEdgePolicyDisabledIndex(edges, cid, false, false) + if err != nil { + return err + } + err = updateEdgePolicyDisabledIndex(edges, cid, true, false) + if err != nil { + return err + } // With the edge data deleted, we can purge the information from the two // edge indexes. @@ -4410,11 +4416,14 @@ func putChanEdgePolicy(edges kvdb.RwBucket, edge *models.ChannelEdgePolicy, return err } - updateEdgePolicyDisabledIndex( + err = updateEdgePolicyDisabledIndex( edges, edge.ChannelID, edge.ChannelFlags&lnwire.ChanUpdateDirection > 0, edge.IsDisabled(), ) + if err != nil { + return err + } return edges.Put(edgeKey[:], b.Bytes()[:]) } From feafcc083944670875f7c2117f12acd1bd1c5230 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Wed, 6 Nov 2024 11:29:47 -0800 Subject: [PATCH 08/12] sqldb: improve serialization error handling --- go.mod | 3 +++ go.sum | 2 -- kvdb/go.mod | 6 +++++- kvdb/go.sum | 14 ++++++-------- sqldb/interfaces.go | 2 +- sqldb/sqlerrors.go | 33 +++++++++++++++++++++++++++++---- 6 files changed, 44 insertions(+), 16 deletions(-) diff --git a/go.mod b/go.mod index 52aaf2b9b7..951077c759 100644 --- a/go.mod +++ b/go.mod @@ -206,6 +206,9 @@ replace github.com/gogo/protobuf => github.com/gogo/protobuf v1.3.2 // Use local kvdb package until new version is tagged. replace github.com/lightningnetwork/lnd/kvdb => ./kvdb +// Use local sqldb package until new version is tagged. +replace github.com/lightningnetwork/lnd/sqldb => ./sqldb + // We want to format raw bytes as hex instead of base64. The forked version // allows us to specify that as an option. replace google.golang.org/protobuf => github.com/lightninglabs/protobuf-go-hex-display v1.30.0-hex-display diff --git a/go.sum b/go.sum index eb81c345ac..3050d87986 100644 --- a/go.sum +++ b/go.sum @@ -462,8 +462,6 @@ github.com/lightningnetwork/lnd/healthcheck v1.2.6 h1:1sWhqr93GdkWy4+6U7JxBfcyZI github.com/lightningnetwork/lnd/healthcheck v1.2.6/go.mod h1:Mu02um4CWY/zdTOvFje7WJgJcHyX2zq/FG3MhOAiGaQ= github.com/lightningnetwork/lnd/queue v1.1.1 h1:99ovBlpM9B0FRCGYJo6RSFDlt8/vOkQQZznVb18iNMI= github.com/lightningnetwork/lnd/queue v1.1.1/go.mod h1:7A6nC1Qrm32FHuhx/mi1cieAiBZo5O6l8IBIoQxvkz4= -github.com/lightningnetwork/lnd/sqldb v1.0.5 h1:ax5vBPf44tN/uD6C5+hBPBjOJ7cRMrUL+sVOdzmLVt4= -github.com/lightningnetwork/lnd/sqldb v1.0.5/go.mod h1:OG09zL/PHPaBJefp4HsPz2YLUJ+zIQHbpgCtLnOx8I4= github.com/lightningnetwork/lnd/ticker v1.1.1 h1:J/b6N2hibFtC7JLV77ULQp++QLtCwT6ijJlbdiZFbSM= github.com/lightningnetwork/lnd/ticker v1.1.1/go.mod h1:waPTRAAcwtu7Ji3+3k+u/xH5GHovTsCoSVpho0KDvdA= github.com/lightningnetwork/lnd/tlv v1.3.0 h1:exS/KCPEgpOgviIttfiXAPaUqw2rHQrnUOpP7HPBPiY= diff --git a/kvdb/go.mod b/kvdb/go.mod index 3dc4281642..f1faf2a293 100644 --- a/kvdb/go.mod +++ b/kvdb/go.mod @@ -16,7 +16,7 @@ require ( go.etcd.io/etcd/client/v3 v3.5.7 go.etcd.io/etcd/server/v3 v3.5.7 golang.org/x/net v0.22.0 - modernc.org/sqlite v1.29.8 + modernc.org/sqlite v1.29.10 ) require ( @@ -59,6 +59,7 @@ require ( github.com/jackc/pgproto3/v2 v2.3.3 // indirect github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect github.com/jackc/pgtype v1.14.0 // indirect + github.com/jackc/pgx/v5 v5.3.1 // indirect github.com/jonboulle/clockwork v0.2.2 // indirect github.com/json-iterator/go v1.1.11 // indirect github.com/lib/pq v1.10.9 // indirect @@ -135,6 +136,9 @@ replace github.com/dgrijalva/jwt-go => github.com/golang-jwt/jwt v3.2.1+incompat // This replace is for https://github.com/advisories/GHSA-25xm-hr59-7c27 replace github.com/ulikunitz/xz => github.com/ulikunitz/xz v0.5.11 +// Use local sqldb package until new version is tagged. +replace github.com/lightningnetwork/lnd/sqldb => ../sqldb + // This replace is for // https://deps.dev/advisory/OSV/GO-2021-0053?from=%2Fgo%2Fgithub.com%252Fgogo%252Fprotobuf%2Fv1.3.1 replace github.com/gogo/protobuf => github.com/gogo/protobuf v1.3.2 diff --git a/kvdb/go.sum b/kvdb/go.sum index 329dd2497c..ff51e77dd9 100644 --- a/kvdb/go.sum +++ b/kvdb/go.sum @@ -257,6 +257,8 @@ github.com/jackc/pgx/v4 v4.0.0-pre1.0.20190824185557-6972a5742186/go.mod h1:X+GQ github.com/jackc/pgx/v4 v4.12.1-0.20210724153913-640aa07df17c/go.mod h1:1QD0+tgSXP7iUjYm9C1NxKhny7lq6ee99u/z+IHFcgs= github.com/jackc/pgx/v4 v4.18.1 h1:YP7G1KABtKpB5IHrO9vYwSrCOhs7p3uqhvhhQBptya0= github.com/jackc/pgx/v4 v4.18.1/go.mod h1:FydWkUyadDmdNH/mHnGob881GawxeEm7TcMCzkb+qQE= +github.com/jackc/pgx/v5 v5.3.1 h1:Fcr8QJ1ZeLi5zsPZqQeUZhNhxfkkKBOgJuYkJHoBOtU= +github.com/jackc/pgx/v5 v5.3.1/go.mod h1:t3JDKnCBlYIc0ewLF0Q7B8MXmoIaBOZj/ic7iHozM/8= github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= @@ -297,8 +299,6 @@ github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lightningnetwork/lnd/healthcheck v1.2.4 h1:lLPLac+p/TllByxGSlkCwkJlkddqMP5UCoawCj3mgFQ= github.com/lightningnetwork/lnd/healthcheck v1.2.4/go.mod h1:G7Tst2tVvWo7cx6mSBEToQC5L1XOGxzZTPB29g9Rv2I= -github.com/lightningnetwork/lnd/sqldb v1.0.2 h1:PfuYzScYMD9/QonKo/QvgsbXfTnH5DfldIimkfdW4Bk= -github.com/lightningnetwork/lnd/sqldb v1.0.2/go.mod h1:V2Xl6JNWLTKE97WJnwfs0d0TYJdIQTqK8/3aAwkd3qI= github.com/lightningnetwork/lnd/ticker v1.1.0 h1:ShoBiRP3pIxZHaETndfQ5kEe+S4NdAY1hiX7YbZ4QE4= github.com/lightningnetwork/lnd/ticker v1.1.0/go.mod h1:ubqbSVCn6RlE0LazXuBr7/Zi6QT0uQo++OgIRBxQUrk= github.com/lightningnetwork/lnd/tor v1.0.0 h1:wvEc7I+Y7IOtPglVP3cVBbYhiVhc7uTd7cMF9gQRzwA= @@ -310,8 +310,6 @@ github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hd github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= -github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.1.43 h1:JKfpVSCB84vrAmHzyrsxB5NAr5kLoMXZArPSw7Qlgyg= @@ -383,8 +381,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= -github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU= github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc= @@ -738,8 +736,8 @@ modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4= modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc= modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss= -modernc.org/sqlite v1.29.8 h1:nGKglNx9K5v0As+zF0/Gcl1kMkmaU1XynYyq92PbsC8= -modernc.org/sqlite v1.29.8/go.mod h1:lQPm27iqa4UNZpmr4Aor0MH0HkCLbt1huYDfWylLZFk= +modernc.org/sqlite v1.29.10 h1:3u93dz83myFnMilBGCOLbr+HjklS6+5rJLx4q86RDAg= +modernc.org/sqlite v1.29.10/go.mod h1:ItX2a1OVGgNsFh6Dv60JQvGfJfTPHPVpV6DF59akYOA= modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA= modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0= modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= diff --git a/sqldb/interfaces.go b/sqldb/interfaces.go index f81799c577..3c042aa5a7 100644 --- a/sqldb/interfaces.go +++ b/sqldb/interfaces.go @@ -22,7 +22,7 @@ const ( // DefaultNumTxRetries is the default number of times we'll retry a // transaction if it fails with an error that permits transaction // repetition. - DefaultNumTxRetries = 10 + DefaultNumTxRetries = 20 // DefaultRetryDelay is the default delay between retries. This will be // used to generate a random delay between 0 and this value. diff --git a/sqldb/sqlerrors.go b/sqldb/sqlerrors.go index 6cc1b1cf87..59729910ee 100644 --- a/sqldb/sqlerrors.go +++ b/sqldb/sqlerrors.go @@ -17,6 +17,16 @@ var ( // ErrRetriesExceeded is returned when a transaction is retried more // than the max allowed valued without a success. ErrRetriesExceeded = errors.New("db tx retries exceeded") + + // postgresErrMsgs are strings that signify retriable errors resulting + // from serialization failures. + postgresErrMsgs = []string{ + "could not serialize access", + "current transaction is aborted", + "not enough elements in RWConflictPool", + "deadlock detected", + "commit unexpectedly resulted in rollback", + } ) // MapSQLError attempts to interpret a given error as a database agnostic SQL @@ -41,10 +51,11 @@ func MapSQLError(err error) error { // Sometimes the error won't be properly wrapped, so we'll need to // inspect raw error itself to detect something we can wrap properly. // This handles a postgres variant of the error. - const postgresErrMsg = "could not serialize access" - if strings.Contains(err.Error(), postgresErrMsg) { - return &ErrSerializationError{ - DBError: err, + for _, postgresErrMsg := range postgresErrMsgs { + if strings.Contains(err.Error(), postgresErrMsg) { + return &ErrSerializationError{ + DBError: err, + } } } @@ -105,6 +116,20 @@ func parsePostgresError(pqErr *pgconn.PgError) error { DBError: pqErr, } + // In failed SQL transaction because we didn't catch a previous + // serialization error, so return this one as a serialization error. + case pgerrcode.InFailedSQLTransaction: + return &ErrSerializationError{ + DBError: pqErr, + } + + // Deadlock detedted because of a serialization error, so return this + // one as a serialization error. + case pgerrcode.DeadlockDetected: + return &ErrSerializationError{ + DBError: pqErr, + } + default: return fmt.Errorf("unknown postgres error: %w", pqErr) } From 7f1a72bb4a366761e21705b6707c0ac03186b21b Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Wed, 6 Nov 2024 10:21:34 -0800 Subject: [PATCH 09/12] Makefile: tune params for db-instance for postgres itests --- Makefile | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index a8bf0bc98b..c531d5fbc2 100644 --- a/Makefile +++ b/Makefile @@ -195,9 +195,18 @@ ifeq ($(dbbackend),postgres) docker rm lnd-postgres --force || echo "Starting new postgres container" # Start a fresh postgres instance. Allow a maximum of 500 connections so - # that multiple lnd instances with a maximum number of connections of 50 - # each can run concurrently. - docker run --name lnd-postgres -e POSTGRES_PASSWORD=postgres -p 6432:5432 -d postgres:13-alpine -N 500 + # that multiple lnd instances with a maximum number of connections of 20 + # each can run concurrently. Note that many of the settings here are + # specifically for integration testing and are not fit for running + # production nodes. The increase in max connections ensures that there + # are enough entries allocated for the RWConflictPool to allow multiple + # conflicting transactions to track serialization conflicts. The + # increase in predicate locks and locks per transaction is to allow the + # queries to lock individual rows instead of entire tables, helping + # reduce serialization conflicts. Disabling sequential scan for small + # tables also helps prevent serialization conflicts by ensuring lookups + # lock only relevant rows in the index rather than the entire table. + docker run --name lnd-postgres -e POSTGRES_PASSWORD=postgres -p 6432:5432 -d postgres:13-alpine -N 1500 -c max_pred_locks_per_transaction=1024 -c max_locks_per_transaction=128 -c enable_seqscan=off docker logs -f lnd-postgres & # Wait for the instance to be started. From 819bc24096e782849ae55663f14bc51bf498cdcd Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Thu, 14 Nov 2024 21:48:14 -0800 Subject: [PATCH 10/12] Makefile: log to file instead of console --- Makefile | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index c531d5fbc2..2f0c8c9bae 100644 --- a/Makefile +++ b/Makefile @@ -207,16 +207,19 @@ ifeq ($(dbbackend),postgres) # tables also helps prevent serialization conflicts by ensuring lookups # lock only relevant rows in the index rather than the entire table. docker run --name lnd-postgres -e POSTGRES_PASSWORD=postgres -p 6432:5432 -d postgres:13-alpine -N 1500 -c max_pred_locks_per_transaction=1024 -c max_locks_per_transaction=128 -c enable_seqscan=off - docker logs -f lnd-postgres & + docker logs -f lnd-postgres >itest/postgres.log 2>&1 & # Wait for the instance to be started. sleep $(POSTGRES_START_DELAY) endif +clean-itest-logs: + rm -rf itest/*.log itest/.logs-* + #? itest-only: Only run integration tests without re-building binaries -itest-only: db-instance +itest-only: clean-itest-logs db-instance @$(call print, "Running integration tests with ${backend} backend.") - rm -rf itest/*.log itest/.logs-*; date + date EXEC_SUFFIX=$(EXEC_SUFFIX) scripts/itest_part.sh 0 1 $(SHUFFLE_SEED) $(TEST_FLAGS) $(ITEST_FLAGS) -test.v $(COLLECT_ITEST_COVERAGE) @@ -227,9 +230,9 @@ itest: build-itest itest-only itest-race: build-itest-race itest-only #? itest-parallel: Build and run integration tests in parallel mode, running up to ITEST_PARALLELISM test tranches in parallel (default 4) -itest-parallel: build-itest db-instance +itest-parallel: clean-itest-logs build-itest db-instance @$(call print, "Running tests") - rm -rf itest/*.log itest/.logs-*; date + date EXEC_SUFFIX=$(EXEC_SUFFIX) scripts/itest_parallel.sh $(ITEST_PARALLELISM) $(NUM_ITEST_TRANCHES) $(SHUFFLE_SEED) $(TEST_FLAGS) $(ITEST_FLAGS) $(COLLECT_ITEST_COVERAGE) From db4bf517a4641296bd65a2c6d8de907ed394b39f Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Fri, 15 Nov 2024 01:41:22 -0800 Subject: [PATCH 11/12] github workflow: save postgres log to zip file --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4646693f74..822e09c9dc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -355,7 +355,7 @@ jobs: - name: Zip log files on failure if: ${{ failure() }} timeout-minutes: 5 # timeout after 5 minute - run: 7z a logs-itest-${{ matrix.name }}.zip itest/**/*.log + run: 7z a logs-itest-${{ matrix.name }}.zip itest/**/*.log itest/postgres.log - name: Upload log files on failure uses: actions/upload-artifact@v3 From 175711a17d8a4b7075caa4e1f3dce42848c5a342 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Wed, 27 Nov 2024 14:05:00 -0800 Subject: [PATCH 12/12] docs: update release-notes for 0.19.0 --- docs/release-notes/release-notes-0.19.0.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/release-notes/release-notes-0.19.0.md b/docs/release-notes/release-notes-0.19.0.md index a09d60566d..9a32eb1457 100644 --- a/docs/release-notes/release-notes-0.19.0.md +++ b/docs/release-notes/release-notes-0.19.0.md @@ -232,6 +232,11 @@ The underlying functionality between those two options remain the same. store](https://github.com/lightningnetwork/lnd/pull/9001) so that results are namespaced. All existing results are written to the "default" namespace. +* [Remove global application level lock for + Postgres](https://github.com/lightningnetwork/lnd/pull/9313) so multiple DB + transactions can run at once, increasing efficiency. Includes several bugfixes + to allow this to work properly. + ## Code Health * A code refactor that [moves all the graph related DB code out of the