diff --git a/.changelog/5466.bugfix.md b/.changelog/5466.bugfix.md new file mode 100644 index 00000000000..79fafe46406 --- /dev/null +++ b/.changelog/5466.bugfix.md @@ -0,0 +1 @@ +go/consensus/cometbft/light: Try multiple sources when fetching blocks diff --git a/.changelog/5466.cfg.md b/.changelog/5466.cfg.md new file mode 100644 index 00000000000..0d15c870df8 --- /dev/null +++ b/.changelog/5466.cfg.md @@ -0,0 +1,4 @@ +Add `num_light_blocks_kept` configuration option + +Located under `consensus.prune`, it allows configuring the number of light +blocks that are kept in the local trusted store (defaulting to 10000). diff --git a/go/consensus/api/light.go b/go/consensus/api/light.go index e1a4498d7ba..559ce62bc04 100644 --- a/go/consensus/api/light.go +++ b/go/consensus/api/light.go @@ -22,6 +22,10 @@ type LightService interface { // LightClient is a consensus light client interface. type LightClient interface { + // GetStoredLightBlock retrieves a light block from the local light block store without doing + // any remote queries. + GetStoredLightBlock(height int64) (*LightBlock, error) + // GetLightBlock queries peers for a specific light block. GetLightBlock(ctx context.Context, height int64) (*LightBlock, rpc.PeerFeedback, error) diff --git a/go/consensus/cometbft/abci/prune.go b/go/consensus/cometbft/abci/prune.go index 307b9a4146b..16351a50829 100644 --- a/go/consensus/cometbft/abci/prune.go +++ b/go/consensus/cometbft/abci/prune.go @@ -9,16 +9,11 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/logging" "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" + "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/config" nodedb "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" ) const ( - // PruneDefault is the default PruneStrategy. - PruneDefault = pruneNone - - pruneNone = "none" - pruneKeepN = "keep_n" - // LogEventABCIPruneDelete is a log event value that signals an ABCI pruning // delete event. LogEventABCIPruneDelete = "cometbft/abci/prune" @@ -38,9 +33,9 @@ const ( func (s PruneStrategy) String() string { switch s { case PruneNone: - return pruneNone + return config.PruneStrategyNone case PruneKeepN: - return pruneKeepN + return config.PruneStrategyKeepN default: return "[unknown]" } @@ -48,9 +43,9 @@ func (s PruneStrategy) String() string { func (s *PruneStrategy) FromString(str string) error { switch strings.ToLower(str) { - case pruneNone: + case config.PruneStrategyNone: *s = PruneNone - case pruneKeepN: + case config.PruneStrategyKeepN: *s = PruneKeepN default: return fmt.Errorf("abci/pruner: unknown pruning strategy: '%v'", str) diff --git a/go/consensus/cometbft/cometbft.go b/go/consensus/cometbft/cometbft.go index 301707c322e..dcebff17f37 100644 --- a/go/consensus/cometbft/cometbft.go +++ b/go/consensus/cometbft/cometbft.go @@ -33,6 +33,12 @@ func New( } // NewLightClient creates a new CometBFT light client service. -func NewLightClient(ctx context.Context, dataDir string, genesis *genesisAPI.Document, consensus consensusAPI.Backend, p2p rpc.P2P) (lightAPI.ClientService, error) { +func NewLightClient( + ctx context.Context, + dataDir string, + genesis *genesisAPI.Document, + consensus consensusAPI.Backend, + p2p rpc.P2P, +) (lightAPI.ClientService, error) { return light.New(ctx, dataDir, genesis, consensus, p2p) } diff --git a/go/consensus/cometbft/config/config.go b/go/consensus/cometbft/config/config.go index 6f5251cd10f..bffbf2356ce 100644 --- a/go/consensus/cometbft/config/config.go +++ b/go/consensus/cometbft/config/config.go @@ -87,6 +87,13 @@ type SubmissionConfig struct { MaxFee uint64 `yaml:"max_fee"` } +const ( + // PruneStrategyNone is the identifier of the strategy that disables pruning. + PruneStrategyNone = "none" + // PruneStrategyKeepN is the identifier of the strategy that keeps the last N versions. + PruneStrategyKeepN = "keep_n" +) + // PruneConfig is the CometBFT ABCI state pruning configuration structure. type PruneConfig struct { // ABCI state pruning strategy. @@ -95,6 +102,8 @@ type PruneConfig struct { NumKept uint64 `yaml:"num_kept"` // ABCI state pruning interval. Interval time.Duration `yaml:"interval"` + // Light blocks kept in trusted store. + NumLightBlocksKept uint16 `yaml:"num_light_blocks_kept"` } // CheckpointerConfig is the CometBFT ABCI state pruning configuration structure. @@ -200,9 +209,10 @@ func DefaultConfig() Config { HaltHeight: 0, UpgradeStopDelay: 60 * time.Second, Prune: PruneConfig{ - Strategy: "none", - NumKept: 3600, - Interval: 2 * time.Minute, + Strategy: PruneStrategyNone, + NumKept: 3600, + Interval: 2 * time.Minute, + NumLightBlocksKept: 10000, }, Checkpointer: CheckpointerConfig{ Disabled: false, diff --git a/go/consensus/cometbft/full/full.go b/go/consensus/cometbft/full/full.go index 69efb59a3c3..ef3596053ff 100644 --- a/go/consensus/cometbft/full/full.go +++ b/go/consensus/cometbft/full/full.go @@ -47,7 +47,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/db" lightAPI "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/light/api" "github.com/oasisprotocol/oasis-core/go/consensus/metrics" - lightP2P "github.com/oasisprotocol/oasis-core/go/consensus/p2p/light" genesisAPI "github.com/oasisprotocol/oasis-core/go/genesis/api" cmflags "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/flags" cmmetrics "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/metrics" @@ -473,9 +472,6 @@ func (t *fullService) RegisterP2PService(p2p p2pAPI.Service) error { } t.p2p = p2p - // Register consensus protocol server. - t.p2p.RegisterProtocolServer(lightP2P.NewServer(t.p2p, t.genesis.ChainContext(), t)) - return nil } diff --git a/go/consensus/cometbft/light/api/light.go b/go/consensus/cometbft/light/api/light.go index 626e2045583..08146c977a0 100644 --- a/go/consensus/cometbft/light/api/light.go +++ b/go/consensus/cometbft/light/api/light.go @@ -3,6 +3,7 @@ package api import ( "context" + "fmt" cmtlight "github.com/cometbft/cometbft/light" cmtlightprovider "github.com/cometbft/cometbft/light/provider" @@ -41,6 +42,9 @@ type Provider interface { // PeerID returns the identifier of the peer backing the provider. PeerID() string + + // RefreshPeer notifies the provider to attempt to replace a peer with a fresh one. + RefreshPeer() } // ClientConfig is the configuration for the light client. @@ -51,3 +55,20 @@ type ClientConfig struct { // TrustOptions are CometBFT light client trust options. TrustOptions cmtlight.TrustOptions } + +// NewLightBlock creates a new consensus.LightBlock from a CometBFT light block. +func NewLightBlock(clb *cmttypes.LightBlock) (*consensus.LightBlock, error) { + plb, err := clb.ToProto() + if err != nil { + return nil, fmt.Errorf("failed to marshal light block: %w", err) + } + meta, err := plb.Marshal() + if err != nil { + return nil, fmt.Errorf("failed to marshal light block: %w", err) + } + + return &consensus.LightBlock{ + Height: clb.Height, + Meta: meta, + }, nil +} diff --git a/go/consensus/cometbft/light/internal.go b/go/consensus/cometbft/light/internal.go index afd9599414a..7c6836c244d 100644 --- a/go/consensus/cometbft/light/internal.go +++ b/go/consensus/cometbft/light/internal.go @@ -27,19 +27,70 @@ type lightClient struct { tmc *cmtlight.Client } +func tryProviders[R any]( + ctx context.Context, + lc *lightClient, + fn func(api.Provider) (R, rpc.PeerFeedback, error), +) (R, rpc.PeerFeedback, error) { + // Primary provider. + providers := append([]api.Provider{}, lc.tmc.Primary().(api.Provider)) + // Additional providers. + for _, provider := range lc.tmc.Witnesses() { + providers = append(providers, provider.(api.Provider)) + } + + var ( + result R + err error + ) + for _, provider := range providers { + if ctx.Err() != nil { + return result, nil, ctx.Err() + } + + var pf rpc.PeerFeedback + result, pf, err = fn(provider) + if err == nil { + return result, pf, nil + } + } + + // Trigger primary provider refresh if everything fails. + providers[0].RefreshPeer() + + return result, nil, err +} + +// GetStoredBlock implements api.Client. +func (lc *lightClient) GetStoredLightBlock(height int64) (*consensus.LightBlock, error) { + clb, err := lc.tmc.TrustedLightBlock(height) + if err != nil { + return nil, err + } + return api.NewLightBlock(clb) +} + // GetLightBlock implements api.Client. func (lc *lightClient) GetLightBlock(ctx context.Context, height int64) (*consensus.LightBlock, rpc.PeerFeedback, error) { - return lc.getPrimary().GetLightBlock(ctx, height) + return tryProviders(ctx, lc, func(p api.Provider) (*consensus.LightBlock, rpc.PeerFeedback, error) { + return p.GetLightBlock(ctx, height) + }) } // GetParameters implements api.Client. func (lc *lightClient) GetParameters(ctx context.Context, height int64) (*consensus.Parameters, rpc.PeerFeedback, error) { - return lc.getPrimary().GetParameters(ctx, height) + return tryProviders(ctx, lc, func(p api.Provider) (*consensus.Parameters, rpc.PeerFeedback, error) { + return p.GetParameters(ctx, height) + }) } // SubmitEvidence implements api.Client. func (lc *lightClient) SubmitEvidence(ctx context.Context, evidence *consensus.Evidence) (rpc.PeerFeedback, error) { - return lc.getPrimary().SubmitEvidence(ctx, evidence) + _, pf, err := tryProviders(ctx, lc, func(p api.Provider) (any, rpc.PeerFeedback, error) { + pf, err := p.SubmitEvidence(ctx, evidence) + return nil, pf, err + }) + return pf, err } // GetVerifiedLightBlock implements Client. @@ -49,7 +100,7 @@ func (lc *lightClient) GetVerifiedLightBlock(ctx context.Context, height int64) // GetVerifiedLightBlock implements Client. func (lc *lightClient) GetVerifiedParameters(ctx context.Context, height int64) (*cmtproto.ConsensusParams, error) { - p, pf, err := lc.getPrimary().GetParameters(ctx, height) + p, pf, err := lc.GetParameters(ctx, height) if err != nil { return nil, err } @@ -89,10 +140,6 @@ func (lc *lightClient) GetVerifiedParameters(ctx context.Context, height int64) return ¶msPB, nil } -func (lc *lightClient) getPrimary() api.Provider { - return lc.tmc.Primary().(api.Provider) -} - // NewInternalClient creates an internal and non-persistent light client. // // This client is instantiated from the provided (obtained out of bound) trusted block diff --git a/go/consensus/cometbft/light/p2p/p2p.go b/go/consensus/cometbft/light/p2p/p2p.go index de3ad54d1e4..d7eece7bfaa 100644 --- a/go/consensus/cometbft/light/p2p/p2p.go +++ b/go/consensus/cometbft/light/p2p/p2p.go @@ -46,12 +46,13 @@ type providersPool struct { // NewLightClientProvider implements LightClientProvidersPool. func (p *providersPool) NewLightClientProvider() api.Provider { provider := &lightClientProvider{ - initCh: make(chan struct{}), - chainID: p.chainID, - p2pMgr: p.p2pMgr, - rc: p.rc, - logger: logging.GetLogger("cometbft/light/p2p"), - pool: p, + initCh: make(chan struct{}), + chainID: p.chainID, + p2pMgr: p.p2pMgr, + rc: p.rc, + refreshCh: make(chan struct{}, 1), + logger: logging.GetLogger("cometbft/light/p2p"), + pool: p, } go provider.worker(p.ctx) @@ -87,6 +88,8 @@ type lightClientProvider struct { p2pMgr rpc.PeerManager rc rpc.Client + refreshCh chan struct{} + // Guarded by lock. l sync.RWMutex peerID *core.PeerID @@ -106,6 +109,9 @@ func (lp *lightClientProvider) worker(ctx context.Context) { for { select { + case <-lp.refreshCh: + // Explicitly requested peer refresh. + lp.refreshPeer() case update := <-ch: peerID := lp.getPeer() switch { @@ -185,7 +191,7 @@ func (lp *lightClientProvider) Initialized() <-chan struct{} { return lp.initCh } -// Initialized implements api.Provider. +// PeerID implements api.Provider. func (lp *lightClientProvider) PeerID() string { peer := lp.getPeer() if peer == nil { @@ -196,6 +202,14 @@ func (lp *lightClientProvider) PeerID() string { return peer.String() } +// RefreshPeer implements api.Provider. +func (lp *lightClientProvider) RefreshPeer() { + select { + case lp.refreshCh <- struct{}{}: + default: + } +} + // MalevolentProvider implements api.Provider. func (lp *lightClientProvider) MalevolentProvider(peerID string) { lp.p2pMgr.RecordBadPeer(core.PeerID(peerID)) @@ -231,6 +245,12 @@ func (lp *lightClientProvider) SubmitEvidence(ctx context.Context, evidence *con return pf, nil } +// GetStoredLightBlock implements api.Provider. +func (lp *lightClientProvider) GetStoredLightBlock(_ int64) (*consensus.LightBlock, error) { + // The remote client provider stores no blocks locally. + return nil, consensus.ErrVersionNotFound +} + // GetLightBlock implements api.Provider. func (lp *lightClientProvider) GetLightBlock(ctx context.Context, height int64) (*consensus.LightBlock, rpc.PeerFeedback, error) { peerID := lp.getPeer() @@ -247,7 +267,7 @@ func (lp *lightClientProvider) GetLightBlock(ctx context.Context, height int64) // Ensure peer returned the block for the queried height. if rsp.Height != height { pf.RecordBadPeer() - return nil, nil, err + return nil, nil, consensus.ErrVersionNotFound } return &rsp, pf, nil @@ -269,7 +289,7 @@ func (lp *lightClientProvider) GetParameters(ctx context.Context, height int64) // Ensure peer returned the parameters for the queried height. if rsp.Height != height { pf.RecordBadPeer() - return nil, nil, err + return nil, nil, consensus.ErrVersionNotFound } return &rsp, pf, nil diff --git a/go/consensus/cometbft/light/service.go b/go/consensus/cometbft/light/service.go index 31225f895b8..16b07a28407 100644 --- a/go/consensus/cometbft/light/service.go +++ b/go/consensus/cometbft/light/service.go @@ -3,6 +3,7 @@ package light import ( "context" + "errors" "fmt" "path/filepath" "sync" @@ -20,8 +21,8 @@ import ( "github.com/oasisprotocol/oasis-core/go/config" consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" cmtAPI "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" - tmapi "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/common" + cmtConfig "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/config" "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/db" "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/light/api" p2pLight "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/light/p2p" @@ -150,7 +151,7 @@ func (c *client) worker() { c.logger.Error("failed to obtain chain context", "err", err) return } - tmChainID := tmapi.CometBFTChainID(chainCtx) + tmChainID := cmtAPI.CometBFTChainID(chainCtx) // Loads the local block at the provided height and adds it to the trust store. trustLocalBlock := func(ctx context.Context, height int64) error { @@ -207,15 +208,26 @@ func (c *client) worker() { providers = append(providers, p) c.providers = append(c.providers, p) } + + opts := []cmtlight.Option{ + cmtlight.MaxRetryAttempts(lcMaxRetryAttempts), + cmtlight.Logger(common.NewLogAdapter(!config.GlobalConfig.Consensus.LogDebug)), + cmtlight.DisableProviderRemoval(), + } + switch config.GlobalConfig.Consensus.Prune.Strategy { + case cmtConfig.PruneStrategyNone: + opts = append(opts, cmtlight.PruningSize(0)) // Disable pruning the light store. + default: + opts = append(opts, cmtlight.PruningSize(config.GlobalConfig.Consensus.Prune.NumLightBlocksKept)) + } + tmc, err := cmtlight.NewClientFromTrustedStore( tmChainID, config.GlobalConfig.Consensus.StateSync.TrustPeriod, providers[0], // Primary provider. providers[1:], // Witnesses. c.store, - cmtlight.MaxRetryAttempts(lcMaxRetryAttempts), - cmtlight.Logger(common.NewLogAdapter(!config.GlobalConfig.Consensus.LogDebug)), - cmtlight.DisableProviderRemoval(), + opts..., ) if err != nil { c.logger.Error("failed to initialize cometbft light client", "err", err) @@ -246,6 +258,15 @@ func (c *client) worker() { } } +// GetStoredBlock implements api.Client. +func (c *client) GetStoredLightBlock(height int64) (*consensus.LightBlock, error) { + clb, err := c.store.LightBlock(height) + if err != nil { + return nil, err + } + return api.NewLightBlock(clb) +} + // GetLightBlock implements api.Client. func (c *client) GetLightBlock(ctx context.Context, height int64) (*consensus.LightBlock, rpc.PeerFeedback, error) { select { @@ -254,14 +275,59 @@ func (c *client) GetLightBlock(ctx context.Context, height int64) (*consensus.Li return nil, nil, ctx.Err() } - // Try local backend first. - lb, err := c.consensus.GetLightBlock(ctx, height) - if err == nil { + // Local backend source. + localBackendSource := func() (*consensus.LightBlock, rpc.PeerFeedback, error) { + lb, err := c.consensus.GetLightBlock(ctx, height) + if err != nil { + c.logger.Debug("failed to fetch light block from local full node", + "err", err, + "height", height, + ) + return nil, nil, err + } + return lb, rpc.NewNopPeerFeedback(), nil } - c.logger.Debug("failed to fetch light block from local full node", "err", err) - return c.lc.GetLightBlock(ctx, height) + // Light client. + lightClientSource := func() (*consensus.LightBlock, rpc.PeerFeedback, error) { + clb, err := c.lc.GetVerifiedLightBlock(ctx, height) + if err != nil { + c.logger.Debug("failed to fetch light block from light client", + "err", err, + "height", height, + ) + return nil, nil, err + } + + lb, err := api.NewLightBlock(clb) + if err != nil { + return nil, nil, err + } + return lb, rpc.NewNopPeerFeedback(), nil + } + + // Direct peer query. + directPeerQuerySource := func() (*consensus.LightBlock, rpc.PeerFeedback, error) { + return c.lc.GetLightBlock(ctx, height) + } + + // Try all sources in order. + var mergedErr error + for _, src := range []func() (*consensus.LightBlock, rpc.PeerFeedback, error){ + localBackendSource, + lightClientSource, + directPeerQuerySource, + } { + lb, pf, err := src() + if err == nil { + return lb, pf, nil + } + + mergedErr = errors.Join(mergedErr, err) + } + + return nil, nil, mergedErr } // GetParameters implements api.Client. diff --git a/go/consensus/p2p/light/server.go b/go/consensus/p2p/light/server.go index 0d1dc3cb62f..f0d907998c9 100644 --- a/go/consensus/p2p/light/server.go +++ b/go/consensus/p2p/light/server.go @@ -20,7 +20,9 @@ const ( type service struct { consensus consensus.Backend - logger *logging.Logger + lc consensus.LightClient + + logger *logging.Logger } func (s *service) HandleRequest(ctx context.Context, method string, body cbor.RawMessage) (interface{}, error) { @@ -30,12 +32,7 @@ func (s *service) HandleRequest(ctx context.Context, method string, body cbor.Ra if err := cbor.Unmarshal(body, &rq); err != nil { return nil, rpc.ErrBadRequest } - - lb, err := s.consensus.GetLightBlock(ctx, rq) - if err != nil { - return nil, err - } - return lb, nil + return s.handleGetLightBlock(ctx, rq) case MethodGetParameters: var rq int64 if err := cbor.Unmarshal(body, &rq); err != nil { @@ -59,11 +56,32 @@ func (s *service) HandleRequest(ctx context.Context, method string, body cbor.Ra } } +func (s *service) handleGetLightBlock(ctx context.Context, height int64) (*consensus.LightBlock, error) { + lb, err := s.consensus.GetLightBlock(ctx, height) + if err != nil { + // Also try the local light store. + if lb, err = s.lc.GetStoredLightBlock(height); err != nil { + return nil, err + } + } + return lb, nil +} + // NewServer creates a new light block sync protocol server. -func NewServer(p2p rpc.P2P, chainContext string, consensus consensus.Backend) rpc.Server { +func NewServer( + p2p rpc.P2P, + chainContext string, + consensus consensus.Backend, + lightClient consensus.LightClient, +) rpc.Server { p2p.RegisterProtocol(ProtocolID(chainContext), minProtocolPeers, totalProtocolPeers) + return rpc.NewServer( ProtocolID(chainContext), - &service{consensus, logging.GetLogger("consensus/p22/light/server")}, + &service{ + consensus: consensus, + lc: lightClient, + logger: logging.GetLogger("consensus/p2p/light/server"), + }, ) } diff --git a/go/oasis-node/cmd/node/node.go b/go/oasis-node/cmd/node/node.go index 146ecafbd72..328122c18e1 100644 --- a/go/oasis-node/cmd/node/node.go +++ b/go/oasis-node/cmd/node/node.go @@ -18,6 +18,7 @@ import ( "github.com/oasisprotocol/oasis-core/go/config" consensusAPI "github.com/oasisprotocol/oasis-core/go/consensus/api" "github.com/oasisprotocol/oasis-core/go/consensus/cometbft" + consensusLightP2P "github.com/oasisprotocol/oasis-core/go/consensus/p2p/light" controlAPI "github.com/oasisprotocol/oasis-core/go/control/api" genesisAPI "github.com/oasisprotocol/oasis-core/go/genesis/api" governanceAPI "github.com/oasisprotocol/oasis-core/go/governance/api" @@ -581,6 +582,9 @@ func NewNode() (node *Node, err error) { // nolint: gocyclo } node.svcMgr.Register(node.LightClient) + // Register consensus light client P2P protocol server. + node.P2P.RegisterProtocolServer(consensusLightP2P.NewServer(node.P2P, node.chainContext, node.Consensus, node.LightClient)) + // If the consensus backend supports communicating with consensus services, we can also start // all services required for runtime operation. if node.Consensus.SupportedFeatures().Has(consensusAPI.FeatureServices) {