Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EFM] Fix bugs from Benchnet testing #6898

Merged
merged 11 commits into from
Jan 20, 2025
3 changes: 2 additions & 1 deletion cmd/consensus/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,8 @@ func main() {
PostInit(func(nodeConfig *cmd.NodeConfig) error {
// TODO(EFM, #6794): This function is introduced to implement a backward-compatible upgrade from v1 to v2.
// Remove this once we complete the network upgrade.
if err := operation.RetryOnConflict(nodeBuilder.DB.Update, operation.MigrateDKGEndStateFromV1()); err != nil {
log := nodeConfig.Logger.With().Str("postinit", "dkg_end_state_migration").Logger()
if err := operation.RetryOnConflict(nodeBuilder.SecretsDB.Update, operation.MigrateDKGEndStateFromV1(log)); err != nil {
return fmt.Errorf("could not migrate DKG end state from v1 to v2: %w", err)
}
return nil
Expand Down
14 changes: 11 additions & 3 deletions engine/consensus/dkg/reactor_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,16 +284,24 @@ func (e *ReactorEngine) handleEpochCommittedPhaseStarted(currentEpochCounter uin

return
}
if currentState != flow.DKGStateCompleted {
log.Warn().Msgf("checking beacon key consistency: exiting because dkg didn't reach completed state: %s", currentState.String())
// (i) if I have a key (currentState == flow.DKGStateCompleted) which is consistent with the EpochCommit service event,
// then commit the key or (ii) if the key is already committed (currentState == flow.RandomBeaconKeyCommitted), then we
// expect it to be consistent with the EpochCommit service event. While (ii) is a sanity check, we have a severe problem
// if it is violated, because a node signing with an invalid Random beacon key will be slashed - so we better check!
// Our logic for committing a key is idempotent: it is a no-op when stating that a key `k` should be committed that previously
// has already been committed; while it errors if `k` is different from the previously-committed key. In other words, the
// sanity check (ii) is already included in the happy-path logic for (i). So we just repeat the happy-path logic also for
// currentState == flow.RandomBeaconKeyCommitted, because repeated calls only occur due to node crashes, which are rare.
if currentState != flow.DKGStateCompleted && currentState != flow.RandomBeaconKeyCommitted {
log.Warn().Msgf("checking beacon key consistency after EpochCommit: exiting because dkg didn't reach completed state: %s", currentState.String())
return
}
snapshot := e.State.AtBlockID(firstBlock.ID())

// Since epoch phase transitions are emitted when the first block of the new
// phase is finalized, the block's snapshot is guaranteed to already be
// accessible in the protocol state at this point (even though the Badger
// transaction finalizing the block has not been committed yet).
snapshot := e.State.AtBlockID(firstBlock.ID())
nextDKG, err := snapshot.Epochs().Next().DKG()
if err != nil {
// CAUTION: this should never happen, indicates a storage failure or corruption
Expand Down
7 changes: 6 additions & 1 deletion storage/badger/operation/dkg.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"

"github.com/dgraph-io/badger/v2"
"github.com/rs/zerolog"

"github.com/onflow/flow-go/model/encodable"
"github.com/onflow/flow-go/model/flow"
Expand Down Expand Up @@ -81,7 +82,7 @@ func RetrieveDKGStateForEpoch(epochCounter uint64, currentState *flow.DKGState)
// It reads already stored data by deprecated prefix and writes it to the new prefix with values converted to the new representation.
// TODO(EFM, #6794): This function is introduced to implement a backward-compatible upgrade from v1 to v2.
// Remove this once we complete the network upgrade.
func MigrateDKGEndStateFromV1() func(txn *badger.Txn) error {
func MigrateDKGEndStateFromV1(log zerolog.Logger) func(txn *badger.Txn) error {
return func(txn *badger.Txn) error {
var ops []func(*badger.Txn) error
err := traverse(makePrefix(codeDKGEndState), func() (checkFunc, createFunc, handleFunc) {
Expand Down Expand Up @@ -110,6 +111,7 @@ func MigrateDKGEndStateFromV1() func(txn *badger.Txn) error {
ops = append(ops,
UpsertDKGStateForEpoch(epochCounter, newState),
remove(makePrefix(codeDKGEndState, epochCounter)))
log.Info().Msgf("removing %d->%d, writing %d->%d", epochCounter, oldState, epochCounter, newState)

return nil
}
Expand All @@ -124,6 +126,9 @@ func MigrateDKGEndStateFromV1() func(txn *badger.Txn) error {
return fmt.Errorf("aborting conversion from DKG end states: %w", err)
}
}
if len(ops) > 0 {
log.Info().Msgf("finished migrating %d DKG end states", len(ops))
}
return nil
}
}
4 changes: 2 additions & 2 deletions storage/badger/operation/dkg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func TestMigrateDKGEndStateFromV1(t *testing.T) {
}

// migrate the state
err := db.Update(MigrateDKGEndStateFromV1())
err := db.Update(MigrateDKGEndStateFromV1(unittest.Logger()))
assert.NoError(t, err)

assertMigrationSuccessful := func() {
Expand Down Expand Up @@ -167,7 +167,7 @@ func TestMigrateDKGEndStateFromV1(t *testing.T) {
assertMigrationSuccessful()

// migrating again should be no-op
err = db.Update(MigrateDKGEndStateFromV1())
err = db.Update(MigrateDKGEndStateFromV1(unittest.Logger()))
assert.NoError(t, err)
assertMigrationSuccessful()
})
Expand Down
Loading