Skip to content

Commit

Permalink
loq: clear LeadEpoch when re-writing the RangeDescriptor
Browse files Browse the repository at this point in the history
The LoQ (loss of quorum) recovery tool is used when some ranges have lost
quorum. It removes some replicas from the RangeDescriptor. If the fortified
leader is removed from the RangeDescriptor, SupportFor() will return epoch=0.
This fires an assertion, since SupportFor epochs should never regress. This
commit changes the LoQ behaviour so that it resets the LeadEpoch when
rewriting the RangeDescriptor.

Fixes: #136908

Release note: None
  • Loading branch information
iskettaneh committed Dec 10, 2024
1 parent 4b94ccb commit 4bd1fbc
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
13 changes: 13 additions & 0 deletions pkg/kv/kvserver/loqrecovery/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,19 @@ func applyReplicaUpdate(
return PrepareReplicaReport{}, errors.Wrap(err, "updating MVCCStats")
}

// Update the HardState to clear the LeadEpoch, as otherwise we may risk
// seeing an epoch regression in raft. See #136908 for more details.
hs, err := sl.LoadHardState(ctx, readWriter)
if err != nil {
return PrepareReplicaReport{}, errors.Wrap(err, "loading HardState")
}

hs.LeadEpoch = 0

if err := sl.SetHardState(ctx, readWriter, hs); err != nil {
return PrepareReplicaReport{}, errors.Wrap(err, "setting HardState")
}

return report, nil
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/loqrecovery/server_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,7 @@ func TestRetrieveApplyStatus(t *testing.T) {

// We currently don't clear out the LeadEpoch field when recovering from a
// loss of quorum, so we can't run with leader leases on in this test.
tc, _, _ := prepTestCluster(ctx, t, 5, true /* disableLeaderLease */)
tc, _, _ := prepTestCluster(ctx, t, 5, false /* disableLeaderLease */)
defer tc.Stopper().Stop(ctx)

// Use scratch range to ensure we have a range that loses quorum.
Expand Down

0 comments on commit 4bd1fbc

Please sign in to comment.