Skip to content

Commit

Permalink
base: decrease store liveness durations
Browse files Browse the repository at this point in the history
Previously, store liveness used a heartbeat interval and support
duration of 3s and 6s, respectively. This matched the lease extension
and lease duration, respectively. However, these values were not well
aligned with Raft's election timeout (4s) and jitter (up to 2s), so
when a follower had to campaign after withdrawing support from the
leader, the store liveness durations added up to the Raft timeout and
jitter, instead of being subsumed by them.

This commit reduces the store liveness heartbeat interval and support
duration to 1s and 3s, respectively.

Fixes: #133613

Release note: None
  • Loading branch information
miraradeva committed Dec 17, 2024
1 parent 40c81e1 commit 3b35d19
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 12 deletions.
38 changes: 30 additions & 8 deletions pkg/base/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,13 @@ var (
// Total latency [ 3.03s - 7.20s]
//
// Leader lease acquisition (including raft election):
// - Store Liveness heartbeat offset (0-1 heartbeat interval) [-3.00s - 0.00s]
// - Store Liveness expiration (constant) [ 6.00s - 6.00s]
// - Store Liveness heartbeat offset (0-1 heartbeat interval) [-1.00s - 0.00s]
// - Store Liveness expiration (constant) [ 3.00s - 3.00s]
// - Store Liveness withdrawal (0-1 withdrawal interval) [ 0.00s - 0.10s]
// - Raft election timeout jitter (random 0x-1x timeout) [ 0.00s - 2.00s]
// - Election (3x RTT: prevote, vote, append) [ 0.03s - 1.20s]
// - Lease acquisition (1x RTT: append) [ 0.01s - 0.40s]
// Total latency [ 3.04s - 9.70s]
// Total latency [ 2.04s - 6.70s]
//
// (generated by TestDefaultRaftConfig)
//
Expand Down Expand Up @@ -228,6 +228,16 @@ var (
DefaultRPCHeartbeatTimeout = envutil.EnvOrDefaultDuration(
"COCKROACH_RPC_HEARTBEAT_TIMEOUT", 3*NetworkTimeout)

// defaultStoreLivenessHeartbeatInterval is the default value for
// StoreLivenessHeartbeatInterval.
defaultStoreLivenessHeartbeatInterval = envutil.EnvOrDefaultDuration(
"COCKROACH_STORE_LIVENESS_HEARTBEAT_INTERVAL", time.Second)

// defaultStoreLivenessSupportDuration is the default value for
// StoreLivenessSupportDuration.
defaultStoreLivenessSupportDuration = envutil.EnvOrDefaultDuration(
"COCKROACH_STORE_LIVENESS_SUPPORT_DURATION", 3*time.Second)

// defaultRaftTickInterval is the default resolution of the Raft timer.
defaultRaftTickInterval = envutil.EnvOrDefaultDuration(
"COCKROACH_RAFT_TICK_INTERVAL", 500*time.Millisecond)
Expand Down Expand Up @@ -549,6 +559,14 @@ type RaftConfig struct {
// RaftHeartbeatIntervalTicks is the number of ticks that pass between heartbeats.
RaftHeartbeatIntervalTicks int64

// StoreLivenessHeartbeatInterval determines how often stores request and
// extend store liveness support.
StoreLivenessHeartbeatInterval time.Duration

// StoreLivenessSupportDuration is the duration of store liveness support that
// stores request and extend.
StoreLivenessSupportDuration time.Duration

// RangeLeaseDuration specifies the range lease duration.
RangeLeaseDuration time.Duration
// RangeLeaseRenewalFraction specifies what fraction the range lease renewal
Expand Down Expand Up @@ -658,6 +676,12 @@ func (cfg *RaftConfig) SetDefaults() {
if cfg.RaftHeartbeatIntervalTicks == 0 {
cfg.RaftHeartbeatIntervalTicks = defaultRaftHeartbeatIntervalTicks
}
if cfg.StoreLivenessHeartbeatInterval == 0 {
cfg.StoreLivenessHeartbeatInterval = defaultStoreLivenessHeartbeatInterval
}
if cfg.StoreLivenessSupportDuration == 0 {
cfg.StoreLivenessSupportDuration = defaultStoreLivenessSupportDuration
}
if cfg.RangeLeaseDuration == 0 {
cfg.RangeLeaseDuration = defaultRangeLeaseDuration
}
Expand Down Expand Up @@ -769,11 +793,9 @@ func (cfg RaftConfig) NodeLivenessDurations() (livenessActive, livenessRenewal t
}

// StoreLivenessDurations computes durations for store liveness heartbeat
// interval and liveness interval.
func (cfg RaftConfig) StoreLivenessDurations() (livenessInterval, heartbeatInterval time.Duration) {
livenessInterval = cfg.RangeLeaseDuration
heartbeatInterval = time.Duration(float64(livenessInterval) * livenessRenewalFraction)
return
// interval and support duration.
func (cfg RaftConfig) StoreLivenessDurations() (supportDuration, heartbeatInterval time.Duration) {
return cfg.StoreLivenessSupportDuration, cfg.StoreLivenessHeartbeatInterval
}

// SentinelGossipTTL is time-to-live for the gossip sentinel, which is gossiped
Expand Down
4 changes: 3 additions & 1 deletion pkg/base/testdata/raft_config
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ echo
RaftElectionTimeoutTicks: (int64) 4,
RaftReproposalTimeoutTicks: (int64) 6,
RaftHeartbeatIntervalTicks: (int64) 2,
StoreLivenessHeartbeatInterval: (time.Duration) 1s,
StoreLivenessSupportDuration: (time.Duration) 3s,
RangeLeaseDuration: (time.Duration) 6s,
RangeLeaseRenewalFraction: (float64) 0.5,
RaftEnableCheckQuorum: (bool) true,
Expand All @@ -24,5 +26,5 @@ RaftReproposalTimeout: 3s
RangeLeaseDurations: active=6s renewal=3s
RangeLeaseAcquireTimeout: 4s
NodeLivenessDurations: active=6s renewal=3s
StoreLivenessDurations: active=6s renewal=3s
StoreLivenessDurations: active=3s renewal=1s
SentinelGossipTTL: 3s
6 changes: 3 additions & 3 deletions pkg/base/testdata/raft_config_recovery
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ echo
// Total latency [ 3.03s - 7.20s]
//
// Leader lease acquisition (including raft election):
// - Store Liveness heartbeat offset (0-1 heartbeat interval) [-3.00s - 0.00s]
// - Store Liveness expiration (constant) [ 6.00s - 6.00s]
// - Store Liveness heartbeat offset (0-1 heartbeat interval) [-1.00s - 0.00s]
// - Store Liveness expiration (constant) [ 3.00s - 3.00s]
// - Store Liveness withdrawal (0-1 withdrawal interval) [ 0.00s - 0.10s]
// - Raft election timeout jitter (random 0x-1x timeout) [ 0.00s - 2.00s]
// - Election (3x RTT: prevote, vote, append) [ 0.03s - 1.20s]
// - Lease acquisition (1x RTT: append) [ 0.01s - 0.40s]
// Total latency [ 3.04s - 9.70s]
// Total latency [ 2.04s - 6.70s]

0 comments on commit 3b35d19

Please sign in to comment.