From f4564fecfff26b768cdd67880fccbca4774db96e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=82=8E=E6=B3=BC?= Date: Sun, 5 Nov 2023 00:28:21 +0800 Subject: [PATCH 1/2] Refactor: fix handling of non-voter leader Openraft does not require Leader/Candidate to be a voter. Before this commit, Openraft's handling of non-voter leader is inconsistent: For a node that has a vote proposed by this node and is committed, and is not a voter: - [`calc_server_state()`](https://github.com/datafuselabs/openraft/blob/97254a31c673e52429ee7187de96fd2ea2a5ce98/openraft/src/raft_state/mod.rs#L323) returns `Learner`. - But [`is_leader()`](https://github.com/datafuselabs/openraft/blob/97254a31c673e52429ee7187de96fd2ea2a5ce98/openraft/src/raft_state/mod.rs#L353) returns true. Since this commit, `calc_server_state()` and `is_leader()` returns consistent result according to the following table: For node-2: - Node-2 with vote `(term=1, node_id=2, committed=true)`: - is a leader if it is **present** in config, either a voter or non-voter. - is a learner if it is **absent** in config. - Node-2 with vote `(term=1, node_id=2, committed=false)`: - is a candidate if it is **present** in config, either a voter or non-voter. - is a learner if it is **absent** in config. - Node-3 with vote `(term=1, node_id=99, committed=false|true)`: - is a follower if it is a **voter** in config, - is a learner if it is a **non-voter** or **absent** in config. | vote \ membership | Voter | Non-voter | Absent | |---------------------------------------|-----------|-----------|---------| | (term=1, node_id=2, committed=true) | leader | leader | learner | | (term=1, node_id=2, committed=false) | candidate | candidate | learner | | (term=1, node_id=99, committed=true) | follower | learner | learner | | (term=1, node_id=99, committed=false) | follower | learner | learner | - Fix: #920 --- openraft/src/docs/data/vote.md | 30 ++++++--- openraft/src/engine/engine_impl.rs | 6 +- .../leader_handler/append_entries_test.rs | 2 +- .../leader_handler/send_heartbeat_test.rs | 2 +- .../src/engine/handler/vote_handler/mod.rs | 2 +- openraft/src/engine/tests/startup_test.rs | 23 +++++++ openraft/src/membership/membership.rs | 5 ++ openraft/src/raft/message/client_write.rs | 3 +- .../src/raft_state/membership_state/mod.rs | 6 ++ openraft/src/raft_state/mod.rs | 45 +++++++++---- tests/tests/life_cycle/main.rs | 1 + .../t90_issue_920_non_voter_leader_restart.rs | 41 ++++++++++++ tests/tests/membership/t31_remove_leader.rs | 66 ++++++++++++++++++- 13 files changed, 199 insertions(+), 33 deletions(-) create mode 100644 tests/tests/life_cycle/t90_issue_920_non_voter_leader_restart.rs diff --git a/openraft/src/docs/data/vote.md b/openraft/src/docs/data/vote.md index 9463b5ba5..7b4e5bfb3 100644 --- a/openraft/src/docs/data/vote.md +++ b/openraft/src/docs/data/vote.md @@ -41,7 +41,7 @@ The only difference between these two modes is the definition of `LeaderId`, and See: [`leader-id`]. -## Vote defines the server state +## Vote and Membership define the server state In the default mode, the `Vote` defines the server state(leader, candidate, follower or learner). A server state has a unique corresponding `vote`, thus `vote` can be used to identify different server @@ -55,19 +55,29 @@ new membership log is replicated to a follower or learner. E.g.: -- A vote `(term=1, node_id=2, committed=false)` is in a candidate state for - node-2. +- Node-2 with vote `(term=1, node_id=2, committed=true)`: + - is a leader if it is **present** in config, either a voter or non-voter. + - is a learner if it is **absent** in config. -- A vote `(term=1, node_id=2, committed=true)` is in a leader state for - node-2. +- Node-2 with vote `(term=1, node_id=2, committed=false)`: + - is a candidate if it is **present** in config, either a voter or non-voter. + - is a learner if it is **absent** in config. -- A vote `(term=1, node_id=2, committed=false|true)` is in a follower/learner - state for node-3. +- Node-3 with vote `(term=1, node_id=99, committed=false|true)`: + - is a follower if it is a **voter** in config, + - is a learner if it is a **non-voter** or **absent** in config. + +For node-2: + +| vote \ membership | Voter | Non-voter | Absent | +|---------------------------------------|-----------|-----------|---------| +| (term=1, node_id=2, committed=true) | leader | leader | learner | +| (term=1, node_id=2, committed=false) | candidate | candidate | learner | +| (term=1, node_id=99, committed=true) | follower | learner | learner | +| (term=1, node_id=99, committed=false) | follower | learner | learner | -- A vote `(term=1, node_id=1, committed=false|true)` is in another different - follower/learner state for node-3. [`Vote`]: `crate::vote::Vote` [`single-term-leader`]: `crate::docs::feature_flags` -[`leader-id`]: `crate::docs::data::leader_id` \ No newline at end of file +[`leader-id`]: `crate::docs::data::leader_id` diff --git a/openraft/src/engine/engine_impl.rs b/openraft/src/engine/engine_impl.rs index fe0e1c26d..a347afd9b 100644 --- a/openraft/src/engine/engine_impl.rs +++ b/openraft/src/engine/engine_impl.rs @@ -95,7 +95,6 @@ where C: RaftTypeConfig } } - // TODO: test it #[tracing::instrument(level = "debug", skip_all)] pub(crate) fn startup(&mut self) { // Allows starting up as a leader. @@ -517,10 +516,7 @@ where C: RaftTypeConfig #[allow(clippy::collapsible_if)] if em.log_id().as_ref() <= self.state.committed() { - if !em.is_voter(&self.config.id) && self.state.is_leading(&self.config.id) { - tracing::debug!("leader {} is stepping down", self.config.id); - self.vote_handler().become_following(); - } + self.vote_handler().update_internal_server_state(); } } diff --git a/openraft/src/engine/handler/leader_handler/append_entries_test.rs b/openraft/src/engine/handler/leader_handler/append_entries_test.rs index 70a17b7c0..b8a23a663 100644 --- a/openraft/src/engine/handler/leader_handler/append_entries_test.rs +++ b/openraft/src/engine/handler/leader_handler/append_entries_test.rs @@ -38,7 +38,7 @@ fn m1_2() -> Membership { } fn m23() -> Membership { - Membership::::new(vec![btreeset! {2,3}], None) + Membership::::new(vec![btreeset! {2,3}], btreeset! {1,2,3}) } fn eng() -> Engine { diff --git a/openraft/src/engine/handler/leader_handler/send_heartbeat_test.rs b/openraft/src/engine/handler/leader_handler/send_heartbeat_test.rs index 6e347e2e1..ae67a51bd 100644 --- a/openraft/src/engine/handler/leader_handler/send_heartbeat_test.rs +++ b/openraft/src/engine/handler/leader_handler/send_heartbeat_test.rs @@ -23,7 +23,7 @@ fn m01() -> Membership { } fn m23() -> Membership { - Membership::::new(vec![btreeset! {2,3}], None) + Membership::::new(vec![btreeset! {2,3}], btreeset! {1,2,3}) } fn eng() -> Engine { diff --git a/openraft/src/engine/handler/vote_handler/mod.rs b/openraft/src/engine/handler/vote_handler/mod.rs index 8652548c9..c6b6bdd1b 100644 --- a/openraft/src/engine/handler/vote_handler/mod.rs +++ b/openraft/src/engine/handler/vote_handler/mod.rs @@ -124,7 +124,7 @@ where C: RaftTypeConfig /// Enter leading or following state by checking `vote`. pub(crate) fn update_internal_server_state(&mut self) { - if self.state.vote_ref().leader_id().voted_for() == Some(self.config.id) { + if self.state.is_leading(&self.config.id) { self.become_leading(); } else { self.become_following(); diff --git a/openraft/src/engine/tests/startup_test.rs b/openraft/src/engine/tests/startup_test.rs index 43279b714..e932e8c65 100644 --- a/openraft/src/engine/tests/startup_test.rs +++ b/openraft/src/engine/tests/startup_test.rs @@ -15,6 +15,10 @@ use crate::ServerState; use crate::TokioInstant; use crate::Vote; +fn m_empty() -> Membership { + Membership::::new(vec![btreeset! {}], None) +} + fn m23() -> Membership { Membership::::new(vec![btreeset! {2,3}], None) } @@ -63,6 +67,25 @@ fn test_startup_as_leader() -> anyhow::Result<()> { Ok(()) } +/// When starting up, a leader that is not a voter should not panic. +#[test] +fn test_startup_as_leader_not_voter_issue_920() -> anyhow::Result<()> { + let mut eng = eng(); + // self.id==2 is a voter: + eng.state + .membership_state + .set_effective(Arc::new(EffectiveMembership::new(Some(log_id(2, 1, 3)), m_empty()))); + // Committed vote makes it a leader at startup. + eng.state.vote = UTime::new(TokioInstant::now(), Vote::new_committed(1, 2)); + + eng.startup(); + + assert_eq!(ServerState::Learner, eng.state.server_state); + assert_eq!(eng.output.take_commands(), vec![]); + + Ok(()) +} + #[test] fn test_startup_candidate_becomes_follower() -> anyhow::Result<()> { let mut eng = eng(); diff --git a/openraft/src/membership/membership.rs b/openraft/src/membership/membership.rs index 8ef107114..0af867d60 100644 --- a/openraft/src/membership/membership.rs +++ b/openraft/src/membership/membership.rs @@ -172,6 +172,11 @@ where N: Node, NID: NodeId, { + /// Return true if the given node id is an either voter or learner. + pub(crate) fn contains(&self, node_id: &NID) -> bool { + self.nodes.contains_key(node_id) + } + /// Check if the given `NodeId` exists and is a voter. pub(crate) fn is_voter(&self, node_id: &NID) -> bool { for c in self.configs.iter() { diff --git a/openraft/src/raft/message/client_write.rs b/openraft/src/raft/message/client_write.rs index b063c8c89..4cdc708c1 100644 --- a/openraft/src/raft/message/client_write.rs +++ b/openraft/src/raft/message/client_write.rs @@ -1,6 +1,5 @@ use std::fmt::Debug; -use crate::AppDataResponse; use crate::LogId; use crate::Membership; use crate::MessageSummary; @@ -10,7 +9,7 @@ use crate::RaftTypeConfig; #[cfg_attr( feature = "serde", derive(serde::Deserialize, serde::Serialize), - serde(bound = "C::R: AppDataResponse") + serde(bound = "C::R: crate::AppDataResponse") )] pub struct ClientWriteResponse { /// The id of the log that is applied. diff --git a/openraft/src/raft_state/membership_state/mod.rs b/openraft/src/raft_state/membership_state/mod.rs index fdeddd3a1..7ce53c811 100644 --- a/openraft/src/raft_state/membership_state/mod.rs +++ b/openraft/src/raft_state/membership_state/mod.rs @@ -77,6 +77,12 @@ where Self { committed, effective } } + /// Return true if the given node id is an either voter or learner. + pub(crate) fn contains(&self, id: &NID) -> bool { + self.effective.membership().contains(id) + } + + /// Check if the given `NodeId` exists and is a voter. pub(crate) fn is_voter(&self, id: &NID) -> bool { self.effective.membership().is_voter(id) } diff --git a/openraft/src/raft_state/mod.rs b/openraft/src/raft_state/mod.rs index a37675b9d..8a8d6fdd0 100644 --- a/openraft/src/raft_state/mod.rs +++ b/openraft/src/raft_state/mod.rs @@ -319,24 +319,34 @@ where } /// Determine the current server state by state. + /// + /// See [Determine Server State][] for more details about determining the server state. + /// + /// [Determine Server State]: crate::docs::data::vote#vote-and-membership-define-the-server-state #[tracing::instrument(level = "debug", skip_all)] pub(crate) fn calc_server_state(&self, id: &NID) -> ServerState { tracing::debug!( - is_member = display(self.is_voter(id)), + contains = display(self.membership_state.contains(id)), + is_voter = display(self.is_voter(id)), is_leader = display(self.is_leader(id)), is_leading = display(self.is_leading(id)), "states" ); - if self.is_voter(id) { - if self.is_leader(id) { - ServerState::Leader - } else if self.is_leading(id) { - ServerState::Candidate - } else { + + // Openraft does not require Leader/Candidate to be a voter, i.e., a learner node could + // also be possible to be a leader. Although currently it is not supported. + // Allowing this will simplify leader step down: The leader just run as long as it wants to, + #[allow(clippy::collapsible_else_if)] + if self.is_leader(id) { + ServerState::Leader + } else if self.is_leading(id) { + ServerState::Candidate + } else { + if self.is_voter(id) { ServerState::Follower + } else { + ServerState::Learner } - } else { - ServerState::Learner } } @@ -346,12 +356,25 @@ where /// The node is candidate(leadership is not granted by a quorum) or leader(leadership is granted /// by a quorum) + /// + /// Note that in Openraft Leader does not have to be a voter. See [Determine Server State][] for + /// more details about determining the server state. + /// + /// [Determine Server State]: crate::docs::data::vote#vote-and-membership-define-the-server-state pub(crate) fn is_leading(&self, id: &NID) -> bool { - self.vote.leader_id().voted_for().as_ref() == Some(id) + self.membership_state.contains(id) && self.vote.leader_id().voted_for().as_ref() == Some(id) } + /// The node is leader + /// + /// Leadership is granted by a quorum and the vote is committed. + /// + /// Note that in Openraft Leader does not have to be a voter. See [Determine Server State][] for + /// more details about determining the server state. + /// + /// [Determine Server State]: crate::docs::data::vote#vote-and-membership-define-the-server-state pub(crate) fn is_leader(&self, id: &NID) -> bool { - self.vote.leader_id().voted_for().as_ref() == Some(id) && self.vote.is_committed() + self.is_leading(id) && self.vote.is_committed() } pub(crate) fn assign_log_ids<'a, Ent: RaftEntry + 'a>( diff --git a/tests/tests/life_cycle/main.rs b/tests/tests/life_cycle/main.rs index 61556ecda..f1ae5b7a5 100644 --- a/tests/tests/life_cycle/main.rs +++ b/tests/tests/life_cycle/main.rs @@ -13,3 +13,4 @@ mod t50_follower_restart_does_not_interrupt; mod t50_single_follower_restart; mod t50_single_leader_restart_re_apply_logs; mod t90_issue_607_single_restart; +mod t90_issue_920_non_voter_leader_restart; diff --git a/tests/tests/life_cycle/t90_issue_920_non_voter_leader_restart.rs b/tests/tests/life_cycle/t90_issue_920_non_voter_leader_restart.rs new file mode 100644 index 000000000..a781f50b4 --- /dev/null +++ b/tests/tests/life_cycle/t90_issue_920_non_voter_leader_restart.rs @@ -0,0 +1,41 @@ +use std::sync::Arc; +use std::time::Duration; + +use openraft::storage::RaftLogStorage; +use openraft::Config; +use openraft::ServerState; +use openraft::Vote; + +use crate::fixtures::init_default_ut_tracing; +use crate::fixtures::RaftRouter; + +/// Special case: A leader that is not a member(neither a voter or non-voter) should be started too, +/// as a learner. +#[async_entry::test(worker_threads = 8, init = "init_default_ut_tracing()", tracing_span = "debug")] +async fn issue_920_non_member_leader_restart() -> anyhow::Result<()> { + let config = Arc::new( + Config { + enable_heartbeat: false, + ..Default::default() + } + .validate()?, + ); + + let mut router = RaftRouter::new(config.clone()); + + let (mut log_store, sm) = router.new_store(); + // Set committed vote that believes node 0 is the leader. + log_store.save_vote(&Vote::new_committed(1, 0)).await?; + router.new_raft_node_with_sto(0, log_store, sm).await; + + router + .wait(&0, timeout()) + .state(ServerState::Learner, "node 0 becomes learner when startup") + .await?; + + Ok(()) +} + +fn timeout() -> Option { + Some(Duration::from_millis(1000)) +} diff --git a/tests/tests/membership/t31_remove_leader.rs b/tests/tests/membership/t31_remove_leader.rs index 7ae52c088..123cc76d9 100644 --- a/tests/tests/membership/t31_remove_leader.rs +++ b/tests/tests/membership/t31_remove_leader.rs @@ -94,10 +94,9 @@ async fn remove_leader() -> Result<()> { tracing::info!(log_index, "--- check state of the old leader"); { - let metrics = router.get_metrics(&0)?; + let metrics = router.wait(&0, timeout()).state(ServerState::Learner, "old leader steps down").await?; let cfg = &metrics.membership_config.membership(); - assert!(metrics.state != ServerState::Leader); assert_eq!(metrics.current_term, 1); assert_eq!(metrics.last_log_index, Some(8)); assert_eq!(metrics.last_applied, Some(LogId::new(CommittedLeaderId::new(1, 0), 8))); @@ -110,6 +109,69 @@ async fn remove_leader() -> Result<()> { Ok(()) } +/// Change membership from {0,1} to {1,2,3}, keep node-0 as learner; +/// +/// - The leader should NOT step down after joint log is committed. +#[async_entry::test(worker_threads = 8, init = "init_default_ut_tracing()", tracing_span = "debug")] +async fn remove_leader_and_convert_to_learner() -> Result<()> { + let config = Arc::new( + Config { + election_timeout_min: 800, + election_timeout_max: 1000, + enable_elect: false, + enable_heartbeat: false, + ..Default::default() + } + .validate()?, + ); + let mut router = RaftRouter::new(config.clone()); + + let mut log_index = router.new_cluster(btreeset! {0,1}, btreeset! {2,3}).await?; + + let old_leader = 0; + + tracing::info!(log_index, "--- change membership and retain removed node as learner"); + { + let node = router.get_raft_handle(&old_leader)?; + node.change_membership(btreeset![1, 2, 3], true).await?; + log_index += 2; + } + + tracing::info!(log_index, "--- old leader commits 2 membership log"); + { + router + .wait(&old_leader, timeout()) + .log(Some(log_index), "old leader commits 2 membership log") + .await?; + } + + // Another node(e.g. node-1) in the old cluster may not commit the second membership change log. + // Because to commit the 2nd log it only need a quorum of the new cluster. + + router + .wait(&1, timeout()) + .log_at_least( + Some(log_index - 1), + "node in old cluster commits at least 1 membership log", + ) + .await?; + + tracing::info!(log_index, "--- wait 1 sec, old leader(non-voter) stays as a leader"); + { + tokio::time::sleep(Duration::from_millis(1_000)).await; + + router + .wait(&0, timeout()) + .state( + ServerState::Leader, + "old leader is not removed from membership, it is still a leader", + ) + .await?; + } + + Ok(()) +} + /// Change membership from {0,1,2} to {2}. Access {2} at once. /// /// It should not respond a ForwardToLeader error that pointing to the removed leader. From f1819c2e3d542e191b396e8aa3f4a56b600449ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=82=8E=E6=B3=BC?= Date: Tue, 7 Nov 2023 09:34:26 +0800 Subject: [PATCH 2/2] Refactor: await the reporting of the replication status in the test case 'add_learner_non_blocking'. In this case, a replication failure status is supposed to be reported to the matrix. But sometimes it times out before the failure report can be delivered. To address this issue, just extend the time out duration. --- tests/tests/membership/t11_add_learner.rs | 27 ++++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/tests/tests/membership/t11_add_learner.rs b/tests/tests/membership/t11_add_learner.rs index db3d66f2e..d193bef62 100644 --- a/tests/tests/membership/t11_add_learner.rs +++ b/tests/tests/membership/t11_add_learner.rs @@ -132,12 +132,27 @@ async fn add_learner_non_blocking() -> Result<()> { let raft = router.get_raft_handle(&0)?; raft.add_learner(1, (), false).await?; - sleep(Duration::from_millis(500)).await; - - let metrics = router.get_raft_handle(&0)?.metrics().borrow().clone(); - let repl = metrics.replication.as_ref().unwrap(); - let n1_repl = repl.get(&1); - assert_eq!(Some(&None), n1_repl, "no replication state to the learner is reported"); + let n = 6; + for i in 0..=n { + if i == n { + unreachable!("no replication status is reported to metrics!"); + } + + let metrics = router.get_raft_handle(&0)?.metrics().borrow().clone(); + let repl = metrics.replication.as_ref().unwrap(); + + // The result is Some(&None) when there is no success replication is made, + // and is None if no replication attempt is made(no success or failure is reported to metrics). + let n1_repl = repl.get(&1); + if n1_repl.is_none() { + tracing::info!("--- no replication attempt is made, sleep and retry: {}-th attempt", i); + + sleep(Duration::from_millis(500)).await; + continue; + } + assert_eq!(Some(&None), n1_repl, "no replication state to the learner is reported"); + break; + } } Ok(())