Skip to content

Commit

Permalink
use leadership_expiry_ to deem nodes unhealthy
Browse files Browse the repository at this point in the history
  • Loading branch information
antoniofilipovic committed Jul 24, 2024
1 parent f4c8e05 commit 29e643a
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 15 deletions.
6 changes: 3 additions & 3 deletions include/libnuraft/raft_server.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -886,11 +886,11 @@ protected:
int32 get_quorum_for_election();
int32 get_quorum_for_commit();
int32 get_leadership_expiry();
std::list<ptr<peer>> get_not_responding_peers();
size_t get_not_responding_peers_count();
std::list<ptr<peer>> get_not_responding_peers(int expiry = 0);
size_t get_not_responding_peers_count(int expiry = 0);
size_t get_num_stale_peers();

void apply_to_not_responding_peers(const std::function<void(const ptr<peer>&)>&);
void apply_to_not_responding_peers(const std::function<void(const ptr<peer>&)>&, int expiry = 0);

ptr<resp_msg> handle_append_entries(req_msg& req);
ptr<resp_msg> handle_prevote_req(req_msg& req);
Expand Down
32 changes: 20 additions & 12 deletions src/raft_server.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -602,13 +602,13 @@ int32 raft_server::get_leadership_expiry() {
int expiry = params->leadership_expiry_;
if (expiry == 0) {
// If 0, default expiry: 20x of heartbeat.
expiry = params->heart_beat_interval_ *
raft_server::raft_limits_.leadership_limit_;
return params->heart_beat_interval_ *
raft_server::raft_limits_.leadership_limit_;
}
return expiry;
}

std::list<ptr<peer>> raft_server::get_not_responding_peers() {
std::list<ptr<peer>> raft_server::get_not_responding_peers(int expiry) {
std::list<ptr<peer>> rs;
auto cb = [&rs](const ptr<peer>& peer_ptr) {
rs.push_back(peer_ptr);
Expand All @@ -617,21 +617,25 @@ std::list<ptr<peer>> raft_server::get_not_responding_peers() {
return rs;
}

size_t raft_server::get_not_responding_peers_count() {
size_t raft_server::get_not_responding_peers_count(int expiry) {
size_t num_not_resp_nodes = 0;
auto cb = [&num_not_resp_nodes](const ptr<peer>&) {
++num_not_resp_nodes;
};
apply_to_not_responding_peers(cb);
apply_to_not_responding_peers(cb, expiry);
return num_not_resp_nodes;
}

void raft_server::apply_to_not_responding_peers(
const std::function<void(const ptr<peer>&)>& callback) {
const std::function<void(const ptr<peer>&)>& callback, int expiry) {
// Check if quorum nodes are not responding
// (i.e., don't respond 20x heartbeat time long).
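// (i.e., have not responded for `expiry` if a non-zero value is passed as the
// argument, or for 20x heartbeat time otherwise).
// The default `expiry` of 0 selects the heartbeat-based limit; a caller that
// wants leadership_expiry_ to apply passes that value explicitly.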
ptr<raft_params> params = ctx_->get_params();
int expiry = params->heart_beat_interval_ * raft_server::raft_limits_.response_limit_;

if(expiry == 0){
expiry = params->heart_beat_interval_ * raft_server::raft_limits_.response_limit_;
}

// Check not responding peers.
for (auto& entry: peers_) {
Expand Down Expand Up @@ -1110,12 +1114,16 @@ bool raft_server::check_leadership_validity() {
// Check if quorum is not responding.
int32 num_voting_members = get_num_voting_members();


int leadership_expiry = get_leadership_expiry();
int32 nr_peers = (int32)get_not_responding_peers_count();
if (leadership_expiry < 0) {
// Negative expiry: leadership will never expire.
nr_peers = 0;

int32 nr_peers{0};

// Count not-responding peers only when the expiry is positive. A negative
// expiry means leadership will never expire, so nr_peers stays 0.
if(leadership_expiry > 0){
nr_peers = (int32)get_not_responding_peers_count(leadership_expiry);
}

int32 min_quorum_size = get_quorum_for_commit() + 1;
if ( (num_voting_members - nr_peers) < min_quorum_size ) {
p_er("%d nodes (out of %d, %zu including learners) are not "
Expand Down

0 comments on commit 29e643a

Please sign in to comment.