Skip to content

Commit

Permalink
Merge pull request #4517 from sysown/v2.x_hostgroup_online_servers
Browse files Browse the repository at this point in the history
Enforcing maximum online servers in a hostgroup
  • Loading branch information
renecannao authored May 17, 2024
2 parents 50e9d9a + ca93da5 commit 5514822
Show file tree
Hide file tree
Showing 10 changed files with 320 additions and 114 deletions.
18 changes: 17 additions & 1 deletion include/MySQL_HostGroups_Manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ class MySrvC { // MySQL Server Container
uint16_t gtid_port;
uint16_t flags;
int64_t weight;
enum MySerStatus status;
unsigned int compression;
int64_t max_connections;
unsigned int aws_aurora_current_lag_us;
Expand Down Expand Up @@ -263,6 +262,11 @@ class MySrvC { // MySQL Server Container
max_connections_used = connections_used;
return max_connections_used;
}
// Setter for the server status. Only the declaration is visible here; the definition lives elsewhere.
// NOTE(review): presumably this also keeps the owning hostgroup's online-server count current -- confirm against the definition.
void set_status(MySerStatus _status);
// Read-only accessor returning the current status of this server.
inline
MySerStatus get_status() const { return status; }
private:
// Private so that code outside the class has to go through set_status()/get_status().
enum MySerStatus status;
};

class MySrvList { // MySQL Server List
Expand All @@ -282,6 +286,8 @@ class MySrvList { // MySQL Server List
class MyHGC { // MySQL Host Group Container
public:
unsigned int hid;
// Number of servers in this hostgroup with status MYSQL_SERVER_STATUS_ONLINE, as last computed by refresh_online_server_count().
std::atomic<uint32_t> num_online_servers;
// Timestamp of the last message written by log_num_online_server_count_error(); 0 means nothing has been logged yet.
time_t last_log_time_num_online_servers;
unsigned long long current_time_now;
uint32_t new_connections_now;
MySrvList *mysrvs;
Expand Down Expand Up @@ -313,6 +319,13 @@ class MyHGC { // MySQL Host Group Container
MyHGC(int);
~MyHGC();
MySrvC *get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_lag_ms, MySQL_Session *sess);
// Recounts the ONLINE servers of this hostgroup and stores the result in num_online_servers.
void refresh_online_server_count();
// Reports that the online-server limit is exceeded; skipped unless more than 10 seconds passed since the previous report.
void log_num_online_server_count_error();
// True while the cached number of online servers does not exceed attributes.max_num_online_servers.
inline
bool online_servers_within_threshold() const {
	return num_online_servers.load(std::memory_order_relaxed) <= attributes.max_num_online_servers;
}
};

class Group_Replication_Info {
Expand Down Expand Up @@ -930,6 +943,9 @@ class MySQL_HostGroups_Manager {
void init();
void wrlock();
void wrunlock();
#ifdef DEBUG
// Debug-only flag; MyHGC::refresh_online_server_count() asserts that it is set.
// NOTE(review): presumably maintained by wrlock()/wrunlock() -- confirm in their definitions.
bool is_locked = false;
#endif
int servers_add(SQLite3_result *resultset);
/**
* @brief Generates a new global checksum for module 'mysql_servers_v2' using the provided hash.
Expand Down
82 changes: 58 additions & 24 deletions lib/MyHGC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ MyHGC::MyHGC(int _hid) {
servers_defaults.weight = -1;
servers_defaults.max_connections = -1;
servers_defaults.use_ssl = -1;
num_online_servers.store(0, std::memory_order_relaxed);;
last_log_time_num_online_servers = 0;
}

void MyHGC::reset_attributes() {
Expand Down Expand Up @@ -75,39 +77,43 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_
//int j=0;
for (j=0; j<l; j++) {
mysrvc=mysrvs->idx(j);
if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE) { // consider this server only if ONLINE
if (mysrvc->ConnectionsUsed->conns_length() < mysrvc->max_connections) { // consider this server only if didn't reach max_connections
if ( mysrvc->current_latency_us < ( mysrvc->max_latency_us ? mysrvc->max_latency_us : mysql_thread___default_max_latency_ms*1000 ) ) { // consider the host only if not too far
if (gtid_trxid) {
if (MyHGM->gtid_exists(mysrvc, gtid_uuid, gtid_trxid)) {
sum+=mysrvc->weight;
TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length();
mysrvcCandidates[num_candidates]=mysrvc;
num_candidates++;
}
} else {
if (max_lag_ms >= 0) {
if ((unsigned int)max_lag_ms >= mysrvc->aws_aurora_current_lag_us/1000) {
if (mysrvc->get_status() == MYSQL_SERVER_STATUS_ONLINE) { // consider this server only if ONLINE
if (mysrvc->myhgc->num_online_servers.load(std::memory_order_relaxed) <= mysrvc->myhgc->attributes.max_num_online_servers) { // number of online servers in HG is within configured range
if (mysrvc->ConnectionsUsed->conns_length() < mysrvc->max_connections) { // consider this server only if didn't reach max_connections
if (mysrvc->current_latency_us < (mysrvc->max_latency_us ? mysrvc->max_latency_us : mysql_thread___default_max_latency_ms*1000)) { // consider the host only if not too far
if (gtid_trxid) {
if (MyHGM->gtid_exists(mysrvc, gtid_uuid, gtid_trxid)) {
sum+=mysrvc->weight;
TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length();
mysrvcCandidates[num_candidates]=mysrvc;
num_candidates++;
} else {
sess->thread->status_variables.stvar[st_var_aws_aurora_replicas_skipped_during_query]++;
}
} else {
sum+=mysrvc->weight;
TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length();
mysrvcCandidates[num_candidates]=mysrvc;
num_candidates++;
if (max_lag_ms >= 0) {
if ((unsigned int)max_lag_ms >= mysrvc->aws_aurora_current_lag_us / 1000) {
sum+=mysrvc->weight;
TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length();
mysrvcCandidates[num_candidates]=mysrvc;
num_candidates++;
} else {
sess->thread->status_variables.stvar[st_var_aws_aurora_replicas_skipped_during_query]++;
}
} else {
sum+=mysrvc->weight;
TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length();
mysrvcCandidates[num_candidates]=mysrvc;
num_candidates++;
}
}
}
} else {
max_connections_reached = true;
}
} else {
max_connections_reached = true;
mysrvc->myhgc->log_num_online_server_count_error();
}
} else {
if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED) {
if (mysrvc->get_status() == MYSQL_SERVER_STATUS_SHUNNED) {
// try to recover shunned servers
if (mysrvc->shunned_automatic && mysql_thread___shun_recovery_time_sec) {
time_t t;
Expand All @@ -132,7 +138,7 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_
proxy_info("Unshunning server %s:%d.\n", mysrvc->address, mysrvc->port);
}
#endif
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
mysrvc->set_status(MYSQL_SERVER_STATUS_ONLINE);
mysrvc->shunned_automatic=false;
mysrvc->shunned_and_kill_all_connections=false;
mysrvc->connect_ERR_at_time_last_detected_error=0;
Expand Down Expand Up @@ -223,9 +229,9 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_
}
for (j=0; j<l; j++) {
mysrvc=mysrvs->idx(j);
if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED && mysrvc->shunned_automatic==true) {
if (mysrvc->get_status() == MYSQL_SERVER_STATUS_SHUNNED && mysrvc->shunned_automatic == true) {
if ((t - mysrvc->time_last_detected_error) > max_wait_sec) {
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
mysrvc->set_status(MYSQL_SERVER_STATUS_ONLINE);
mysrvc->shunned_automatic=false;
mysrvc->connect_ERR_at_time_last_detected_error=0;
mysrvc->time_last_detected_error=0;
Expand Down Expand Up @@ -386,3 +392,31 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_
#endif // TEST_AURORA
return NULL; // if we reach here, we couldn't find any target
}

void MyHGC::refresh_online_server_count() {
if (__sync_fetch_and_add(&glovars.shutdown, 0) != 0)
return;
#ifdef DEBUG
assert(MyHGM->is_locked);
#endif
unsigned int online_servers_count = 0;
for (unsigned int i = 0; i < mysrvs->servers->len; i++) {
MySrvC* mysrvc = (MySrvC*)mysrvs->servers->index(i);
if (mysrvc->get_status() == MYSQL_SERVER_STATUS_ONLINE) {
online_servers_count++;
}
}
num_online_servers.store(online_servers_count, std::memory_order_relaxed);
}

/**
 * @brief Logs that the hostgroup has more online servers than configured.
 *
 * The message is emitted on the first call, and afterwards only when more
 * than 10 seconds have elapsed since the previously emitted message, so that
 * repeated calls do not flood the error log.
 */
void MyHGC::log_num_online_server_count_error() {
	const time_t now = time(NULL);
	const bool logged_before = (last_log_time_num_online_servers != 0);
	// Still inside the 10 second quiet period after the previous message: stay silent.
	if (logged_before && (now - last_log_time_num_online_servers) <= 10) {
		return;
	}
	last_log_time_num_online_servers = now;
	proxy_error(
		"Number of online servers detected in a hostgroup exceeds the configured maximum online servers. hostgroup:%u, num_online_servers:%u, max_online_servers:%u\n",
		hid, num_online_servers.load(std::memory_order_relaxed), attributes.max_num_online_servers);
}
Loading

0 comments on commit 5514822

Please sign in to comment.