From 8bc60ed283bf0a9892809a5bf0ea57732b50b532 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 22 Nov 2024 17:36:24 +0100 Subject: [PATCH 1/3] Fix worker threads stalling race condition on RESUME command During a RESUME operation, if a 'MySQL_Thread' is bootstrapping listeners (in `MySQL_Thread::run_BootstrapListener`) and detect that `MySQL_Threads_Handler::bootstrapping_listeners` is 'false', the thread prematurely shuts down its own bootstrapping flag from 'mypolls' (`ProxySQL_Poll::bootstrapping_listeners`). Since this thread wont ever bootstrap its corresponding listening sockets, the other working threads will be stalled waiting on it, eventually triggering the watchdog and crashing the instance. Since the bootstrapping operation is sequential, it's expected that all the threads but the one starting their listening sockets are in an active wait. A sensible delay has been introduced to reduce the overhead of such wait. --- include/ProxySQL_Poll.h | 1 - lib/MySQL_Thread.cpp | 16 +++++----------- lib/PgSQL_Thread.cpp | 31 ++++++++++++------------------- lib/ProxySQL_Poll.cpp | 1 - 4 files changed, 17 insertions(+), 32 deletions(-) diff --git a/include/ProxySQL_Poll.h b/include/ProxySQL_Poll.h index 78520ac17..406c0e993 100644 --- a/include/ProxySQL_Poll.h +++ b/include/ProxySQL_Poll.h @@ -35,7 +35,6 @@ class ProxySQL_Poll { T **myds; unsigned long long *last_recv; unsigned long long *last_sent; - std::atomic bootstrapping_listeners; volatile int pending_listener_add; volatile int pending_listener_del; unsigned int poll_timeout; diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 5fca5cf23..54c057571 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -3127,15 +3127,11 @@ void MySQL_Thread::run_BootstrapListener() { if (n) { poll_listener_add(n); assert(__sync_bool_compare_and_swap(&mypolls.pending_listener_add,n,0)); - } else { - if (GloMTH->bootstrapping_listeners == false) { - // we stop looping - mypolls.bootstrapping_listeners = false; - } } -#ifdef DEBUG - usleep(5+rand()%10); -#endif + // The delay for the active-wait is a fraction of 'poll_timeout'. Since other + // threads may be waiting on poll for further operations, checks are meaningless + // until that timeout expires (other workers make progress). + usleep(std::min(std::max(mysql_thread___poll_timeout/20, 10000), 40000) + (rand() % 2000)); } } @@ -3238,9 +3234,7 @@ void MySQL_Thread::run() { #endif // IDLE_THREADS pthread_mutex_unlock(&thread_mutex); - if (unlikely(mypolls.bootstrapping_listeners == true)) { - run_BootstrapListener(); - } + run_BootstrapListener(); // flush mysql log file GloMyLogger->flush(); diff --git a/lib/PgSQL_Thread.cpp b/lib/PgSQL_Thread.cpp index a28fbf591..bedbba3dc 100644 --- a/lib/PgSQL_Thread.cpp +++ b/lib/PgSQL_Thread.cpp @@ -2998,26 +2998,19 @@ void PgSQL_Thread::run() { #endif // IDLE_THREADS pthread_mutex_unlock(&thread_mutex); - if (unlikely(mypolls.bootstrapping_listeners == true)) { - while ( // spin here if ... - (n = __sync_add_and_fetch(&mypolls.pending_listener_add, 0)) // there is a new listener to add - || - (GloPTH->bootstrapping_listeners == true) // PgSQL_Thread_Handlers has more listeners to configure - ) { - if (n) { - poll_listener_add(n); - assert(__sync_bool_compare_and_swap(&mypolls.pending_listener_add, n, 0)); - } - else { - if (GloPTH->bootstrapping_listeners == false) { - // we stop looping - mypolls.bootstrapping_listeners = false; - } - } -#ifdef DEBUG - usleep(5 + rand() % 10); -#endif + while ( // spin here if ... + (n = __sync_add_and_fetch(&mypolls.pending_listener_add, 0)) // there is a new listener to add + || + (GloPTH->bootstrapping_listeners == true) // PgSQL_Thread_Handlers has more listeners to configure + ) { + if (n) { + poll_listener_add(n); + assert(__sync_bool_compare_and_swap(&mypolls.pending_listener_add, n, 0)); } + // The delay for the active-wait is a fraction of 'poll_timeout'. Since other + // threads may be waiting on poll for further operations, checks are meaningless + // until that timeout expires (other workers make progress). + usleep(std::min(std::max(pgsql_thread___poll_timeout/20, 10000), 40000) + (rand() % 2000)); } proxy_debug(PROXY_DEBUG_NET, 7, "poll_timeout=%u\n", mypolls.poll_timeout); diff --git a/lib/ProxySQL_Poll.cpp b/lib/ProxySQL_Poll.cpp index 66135ad67..425bca51d 100644 --- a/lib/ProxySQL_Poll.cpp +++ b/lib/ProxySQL_Poll.cpp @@ -68,7 +68,6 @@ ProxySQL_Poll::ProxySQL_Poll() { len=0; pending_listener_add=0; pending_listener_del=0; - bootstrapping_listeners = true; size=MIN_POLL_LEN; fds=(struct pollfd *)malloc(size*sizeof(struct pollfd)); myds=(T**)malloc(size*sizeof(T *)); From 74c74adf5e2d0e3ab06e1a704104b8cd1e56098e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 22 Nov 2024 17:36:53 +0100 Subject: [PATCH 2/3] Prevent busy-waiting in active wait during 'listener_del' Since the operation of stopping each worker thread listeners is performed during 'maintenance_loops', the active wait taking place in 'listener_del' is likely to take some time. A sensible delay has been added to reduce unneccesary load. --- lib/MySQL_Thread.cpp | 6 +++++- lib/PgSQL_Thread.cpp | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 54c057571..0b2f76e2b 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -1204,7 +1204,11 @@ int MySQL_Threads_Handler::listener_del(const char *iface) { } for (i=0;imypolls.pending_listener_del,0)); + while(__sync_fetch_and_add(&thr->mypolls.pending_listener_del,0)) { + // Since 'listeners_stop' is performed in 'maintenance_loops' by the + // workers this active-wait is likely to take some time. + usleep(std::min(std::max(mysql_thread___poll_timeout/20, 10000), 40000)); + } } MLM->del(idx); #ifdef SO_REUSEPORT diff --git a/lib/PgSQL_Thread.cpp b/lib/PgSQL_Thread.cpp index bedbba3dc..52cf5a92f 100644 --- a/lib/PgSQL_Thread.cpp +++ b/lib/PgSQL_Thread.cpp @@ -1158,7 +1158,11 @@ int PgSQL_Threads_Handler::listener_del(const char* iface) { } for (i = 0; i < num_threads; i++) { PgSQL_Thread* thr = (PgSQL_Thread*)pgsql_threads[i].worker; - while (__sync_fetch_and_add(&thr->mypolls.pending_listener_del, 0)); + while (__sync_fetch_and_add(&thr->mypolls.pending_listener_del, 0)) { + // Since 'listeners_stop' is performed in 'maintenance_loops' by the + // workers this active-wait is likely to take some time. + usleep(std::min(std::max(pgsql_thread___poll_timeout/20, 10000), 40000)); + } } MLM->del(idx); #ifdef SO_REUSEPORT From 4a75f74f17ecad3a8f83e234c47ae96c3724f0ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 22 Nov 2024 20:03:39 +0100 Subject: [PATCH 3/3] Fix potential invalid syntax in TAP test queries This is a follow-up of commit #19c8f8698 --- test/tap/tests/reg_test_4402-mysql_fields-t.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/tap/tests/reg_test_4402-mysql_fields-t.cpp b/test/tap/tests/reg_test_4402-mysql_fields-t.cpp index ff0d3e586..12dac63f5 100644 --- a/test/tap/tests/reg_test_4402-mysql_fields-t.cpp +++ b/test/tap/tests/reg_test_4402-mysql_fields-t.cpp @@ -106,7 +106,9 @@ int main(int argc, char** argv) { // to check table alias issue: { - const std::string& query = "SELECT data FROM testdb.dummy_table AS " + generate_random_string(length); + // NOTE: The randomly generated string should be escaped \`\`, otherwise could collide + // with SQL reserved words, causing an invalid test failure. + const std::string& query = "SELECT data FROM testdb.dummy_table AS `" + generate_random_string(length) + "`"; MYSQL_QUERY__(proxysql, query.c_str()); MYSQL_RES* res = mysql_use_result(proxysql);