Skip to content

Commit

Permalink
fix show clients + reload (#764)
Browse files Browse the repository at this point in the history
* docker/Dockerfile: revert setup

this doesnt work idk why

Signed-off-by: rkhapov <[email protected]>
(cherry picked from commit 539fc03)

* docker/reload: add reload with watchdog test

There is problem with show clients and watchdog, added test
will help to fix it in future commits

Signed-off-by: rkhapov <[email protected]>

* sources/storage.h: add storage link into watchdog

To use it later

Signed-off-by: rkhapov <[email protected]>

* test-lag.sh: fix typo

Signed-off-by: rkhapov <[email protected]>

* sources/storage.c: fix use after free

of storage in watchdog cycle, now storage deallocation waits for
watchdog to finish

Signed-off-by: rkhapov <[email protected]>

* docker/Dockerfile: store gdb.py

It is very usefull

Signed-off-by: rkhapov <[email protected]>

* test-reload.sh: remove dbg sleep

Signed-off-by: rkhapov <[email protected]>

* sources/router.c: fix client pool

When routing failed, clients stayed in client pool, this can lead to
use after free in case of no od_router_unroute

Signed-off-by: rkhapov <[email protected]>

* Makefile: add run_test_dbg

Can be useful sometimes

Signed-off-by: rkhapov <[email protected]>

* sources/console.c: fix storage user len

Signed-off-by: rkhapov <[email protected]>

---------

Signed-off-by: rkhapov <[email protected]>
Co-authored-by: rkhapov <[email protected]>
  • Loading branch information
rkhapov and rkhapov authored Feb 14, 2025
1 parent 1357826 commit 0d6bf31
Show file tree
Hide file tree
Showing 14 changed files with 247 additions and 18 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ run_test_asan:
./cleanup-docker.sh
ODYSSEY_TEST_BUILD_TYPE=build_asan docker compose -f ./docker-compose-test.yml up --exit-code-from odyssey

run_test_dbg:
./cleanup-docker.sh
ODYSSEY_TEST_BUILD_TYPE=build_dbg docker compose -f ./docker-compose-test.yml up --exit-code-from odyssey

submit-cov:
mkdir cov-build && cd cov-build
$(COV-BIN-PATH)/cov-build --dir cov-int make -j 4 && tar czvf odyssey.tgz cov-int && curl --form token=$(COV_TOKEN) --form email=$(COV_ISSUER) --form file=@./odyssey.tgz --form version="2" --form description="scalable potgresql connection pooler" https://scan.coverity.com/builds\?project\=yandex%2Fodyssey
Expand Down
6 changes: 4 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ COPY ./docker/odyssey.conf /etc/odyssey/odyssey.conf
COPY ./docker/lagpolling/lag-conf.conf /etc/odyssey/lag-conf.conf
COPY ./docker/prep_stmts/pstmts.conf /etc/odyssey/pstmts.conf
COPY ./docker/config-validation/configs /etc/odyssey/configs
COPY ./docker/reload/config.conf /etc/odyssey/reload-config.conf

COPY --from=golang-tests-builder /ody_integration_test/pkg/ody_integration_test /ody_integration_test
COPY --from=golang-tests-builder /prep_stmts/pkg/pstmts-test /pstmts-test
Expand Down Expand Up @@ -109,13 +110,14 @@ COPY ./docker/group /group
COPY ./docker/xproto /xproto
COPY ./docker/copy /copy
COPY ./docker/gorm /gorm
COPY ./docker/reload /reload

COPY ./third_party/machinarium/gdb/machinarium-gdb.py /gdb.py

FROM test-runner AS entrypoint

COPY ./docker/entrypoint.sh /usr/local/bin/entrypoint.sh

RUN chmod a+x /usr/local/bin/entrypoint.sh

RUN /usr/bin/setup

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
10 changes: 7 additions & 3 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ set -ex
sleep 5
ody-stop

# setup is performed in Dockerfile
sudo -u postgres /usr/lib/postgresql/16/bin/pg_ctl -D /var/lib/postgresql/16/main/ start
sudo -u postgres /usr/lib/postgresql/16/bin/pg_ctl -D /var/lib/postgresql/16/repl/ -o '-p 5433' start
setup

/reload/test-reload.sh
if [ $? -eq 1 ]
then
exit 1
fi

# group
/group/test_group.sh
Expand Down
2 changes: 1 addition & 1 deletion docker/lagpolling/test-lag.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ service postgresql restart || true

sleep 1

PGPASSWORD=lolol psql -h localhost -p6432 -dpostgres -Uuser1 -c 'select 3' || true1
PGPASSWORD=lolol psql -h localhost -p6432 -dpostgres -Uuser1 -c 'select 3' || exit 1

# test bad backend connections closed properly

Expand Down
7 changes: 7 additions & 0 deletions docker/reload/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
test: show_clients reloads

show_clients:
/reload/reloads.sh

reloads:
/reload/show-clients.sh
152 changes: 152 additions & 0 deletions docker/reload/config.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
pid_file "/var/run/odyssey.pid"
daemonize yes

unix_socket_dir "/tmp"
unix_socket_mode "0644"

log_format "%p %t %l [%i %s] (%c) %m\n"

log_to_stdout yes

log_syslog no
log_syslog_ident "odyssey"
log_syslog_facility "daemon"

log_debug yes
log_config yes
log_session yes
log_query yes
log_stats yes
stats_interval 60

workers 5
resolvers 1

readahead 8192

cache_coroutine 0

coroutine_stack_size 8

nodelay yes

keepalive 15
keepalive_keep_interval 75
keepalive_probes 9

keepalive_usr_timeout 0


listen {
host "*"
port 6432
backlog 128
}


storage "postgres_server" {
type "remote"
host "localhost"
port 5432

watchdog {
authentication "none"

storage "postgres_server"
storage_db "postgres"
storage_user "postgres"

pool_routing "internal"
pool "transaction"
pool_size 10

pool_timeout 0
pool_ttl 1201

pool_discard yes
pool_cancel yes

server_lifetime 1901
log_debug no

watchdog_lag_query "SELECT TRUNC(EXTRACT(EPOCH FROM NOW())) - 100"
watchdog_lag_interval 10
}
}

database default {
user default {
authentication "none"

storage "postgres_server"
pool "transaction"
pool_size 0

pool_timeout 0

pool_ttl 60

pool_discard no

pool_cancel yes

pool_rollback yes

client_fwd_error yes

application_name_add_host yes

server_lifetime 3600
log_debug no

quantiles "0.99,0.95,0.5"
client_max 107
}
}

database "postgres" {
user "user1" {
authentication "none"
storage "postgres_server"

pool "transaction"
pool_size 0
pool_timeout 0
pool_ttl 60
pool_discard no
pool_cancel yes
pool_rollback yes

catchup_timeout 10
catchup_checks 10

client_fwd_error yes

application_name_add_host yes

server_lifetime 3600
log_debug no

quantiles "0.99,0.95,0.5"
client_max 107
}
}
storage "local" {
type "local"
}

database "console" {
user default {
authentication "none"
role "admin"
pool "session"
storage "local"
}
}


locks_dir "/tmp/odyssey"

graceful_die_on_errors yes
enable_online_restart yes
bindwith_reuseport yes
9 changes: 9 additions & 0 deletions docker/reload/reloads.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

set -e

sleep 2

for run in {1..1000}; do
kill -s HUP $(pidof odyssey) || exit 1
done
9 changes: 9 additions & 0 deletions docker/reload/show-clients.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

set -e

sleep 1

for run in {1..1000}; do
PGPASSWORD=lolol psql -h localhost -p6432 -dconsole -Uuser1 -c 'show clients' 1>/dev/null
done
17 changes: 17 additions & 0 deletions docker/reload/test-reload.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash -x

/usr/bin/odyssey /etc/odyssey/reload-config.conf

make -j 2 -f /reload/Makefile || {
echo "ERROR: simultaneous reloads with 'show clients' failed"

cat /var/log/odyssey.log
echo "
"
cat /var/log/postgresql/postgresql-16-main.log

exit 1
}

ody-stop
1 change: 1 addition & 0 deletions sources/config_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,7 @@ static int od_config_reader_storage(od_config_reader_t *reader,
extensions) ==
NOT_OK_RESPONSE)
goto error;
storage->watchdog->storage = storage;
continue;
default: {
od_config_reader_error(reader, &token,
Expand Down
2 changes: 1 addition & 1 deletion sources/console.c
Original file line number Diff line number Diff line change
Expand Up @@ -1368,7 +1368,7 @@ static inline int od_console_show_clients_callback(od_client_t *client,
/* storage_user */
rc = kiwi_be_write_data_row_add(stream, offset,
client->rule->storage_user,
client->rule->storage_user_len);
client->rule->storage_user_len - 1);
if (rc == NOT_OK_RESPONSE)
return NOT_OK_RESPONSE;
/* addr */
Expand Down
12 changes: 10 additions & 2 deletions sources/router.c
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,8 @@ od_router_status_t od_router_attach(od_router_t *router, od_client_t *client,
* and do not want to start a new one */
if (route->server_pool.count_active == 0) {
od_route_unlock(route);
// client is in queue state and must be removed from client pool
od_router_unroute(router, client);
return OD_ROUTER_ERROR_TIMEDOUT;
}
} else {
Expand Down Expand Up @@ -638,8 +640,11 @@ od_router_status_t od_router_attach(od_router_t *router, od_client_t *client,
od_route_unlock(route);

int rc = od_io_read_stop(&client->io);
if (rc == -1)
if (rc == -1) {
// client is in queue state and must be removed from client pool
od_router_unroute(router, client);
return OD_ROUTER_ERROR;
}

/*
* Wait wakeup condition for pool_timeout milliseconds.
Expand All @@ -651,8 +656,11 @@ od_router_status_t od_router_attach(od_router_t *router, od_client_t *client,
if (timeout == 0)
timeout = UINT32_MAX;
rc = od_route_wait(route, timeout);
if (rc == -1)
if (rc == -1) {
// client is in queue state and must be removed from client pool
od_router_unroute(router, client);
return OD_ROUTER_ERROR_TIMEDOUT;
}

od_route_lock(route);
}
Expand Down
30 changes: 21 additions & 9 deletions sources/storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ od_storage_watchdog_t *od_storage_watchdog_allocate(od_global_t *global)
memset(watchdog, 0, sizeof(od_storage_watchdog_t));
watchdog->global = global;
watchdog->online = 1;
od_atomic_u64_set(&watchdog->finished, 0ULL);
pthread_mutex_init(&watchdog->mu, NULL);

return watchdog;
Expand All @@ -33,14 +34,25 @@ static inline int od_storage_watchdog_is_online(od_storage_watchdog_t *watchdog)
return ret;
}

static inline int od_storage_watchdog_soft_exit(od_storage_watchdog_t *watchdog)
static inline int
od_storage_watchdog_set_offline(od_storage_watchdog_t *watchdog)
{
pthread_mutex_lock(&watchdog->mu);
watchdog->online = 0;
pthread_mutex_unlock(&watchdog->mu);
return OK_RESPONSE;
}

static inline void
od_storage_watchdog_soft_exit(od_storage_watchdog_t *watchdog)
{
od_storage_watchdog_set_offline(watchdog);
while (od_atomic_u64_of(&watchdog->finished) != 1ULL) {
machine_wait(500);
}
od_storage_watchdog_free(watchdog);
}

int od_storage_watchdog_free(od_storage_watchdog_t *watchdog)
{
if (watchdog == NULL) {
Expand Down Expand Up @@ -83,6 +95,10 @@ od_rule_storage_t *od_rules_storage_allocate(void)

void od_rules_storage_free(od_rule_storage_t *storage)
{
if (storage->watchdog) {
od_storage_watchdog_soft_exit(storage->watchdog);
}

if (storage->name)
free(storage->name);
if (storage->type)
Expand All @@ -94,10 +110,6 @@ void od_rules_storage_free(od_rule_storage_t *storage)
od_tls_opts_free(storage->tls_opts);
}

if (storage->watchdog) {
od_storage_watchdog_soft_exit(storage->watchdog);
}

if (storage->endpoints_count) {
for (size_t i = 0; i < storage->endpoints_count; ++i) {
free(storage->endpoints[i].host);
Expand Down Expand Up @@ -434,11 +446,11 @@ void od_storage_watchdog_watch(void *arg)
od_instance_t *instance = global->instance;

od_debug(&instance->logger, "watchdog", NULL, NULL,
"start lag polling watchdog");
"start watchdog for storage '%s'", watchdog->storage->name);

od_storage_watchdog_do_polling_loop(watchdog);

od_debug(&instance->logger, "watchdog", NULL, NULL,
"deallocating storage watchdog");
od_storage_watchdog_free(watchdog);
od_log(&instance->logger, "watchdog", NULL, NULL,
"finishing watchdog for storage '%s'", watchdog->storage->name);
od_atomic_u64_set(&watchdog->finished, 1ULL);
}
4 changes: 4 additions & 0 deletions sources/storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ struct od_storage_watchdog {
int online;

od_global_t *global;

od_rule_storage_t *storage;

od_atomic_u64_t finished;
};

od_storage_watchdog_t *od_storage_watchdog_allocate(od_global_t *);
Expand Down

0 comments on commit 0d6bf31

Please sign in to comment.