Skip to content

Commit

Permalink
cluster checks are now unconditional and have exponential backoff. pr…
Browse files Browse the repository at this point in the history
…eviously they ran at a fixed interval, and only when no command that could generate a MOVED/ASK error had been sent. That wasn't good enough for detecting newly added slaves.
  • Loading branch information
slact committed May 17, 2022
1 parent eb1da51 commit 6f46080
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 93 deletions.
53 changes: 43 additions & 10 deletions src/nchan_commands.rb
Original file line number Diff line number Diff line change
Expand Up @@ -593,15 +593,6 @@
default: "4m",
info: "Send a keepalive command to redis to keep the Nchan redis clients from disconnecting. Set to 0 to disable."

nchan_redis_cluster_check_interval [:main, :srv, :upstream, :loc],
:ngx_conf_set_sec_slot,
[:loc_conf, :"redis.cluster_check_interval"],

group: "storage",
tags: ['redis'],
default: "5s",
info: "Send a CLUSTER INFO command to each connected Redis node to see if the cluster config epoch has changed. Sent only when in Cluster mode and if any other command that may result in a MOVE error has not been sent in the configured time."

nchan_redis_load_scripts_unconditionally [:upstream],
:ngx_conf_set_flag_slot,
[:srv_conf, "redis.load_scripts_unconditionally"],
Expand Down Expand Up @@ -677,7 +668,7 @@
default: "0.5 (increase delay by 50% each try)",
info: "Add an exponentially increasing delay to Redis connection retries. `Delay[n] = (Delay[n-1] + jitter) * (nchan_redis_reconnect_delay_backoff + 1)`."

nchan_redis_reconnect_delay_max [:upstream],
nchan_redis_reconnect_delay_max [:upstream],
:ngx_conf_set_msec_slot,
[:srv_conf, :"redis.reconnect_delay.max"],

Expand All @@ -687,6 +678,48 @@
value: "<time> (0 to disable)",
info: "Maximum Redis reconnection delay after backoff and jitter."

nchan_redis_cluster_check_interval_min [:upstream],
:ngx_conf_set_msec_slot,
[:srv_conf, :"redis.cluster_check_interval.min"],

alt: :nchan_redis_cluster_check_interval,
group: "storage",
tags: ['redis'],
value: "<time>",
default: "1s (0 to disable)",
info: "When connected to a cluster, periodically check the cluster state and layout via a random master node."

nchan_redis_cluster_check_interval_jitter [:upstream],
:ngx_conf_set_jitter,
[:srv_conf, :"redis.cluster_check_interval.jitter_multiplier"],

group: "storage",
tags: ['redis'],
value: "<floating point> >= 0, (0 to disable)",
default: "0.2 (20% of interval value)",
info: "Introduce random jitter to the Redis cluster check interval, where the range is `±(cluster_check_interval * nchan_redis_cluster_check_interval_jitter) / 2`."

nchan_redis_cluster_check_interval_backoff [:upstream],
:ngx_conf_set_exponential_backoff,
[:srv_conf, :"redis.cluster_check_interval.backoff_multiplier"],

group: "storage",
tags: ['redis'],
value: "<floating point> >= 0, ratio of current delay",
default: "2 (increase delay by 200% each try)",
info: "Add an exponentially increasing delay to the Redis cluster check interval. `Delay[n] = (Delay[n-1] + jitter) * (nchan_redis_cluster_check_interval_backoff + 1)`."

nchan_redis_cluster_check_interval_max [:upstream],
:ngx_conf_set_msec_slot,
[:srv_conf, :"redis.cluster_check_interval.max"],

group: "storage",
tags: ['redis'],
value: "<time> (0 to disable)",
default: "30s",
info: "Maximum Redis cluster check interval after backoff and jitter."


nchan_redis_cluster_recovery_delay [:upstream],
:ngx_conf_set_msec_slot,
[:srv_conf, :"redis.cluster_recovery_delay.min"],
Expand Down
65 changes: 52 additions & 13 deletions src/nchan_config_commands.c
Original file line number Diff line number Diff line change
Expand Up @@ -755,19 +755,6 @@ static ngx_command_t nchan_commands[] = {
offsetof(nchan_loc_conf_t, redis.ping_interval),
NULL } ,

{ ngx_string("nchan_redis_cluster_check_interval"),
NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_UPS_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1,
ngx_conf_set_sec_slot,
NGX_HTTP_LOC_CONF_OFFSET,
offsetof(nchan_loc_conf_t, redis.cluster_check_interval),
NULL } ,
{ ngx_string("nchan_redis_cluster_check_interval"), //alt for nchan_redis_cluster_check_interval
NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_UPS_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1,
ngx_conf_set_sec_slot,
NGX_HTTP_LOC_CONF_OFFSET,
offsetof(nchan_loc_conf_t, redis.cluster_check_interval),
NULL } ,

{ ngx_string("nchan_redis_load_scripts_unconditionally"),
NGX_HTTP_UPS_CONF|NGX_CONF_TAKE1,
ngx_conf_set_flag_slot,
Expand Down Expand Up @@ -885,6 +872,58 @@ static ngx_command_t nchan_commands[] = {
offsetof(nchan_srv_conf_t, redis.reconnect_delay.max),
NULL } ,

{ ngx_string("nchan_redis_cluster_check_interval_min"),
NGX_HTTP_UPS_CONF|NGX_CONF_TAKE1,
ngx_conf_set_msec_slot,
NGX_HTTP_SRV_CONF_OFFSET,
offsetof(nchan_srv_conf_t, redis.cluster_check_interval.min),
NULL } ,
{ ngx_string("nchan_redis_cluster_check_interval"), //alt for nchan_redis_cluster_check_interval_min
NGX_HTTP_UPS_CONF|NGX_CONF_TAKE1,
ngx_conf_set_msec_slot,
NGX_HTTP_SRV_CONF_OFFSET,
offsetof(nchan_srv_conf_t, redis.cluster_check_interval.min),
NULL } ,

{ ngx_string("nchan_redis_cluster_check_interval_jitter"),
NGX_HTTP_UPS_CONF|NGX_CONF_TAKE1,
ngx_conf_set_jitter,
NGX_HTTP_SRV_CONF_OFFSET,
offsetof(nchan_srv_conf_t, redis.cluster_check_interval.jitter_multiplier),
NULL } ,
{ ngx_string("nchan_redis_cluster_check_interval_jitter"), //alt for nchan_redis_cluster_check_interval_jitter
NGX_HTTP_UPS_CONF|NGX_CONF_TAKE1,
ngx_conf_set_jitter,
NGX_HTTP_SRV_CONF_OFFSET,
offsetof(nchan_srv_conf_t, redis.cluster_check_interval.jitter_multiplier),
NULL } ,

{ ngx_string("nchan_redis_cluster_check_interval_backoff"),
NGX_HTTP_UPS_CONF|NGX_CONF_TAKE1,
ngx_conf_set_exponential_backoff,
NGX_HTTP_SRV_CONF_OFFSET,
offsetof(nchan_srv_conf_t, redis.cluster_check_interval.backoff_multiplier),
NULL } ,
{ ngx_string("nchan_redis_cluster_check_interval_backoff"), //alt for nchan_redis_cluster_check_interval_backoff
NGX_HTTP_UPS_CONF|NGX_CONF_TAKE1,
ngx_conf_set_exponential_backoff,
NGX_HTTP_SRV_CONF_OFFSET,
offsetof(nchan_srv_conf_t, redis.cluster_check_interval.backoff_multiplier),
NULL } ,

{ ngx_string("nchan_redis_cluster_check_interval_max"),
NGX_HTTP_UPS_CONF|NGX_CONF_TAKE1,
ngx_conf_set_msec_slot,
NGX_HTTP_SRV_CONF_OFFSET,
offsetof(nchan_srv_conf_t, redis.cluster_check_interval.max),
NULL } ,
{ ngx_string("nchan_redis_cluster_check_interval_max"), //alt for nchan_redis_cluster_check_interval_max
NGX_HTTP_UPS_CONF|NGX_CONF_TAKE1,
ngx_conf_set_msec_slot,
NGX_HTTP_SRV_CONF_OFFSET,
offsetof(nchan_srv_conf_t, redis.cluster_check_interval.max),
NULL } ,

{ ngx_string("nchan_redis_cluster_recovery_delay"),
NGX_HTTP_UPS_CONF|NGX_CONF_TAKE1,
ngx_conf_set_msec_slot,
Expand Down
5 changes: 5 additions & 0 deletions src/nchan_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
#define NCHAN_DEFAULT_REDIS_CLUSTER_CONNECT_TIMEOUT_MSEC 15000
#define NCHAN_DEFAULT_REDIS_CLUSTER_MAX_FAILING_TIME_MSEC 30000

#define NCHAN_REDIS_DEFAULT_CLUSTER_CHECK_INTERVAL_MIN_MSEC 1000
#define NCHAN_REDIS_DEFAULT_CLUSTER_CHECK_INTERVAL_BACKOFF_MULTIPLIER 2
#define NCHAN_REDIS_DEFAULT_CLUSTER_CHECK_INTERVAL_JITTER_MULTIPLIER 0.2
#define NCHAN_REDIS_DEFAULT_CLUSTER_CHECK_INTERVAL_MAX_MSEC 30000

#define NCHAN_DEFAULT_REDIS_RECONNECT_DELAY_MIN_MSEC 500
#define NCHAN_DEFAULT_REDIS_RECONNECT_DELAY_BACKOFF_MULTIPLIER 0.5
#define NCHAN_DEFAULT_REDIS_RECONNECT_DELAY_JITTER_MULTIPLIER 0.1
Expand Down
25 changes: 20 additions & 5 deletions src/nchan_setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ static void *nchan_create_srv_conf(ngx_conf_t *cf) {
scf->redis.cluster_recovery_delay.jitter_multiplier = NGX_CONF_UNSET;
scf->redis.cluster_recovery_delay.backoff_multiplier = NGX_CONF_UNSET;
scf->redis.cluster_recovery_delay.max = NGX_CONF_UNSET_MSEC;
scf->redis.cluster_check_interval.min = NGX_CONF_UNSET_MSEC;
scf->redis.cluster_check_interval.jitter_multiplier = NGX_CONF_UNSET;
scf->redis.cluster_check_interval.backoff_multiplier = NGX_CONF_UNSET;
scf->redis.cluster_check_interval.max = NGX_CONF_UNSET_MSEC;
scf->redis.cluster_max_failing_msec = NGX_CONF_UNSET_MSEC;
scf->redis.load_scripts_unconditionally = NGX_CONF_UNSET;
scf->redis.optimize_target = NCHAN_REDIS_OPTIMIZE_UNSET;
Expand Down Expand Up @@ -197,6 +201,22 @@ static char *nchan_merge_srv_conf(ngx_conf_t *cf, void *parent, void *child) {
}
ngx_conf_merge_msec_value(conf->redis.cluster_recovery_delay.max, prev->redis.cluster_recovery_delay.max, NCHAN_DEFAULT_REDIS_CLUSTER_RECOVERY_DELAY_MAX_MSEC);


ngx_conf_merge_msec_value(conf->redis.cluster_check_interval.min, prev->redis.cluster_check_interval.min, NCHAN_REDIS_DEFAULT_CLUSTER_CHECK_INTERVAL_MIN_MSEC);
if(prev->redis.cluster_check_interval.jitter_multiplier == NGX_CONF_UNSET) {
conf->redis.cluster_check_interval.jitter_multiplier = NCHAN_REDIS_DEFAULT_CLUSTER_CHECK_INTERVAL_JITTER_MULTIPLIER;
}
else {
conf->redis.cluster_check_interval.jitter_multiplier = prev->redis.cluster_check_interval.jitter_multiplier;
}
if(prev->redis.cluster_check_interval.backoff_multiplier == NGX_CONF_UNSET) {
conf->redis.cluster_check_interval.backoff_multiplier = NCHAN_REDIS_DEFAULT_CLUSTER_CHECK_INTERVAL_BACKOFF_MULTIPLIER;
}
else {
conf->redis.cluster_check_interval.backoff_multiplier = prev->redis.cluster_check_interval.backoff_multiplier;
}
ngx_conf_merge_msec_value(conf->redis.cluster_check_interval.max, prev->redis.cluster_check_interval.max, NCHAN_REDIS_DEFAULT_CLUSTER_CHECK_INTERVAL_MAX_MSEC);

ngx_conf_merge_value(conf->redis.load_scripts_unconditionally, prev->redis.load_scripts_unconditionally, 0);

MERGE_UNSET_CONF(conf->redis.optimize_target, prev->redis.optimize_target, NCHAN_REDIS_OPTIMIZE_UNSET, NCHAN_REDIS_OPTIMIZE_CPU);
Expand Down Expand Up @@ -294,7 +314,6 @@ static void *nchan_create_loc_conf(ngx_conf_t *cf) {
ngx_memzero(&lcf->redis, sizeof(lcf->redis));
lcf->redis.url_enabled=NGX_CONF_UNSET;
lcf->redis.ping_interval = NGX_CONF_UNSET;
lcf->redis.cluster_check_interval=NGX_CONF_UNSET;
lcf->redis.upstream_inheritable=NGX_CONF_UNSET;
lcf->redis.storage_mode = REDIS_MODE_CONF_UNSET;
lcf->redis.nostore_fastpublish = NGX_CONF_UNSET;
Expand Down Expand Up @@ -561,10 +580,6 @@ static char * nchan_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child) {
ngx_conf_merge_value(conf->redis.ping_interval, up->redis.ping_interval, NGX_CONF_UNSET);
ngx_conf_merge_value(conf->redis.ping_interval, prev->redis.ping_interval, NCHAN_REDIS_DEFAULT_PING_INTERVAL_TIME);

if(up)
ngx_conf_merge_value(conf->redis.cluster_check_interval, up->redis.cluster_check_interval, NGX_CONF_UNSET);
ngx_conf_merge_value(conf->redis.cluster_check_interval, prev->redis.cluster_check_interval, NCHAN_REDIS_DEFAULT_CLUSTER_CHECK_INTERVAL_TIME);

if(up)
ngx_conf_merge_value(conf->redis.nostore_fastpublish, up->redis.nostore_fastpublish, NGX_CONF_UNSET);
ngx_conf_merge_value(conf->redis.nostore_fastpublish, prev->redis.nostore_fastpublish, 0);
Expand Down
2 changes: 1 addition & 1 deletion src/nchan_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ typedef struct {
ngx_str_t url;
ngx_flag_t url_enabled;
time_t ping_interval;
time_t cluster_check_interval;
ngx_str_t namespace;
nchan_redis_storage_mode_t storage_mode;
ngx_int_t nostore_fastpublish;
Expand Down Expand Up @@ -335,6 +334,7 @@ typedef struct {
ngx_int_t load_scripts_unconditionally;
nchan_backoff_settings_t reconnect_delay;
nchan_backoff_settings_t cluster_recovery_delay;
nchan_backoff_settings_t cluster_check_interval;
nchan_redis_optimize_t optimize_target;
ngx_int_t master_weight;
ngx_int_t slave_weight;
Expand Down
Loading

0 comments on commit 6f46080

Please sign in to comment.