From 4cc43a96f67e155a3526a29816510536ef9380c7 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Mon, 18 Feb 2019 22:48:55 +0800 Subject: [PATCH 01/99] benchmark: add auth check in benchmark When we run benchmark but forget to set the right requirepass, benchmark should return error. --- src/redis-benchmark.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 31f91eb0f..4f0f3404a 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -204,6 +204,12 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { if (redisBufferRead(c->context) != REDIS_OK) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); + } + else if (strlen(c->context->reader->buf)>=32 + && !strncmp(c->context->reader->buf,"-NOAUTH Authentication required.", 32)) + { + fprintf(stderr,"Error: %s\n",c->context->reader->buf); + exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { From 0f0f787a37e6411a02d9a992ecc7bb8af7decf9a Mon Sep 17 00:00:00 2001 From: vattezhang Date: Wed, 27 Feb 2019 21:20:00 +0800 Subject: [PATCH 02/99] fix: fix sentinel command table and new flags format --- src/sentinel.c | 13 +++++++++---- src/server.h | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/sentinel.c b/src/sentinel.c index 4d03c9c12..92ea75436 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -450,11 +450,11 @@ struct redisCommand sentinelcmds[] = { {"punsubscribe",punsubscribeCommand,-1,"",0,NULL,0,0,0,0,0}, {"publish",sentinelPublishCommand,3,"",0,NULL,0,0,0,0,0}, {"info",sentinelInfoCommand,-1,"",0,NULL,0,0,0,0,0}, - {"role",sentinelRoleCommand,1,"l",0,NULL,0,0,0,0,0}, - {"client",clientCommand,-2,"rs",0,NULL,0,0,0,0,0}, + {"role",sentinelRoleCommand,1,"ok-loading",0,NULL,0,0,0,0,0}, + {"client",clientCommand,-2,"read-only no-script",0,NULL,0,0,0,0,0}, {"shutdown",shutdownCommand,-1,"",0,NULL,0,0,0,0,0}, - {"auth",authCommand,2,"sltF",0,NULL,0,0,0,0,0}, - {"hello",helloCommand,-2,"sF",0,NULL,0,0,0,0,0} + {"auth",authCommand,2,"no-script ok-loading ok-stale fast",0,NULL,0,0,0,0,0}, + {"hello",helloCommand,-2,"no-script fast",0,NULL,0,0,0,0,0} }; /* This function overwrites a few normal Redis config default with Sentinel @@ -477,6 +477,11 @@ void initSentinel(void) { retval = dictAdd(server.commands, sdsnew(cmd->name), cmd); serverAssert(retval == DICT_OK); + + /* Translate the command string flags description into an actual + * set of flags. */ + if (populateCommandTableParseFlags(cmd,cmd->sflags) == C_ERR) + serverPanic("Unsupported command flag"); } /* Initialize various data structures. */ diff --git a/src/server.h b/src/server.h index 994952654..c29a40b6b 100644 --- a/src/server.h +++ b/src/server.h @@ -2264,6 +2264,7 @@ void serverLogHexDump(int level, char *descr, void *value, size_t len); int memtest_preserving_test(unsigned long *m, size_t bytes, int passes); void mixDigest(unsigned char *digest, void *ptr, size_t len); void xorDigest(unsigned char *digest, void *ptr, size_t len); +int populateCommandTableParseFlags(struct redisCommand *c, char *strflags); #define redisDebug(fmt, ...) \ printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__) From ad223e204222dca7758eb540a455bca93e62b861 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Wed, 13 Mar 2019 20:46:33 +0800 Subject: [PATCH 03/99] fix: fix benchmark cannot exit when NOAUTH err happens --- src/redis-benchmark.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 2c53bc936..edeaf3a25 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -419,11 +419,10 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); } - else if (strlen(c->context->reader->buf)>=32 - && !strncmp(c->context->reader->buf,"-NOAUTH Authentication required.", 32)) + else if (NULL != strstr(c->context->reader->buf,"NOAUTH")) { fprintf(stderr,"Error: %s\n",c->context->reader->buf); - exit(1); + exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { From 822a992f913484162ce508fdb073d8f2ddb6d7c8 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 24 Mar 2019 12:00:33 +0200 Subject: [PATCH 04/99] fix: missing initialization. --- src/module.c | 1 + src/modules/hellofilter.c => tests/modules/commandfilter.c | 0 tests/{modules => unit/moduleapi}/commandfilter.tcl | 0 3 files changed, 1 insertion(+) rename src/modules/hellofilter.c => tests/modules/commandfilter.c (100%) rename tests/{modules => unit/moduleapi}/commandfilter.tcl (100%) diff --git a/src/module.c b/src/module.c index ff7f27cdd..f468d4996 100644 --- a/src/module.c +++ b/src/module.c @@ -753,6 +753,7 @@ void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int api module->usedby = listCreate(); module->using = listCreate(); module->filters = listCreate(); + module->in_call = 0; ctx->module = module; } diff --git a/src/modules/hellofilter.c b/tests/modules/commandfilter.c similarity index 100% rename from src/modules/hellofilter.c rename to tests/modules/commandfilter.c diff --git a/tests/modules/commandfilter.tcl b/tests/unit/moduleapi/commandfilter.tcl similarity index 100% rename from tests/modules/commandfilter.tcl rename to tests/unit/moduleapi/commandfilter.tcl From ec0b6bd2c35a617101a2e874307be8ae9b504ac0 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 24 Mar 2019 12:03:03 +0200 Subject: [PATCH 05/99] Add runtest-moduleapi with commandfilter coverage. --- runtest-moduleapi | 16 +++++++++++++++ src/modules/Makefile | 7 +------ tests/modules/Makefile | 24 ++++++++++++++++++++++ tests/modules/commandfilter.c | 28 +++++++++++++------------- tests/test_helper.tcl | 1 - tests/unit/moduleapi/commandfilter.tcl | 16 +++++++-------- 6 files changed, 63 insertions(+), 29 deletions(-) create mode 100755 runtest-moduleapi create mode 100644 tests/modules/Makefile diff --git a/runtest-moduleapi b/runtest-moduleapi new file mode 100755 index 000000000..84cdb9bb8 --- /dev/null +++ b/runtest-moduleapi @@ -0,0 +1,16 @@ +#!/bin/sh +TCL_VERSIONS="8.5 8.6" +TCLSH="" + +for VERSION in $TCL_VERSIONS; do + TCL=`which tclsh$VERSION 2>/dev/null` && TCLSH=$TCL +done + +if [ -z $TCLSH ] +then + echo "You need tcl 8.5 or newer in order to run the Redis test" + exit 1 +fi + +make -C tests/modules && \ +$TCLSH tests/test_helper.tcl --single unit/moduleapi/commandfilter "${@}" diff --git a/src/modules/Makefile b/src/modules/Makefile index 537aa0daf..4f6b50f2e 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -13,7 +13,7 @@ endif .SUFFIXES: .c .so .xo .o -all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so hellodict.so hellofilter.so +all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so hellodict.so .c.xo: $(CC) -I. $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ @@ -47,11 +47,6 @@ hellodict.xo: ../redismodule.h hellodict.so: hellodict.xo -hellofilter.xo: ../redismodule.h - -hellofilter.so: hellofilter.xo - $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc - testmodule.xo: ../redismodule.h testmodule.so: testmodule.xo diff --git a/tests/modules/Makefile b/tests/modules/Makefile new file mode 100644 index 000000000..014d20afa --- /dev/null +++ b/tests/modules/Makefile @@ -0,0 +1,24 @@ + +# find the OS +uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') + +# Compile flags for linux / osx +ifeq ($(uname_S),Linux) + SHOBJ_CFLAGS ?= -W -Wall -fno-common -g -ggdb -std=c99 -O2 + SHOBJ_LDFLAGS ?= -shared +else + SHOBJ_CFLAGS ?= -W -Wall -dynamic -fno-common -g -ggdb -std=c99 -O2 + SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup +endif + +.SUFFIXES: .c .so .xo .o + +all: commandfilter.so + +.c.xo: + $(CC) -I../../src $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ + +commandfilter.xo: ../../src/redismodule.h + +commandfilter.so: commandfilter.xo + $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc diff --git a/tests/modules/commandfilter.c b/tests/modules/commandfilter.c index 448e12983..d25d49c44 100644 --- a/tests/modules/commandfilter.c +++ b/tests/modules/commandfilter.c @@ -1,18 +1,18 @@ #define REDISMODULE_EXPERIMENTAL_API -#include "../redismodule.h" +#include "redismodule.h" #include static RedisModuleString *log_key_name; -static const char log_command_name[] = "hellofilter.log"; -static const char ping_command_name[] = "hellofilter.ping"; -static const char unregister_command_name[] = "hellofilter.unregister"; +static const char log_command_name[] = "commandfilter.log"; +static const char ping_command_name[] = "commandfilter.ping"; +static const char unregister_command_name[] = "commandfilter.unregister"; static int in_log_command = 0; static RedisModuleCommandFilter *filter = NULL; -int HelloFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +int CommandFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { (void) argc; (void) argv; @@ -23,7 +23,7 @@ int HelloFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, return REDISMODULE_OK; } -int HelloFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +int CommandFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { (void) argc; (void) argv; @@ -39,7 +39,7 @@ int HelloFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int a return REDISMODULE_OK; } -int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +int CommandFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModuleString *s = RedisModule_CreateString(ctx, "", 0); @@ -74,9 +74,9 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar return REDISMODULE_OK; } -void HelloFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) +void CommandFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) { - if (in_log_command) return; /* don't process our own RM_Call() from HelloFilter_LogCommand() */ + if (in_log_command) return; /* don't process our own RM_Call() from CommandFilter_LogCommand() */ /* Fun manipulations: * - Remove @delme @@ -117,7 +117,7 @@ void HelloFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) } int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { - if (RedisModule_Init(ctx,"hellofilter",1,REDISMODULE_APIVER_1) + if (RedisModule_Init(ctx,"commandfilter",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) return REDISMODULE_ERR; if (argc != 2) { @@ -130,18 +130,18 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) RedisModule_StringToLongLong(argv[1], &noself); if (RedisModule_CreateCommand(ctx,log_command_name, - HelloFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + CommandFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; if (RedisModule_CreateCommand(ctx,ping_command_name, - HelloFilter_PingCommand,"deny-oom",1,1,1) == REDISMODULE_ERR) + CommandFilter_PingCommand,"deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; if (RedisModule_CreateCommand(ctx,unregister_command_name, - HelloFilter_UnregisterCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + CommandFilter_UnregisterCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; - if ((filter = RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter, + if ((filter = RedisModule_RegisterCommandFilter(ctx, CommandFilter_CommandFilter, noself ? REDISMODULE_CMDFILTER_NOSELF : 0)) == NULL) return REDISMODULE_ERR; diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index d2f281526..568eacdee 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -63,7 +63,6 @@ set ::all_tests { unit/lazyfree unit/wait unit/pendingquerybuf - modules/commandfilter } # Index to the next test to run in the ::all_tests list. set ::next_test 0 diff --git a/tests/unit/moduleapi/commandfilter.tcl b/tests/unit/moduleapi/commandfilter.tcl index 1e5c41d2b..6078f64f2 100644 --- a/tests/unit/moduleapi/commandfilter.tcl +++ b/tests/unit/moduleapi/commandfilter.tcl @@ -1,4 +1,4 @@ -set testmodule [file normalize src/modules/hellofilter.so] +set testmodule [file normalize tests/modules/commandfilter.so] start_server {tags {"modules"}} { r module load $testmodule log-key 0 @@ -27,7 +27,7 @@ start_server {tags {"modules"}} { test {Command Filter applies on RM_Call() commands} { r del log-key - r hellofilter.ping + r commandfilter.ping r lrange log-key 0 -1 } "{ping @log}" @@ -39,13 +39,13 @@ start_server {tags {"modules"}} { test {Command Filter applies on Lua redis.call() that calls a module} { r del log-key - r eval "redis.call('hellofilter.ping')" 0 + r eval "redis.call('commandfilter.ping')" 0 r lrange log-key 0 -1 } "{ping @log}" test {Command Filter is unregistered implicitly on module unload} { r del log-key - r module unload hellofilter + r module unload commandfilter r set mykey @log r lrange log-key 0 -1 } {} @@ -59,14 +59,14 @@ start_server {tags {"modules"}} { assert_equal "{set mykey @log}" [r lrange log-key 0 -1] # Unregister - r hellofilter.unregister + r commandfilter.unregister r del log-key r set mykey @log r lrange log-key 0 -1 } {} - r module unload hellofilter + r module unload commandfilter r module load $testmodule log-key 1 test {Command Filter REDISMODULE_CMDFILTER_NOSELF works as expected} { @@ -74,10 +74,10 @@ start_server {tags {"modules"}} { assert_equal "{set mykey @log}" [r lrange log-key 0 -1] r del log-key - r hellofilter.ping + r commandfilter.ping assert_equal {} [r lrange log-key 0 -1] - r eval "redis.call('hellofilter.ping')" 0 + r eval "redis.call('commandfilter.ping')" 0 assert_equal {} [r lrange log-key 0 -1] } From acba2fc9b4c8082e5344d2d53e51dc4c1c37942c Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 24 Mar 2019 13:10:55 +0200 Subject: [PATCH 06/99] slave corrupts replication stream when module blocked client uses large reply (or POSTPONED_ARRAY) when redis appends the blocked client reply list to the real client, it didn't bother to check if it is in fact the master client. so a slave executing that module command will send replies to the master, causing the master to send the slave error responses, which will mess up the replication offset (slave will advance it's replication offset, and the master does not) --- src/module.c | 7 +------ src/networking.c | 13 +++++++++++++ src/server.h | 1 + 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/module.c b/src/module.c index ff7f27cdd..0c8197ac7 100644 --- a/src/module.c +++ b/src/module.c @@ -3747,12 +3747,7 @@ void moduleHandleBlockedClients(void) { * We need to glue such replies to the client output buffer and * free the temporary client we just used for the replies. */ if (c) { - if (bc->reply_client->bufpos) - addReplyProto(c,bc->reply_client->buf, - bc->reply_client->bufpos); - if (listLength(bc->reply_client->reply)) - listJoin(c->reply,bc->reply_client->reply); - c->reply_bytes += bc->reply_client->reply_bytes; + AddReplyFromClient(c, bc->reply_client); } freeClient(bc->reply_client); diff --git a/src/networking.c b/src/networking.c index 09cbff387..7fdd1984d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -744,6 +744,19 @@ void addReplySubcommandSyntaxError(client *c) { sdsfree(cmd); } +/* Append 'src' client output buffers into 'dst' client output buffers. + * This function clears the output buffers of 'src' */ +void AddReplyFromClient(client *dst, client *src) { + if (prepareClientToWrite(dst) != C_OK) + return; + addReplyProto(dst,src->buf, src->bufpos); + if (listLength(src->reply)) + listJoin(dst->reply,src->reply); + dst->reply_bytes += src->reply_bytes; + src->reply_bytes = 0; + src->bufpos = 0; +} + /* Copy 'src' client output buffers into 'dst' client output buffers. * The function takes care of freeing the old output buffers of the * destination client. */ diff --git a/src/server.h b/src/server.h index 95e0355a6..dfd9f7698 100644 --- a/src/server.h +++ b/src/server.h @@ -1529,6 +1529,7 @@ void addReplyNullArray(client *c); void addReplyBool(client *c, int b); void addReplyVerbatim(client *c, const char *s, size_t len, const char *ext); void addReplyProto(client *c, const char *s, size_t len); +void AddReplyFromClient(client *c, client *src); void addReplyBulk(client *c, robj *obj); void addReplyBulkCString(client *c, const char *s); void addReplyBulkCBuffer(client *c, const void *p, size_t len); From 4de88828d9d64b7d64b5ee75f9fb8d25aa1dfaa5 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 28 Mar 2019 06:38:16 +0000 Subject: [PATCH 07/99] build fix --- src/networking.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/networking.c b/src/networking.c index 09cbff387..02fc44e75 100644 --- a/src/networking.c +++ b/src/networking.c @@ -29,6 +29,7 @@ #include "server.h" #include "atomicvar.h" +#include #include #include #include From 75648f99a5ba41812c115f83f8b668f030acfaee Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 17:39:22 +0200 Subject: [PATCH 08/99] Fix assert comparison in fetchClusterSlotsConfiguration(). --- src/redis-benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 12e9f7e41..4e2662f21 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -1192,7 +1192,7 @@ static int fetchClusterSlotsConfiguration(client c) { assert(reply->type == REDIS_REPLY_ARRAY); for (i = 0; i < reply->elements; i++) { redisReply *r = reply->element[i]; - assert(r->type = REDIS_REPLY_ARRAY); + assert(r->type == REDIS_REPLY_ARRAY); assert(r->elements >= 3); int from, to, slot; from = r->element[0]->integer; From f8a9708aa705b6493ef63a82e42ed428997b817a Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 18:06:50 +0200 Subject: [PATCH 09/99] ACL: regression test for #5998. --- tests/unit/acl.tcl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/unit/acl.tcl b/tests/unit/acl.tcl index 82c75f82d..90f2c9bbf 100644 --- a/tests/unit/acl.tcl +++ b/tests/unit/acl.tcl @@ -108,4 +108,11 @@ start_server {tags {"acl"}} { assert_match {*+debug|segfault*} $cmdstr assert_match {*+acl*} $cmdstr } + + test {ACL regression: memory leaks adding / removing subcommands} { + r AUTH default "" + r ACL setuser newuser reset -debug +debug|a +debug|b +debug|c + r ACL setuser newuser -debug + # The test framework will detect a leak if any. + } } From c24e32041b91ac32626e8d8eee1c062942e25f27 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 18:08:37 +0200 Subject: [PATCH 10/99] ACL: Fix memory leak in ACLResetSubcommandsForCommand(). This commit fixes bug reported at #5998. Thanks to @tomcat1102. --- src/acl.c | 2 ++ tests/unit/acl.tcl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/acl.c b/src/acl.c index d9f431f4f..0205e51ad 100644 --- a/src/acl.c +++ b/src/acl.c @@ -542,6 +542,8 @@ struct redisCommand *ACLLookupCommand(const char *name) { * and command ID. */ void ACLResetSubcommandsForCommand(user *u, unsigned long id) { if (u->allowed_subcommands && u->allowed_subcommands[id]) { + for (int i = 0; u->allowed_subcommands[id][i]; i++) + sdsfree(u->allowed_subcommands[id][i]); zfree(u->allowed_subcommands[id]); u->allowed_subcommands[id] = NULL; } diff --git a/tests/unit/acl.tcl b/tests/unit/acl.tcl index 90f2c9bbf..058441433 100644 --- a/tests/unit/acl.tcl +++ b/tests/unit/acl.tcl @@ -109,7 +109,7 @@ start_server {tags {"acl"}} { assert_match {*+acl*} $cmdstr } - test {ACL regression: memory leaks adding / removing subcommands} { + test {ACL #5998 regression: memory leaks adding / removing subcommands} { r AUTH default "" r ACL setuser newuser reset -debug +debug|a +debug|b +debug|c r ACL setuser newuser -debug From d490752d58ecd0a815bdbbb350b550919fcc7a4a Mon Sep 17 00:00:00 2001 From: yongman Date: Tue, 9 Apr 2019 09:24:22 +0800 Subject: [PATCH 11/99] Fix memleak in bitfieldCommand --- src/bitops.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/bitops.c b/src/bitops.c index 8d03a7699..ee1ce0460 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -994,12 +994,18 @@ void bitfieldCommand(client *c) { /* Lookup for read is ok if key doesn't exit, but errors * if it's not a string. */ o = lookupKeyRead(c->db,c->argv[1]); - if (o != NULL && checkType(c,o,OBJ_STRING)) return; + if (o != NULL && checkType(c,o,OBJ_STRING)) { + zfree(ops); + return; + } } else { /* Lookup by making room up to the farest bit reached by * this operation. */ if ((o = lookupStringForBitCommand(c, - highest_write_offset)) == NULL) return; + highest_write_offset)) == NULL) { + zfree(ops); + return; + } } addReplyArrayLen(c,numops); From 9e67691ffb4709535b56a089a973c3f89bfbe231 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 Apr 2019 18:53:27 +0200 Subject: [PATCH 12/99] Aesthetic change to #5962 to conform to Redis style. --- src/module.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/module.c b/src/module.c index 0c8197ac7..1e7c0eca3 100644 --- a/src/module.c +++ b/src/module.c @@ -3746,9 +3746,7 @@ void moduleHandleBlockedClients(void) { * replies to send to the client in a thread safe context. * We need to glue such replies to the client output buffer and * free the temporary client we just used for the replies. */ - if (c) { - AddReplyFromClient(c, bc->reply_client); - } + if (c) AddReplyFromClient(c, bc->reply_client); freeClient(bc->reply_client); if (c != NULL) { From 3ccdcbc0880e4f6a6e577bed29826064d03c9509 Mon Sep 17 00:00:00 2001 From: James Rouzier Date: Thu, 11 Apr 2019 12:19:02 -0400 Subject: [PATCH 13/99] Fix start and end key initialize --- src/t_stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index f4ace87a2..9e7d3d126 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -492,14 +492,14 @@ void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamI streamEncodeID(si->start_key,start); } else { si->start_key[0] = 0; - si->start_key[0] = 0; + si->start_key[1] = 0; } if (end) { streamEncodeID(si->end_key,end); } else { si->end_key[0] = UINT64_MAX; - si->end_key[0] = UINT64_MAX; + si->end_key[1] = UINT64_MAX; } /* Seek the correct node in the radix tree. */ From 487601d85d95acf71414dee8328e65e8b4fafe08 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 15 Apr 2019 16:50:26 +0200 Subject: [PATCH 14/99] Test: disable module testing for now. --- tests/test_helper.tcl | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index d2f281526..568eacdee 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -63,7 +63,6 @@ set ::all_tests { unit/lazyfree unit/wait unit/pendingquerybuf - modules/commandfilter } # Index to the next test to run in the ::all_tests list. set ::next_test 0 From fa97ef34ca5899ef482c543d7122a41fba8e4509 Mon Sep 17 00:00:00 2001 From: git-hulk Date: Tue, 23 Apr 2019 20:08:14 +0800 Subject: [PATCH 15/99] FIX: core dump in redis-benchmark when the `-r` is the last arg --- src/redis-benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 4e2662f21..2759e6a3c 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -1294,7 +1294,7 @@ int parseOptions(int argc, const char **argv) { if (*p < '0' || *p > '9') goto invalid; } config.randomkeys = 1; - config.randomkeys_keyspacelen = atoi(argv[++i]); + config.randomkeys_keyspacelen = atoi(next); if (config.randomkeys_keyspacelen < 0) config.randomkeys_keyspacelen = 0; } else if (!strcmp(argv[i],"-q")) { From bc36404c79b50e80583d4a98a7211bd239fc88f9 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Fri, 26 Apr 2019 18:50:51 +0800 Subject: [PATCH 16/99] update --- src/redis-benchmark.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 7bac6fdd4..4e2662f21 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -418,11 +418,6 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { if (redisBufferRead(c->context) != REDIS_OK) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); - } - else if (NULL != strstr(c->context->reader->buf,"NOAUTH")) - { - fprintf(stderr,"Error: %s\n",c->context->reader->buf); - exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { From 4e38ced4886446efb70f96685a6a6dfa344095d4 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Fri, 26 Apr 2019 19:47:07 +0800 Subject: [PATCH 17/99] fix: benchmark auth fails when server have requirepass --- src/redis-benchmark.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 4e2662f21..e4134c9ea 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -254,6 +254,19 @@ static redisConfig *getRedisConfig(const char *ip, int port, else fprintf(stderr,"%s: %s\n",hostsocket,err); goto fail; } + + if(config.auth){ + void *authReply = NULL; + redisAppendCommand(c, "AUTH %s", config.auth); + if (REDIS_OK != redisGetReply(c, &authReply)) goto fail; + if (reply) freeReplyObject(reply); + reply = ((redisReply *) authReply); + if (reply->type == REDIS_REPLY_ERROR) { + fprintf(stderr, "ERROR: %s\n", reply->str); + goto fail; + } + } + redisAppendCommand(c, "CONFIG GET %s", "save"); redisAppendCommand(c, "CONFIG GET %s", "appendonly"); int i = 0; From 162208f94d57b95ef57a3615549f244691213234 Mon Sep 17 00:00:00 2001 From: abhay Date: Thu, 25 Apr 2019 13:50:25 +0530 Subject: [PATCH 18/99] removed obsolete warning as per - https://github.com/antirez/redis/issues/5291 --- redis.conf | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/redis.conf b/redis.conf index 5ea915905..060510768 100644 --- a/redis.conf +++ b/redis.conf @@ -942,13 +942,7 @@ aof-use-rdb-preamble yes lua-time-limit 5000 ################################ REDIS CLUSTER ############################### -# -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however -# in order to mark it as "mature" we need to wait for a non trivial percentage -# of users to deploy it in production. -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# + # Normal Redis instances can't be part of a Redis Cluster; only nodes that are # started as cluster nodes can. In order to start a Redis instance as a # cluster node enable the cluster support uncommenting the following: From bcac165fabcbec43843800e3f2fcb69a201d8b50 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Mon, 29 Apr 2019 14:38:28 +0800 Subject: [PATCH 19/99] aof: enhance AOF_FSYNC_EVERYSEC, more details in #5985 --- src/aof.c | 34 +++++++++++++++++++++++++++++++--- src/server.h | 1 + 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/aof.c b/src/aof.c index 615eebd01..4744847d2 100644 --- a/src/aof.c +++ b/src/aof.c @@ -197,6 +197,12 @@ ssize_t aofRewriteBufferWrite(int fd) { * AOF file implementation * ------------------------------------------------------------------------- */ +/* Return true if an AOf fsync is currently already in progress in a + * BIO thread. */ +int aofFsyncInProgress(void) { + return bioPendingJobsOfType(BIO_AOF_FSYNC) != 0; +} + /* Starts a background task that performs fsync() against the specified * file descriptor (the one of the AOF file) in another thread. */ void aof_background_fsync(int fd) { @@ -335,10 +341,24 @@ void flushAppendOnlyFile(int force) { int sync_in_progress = 0; mstime_t latency; - if (sdslen(server.aof_buf) == 0) return; + if (sdslen(server.aof_buf) == 0) { + /* Check if we need to do fsync even the aof buffer is empty, + * because previously in AOF_FSYNC_EVERYSEC mode, fsync is + * called only when aof buffer is not empty, so if users + * stop write commands before fsync called in one second, + * the data in page cache cannot be flushed in time. */ + if (server.aof_fsync == AOF_FSYNC_EVERYSEC && + server.aof_fsync_offset != server.aof_current_size && + server.unixtime > server.aof_last_fsync && + !(sync_in_progress = aofFsyncInProgress())) { + goto try_fsync; + } else { + return; + } + } if (server.aof_fsync == AOF_FSYNC_EVERYSEC) - sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0; + sync_in_progress = aofFsyncInProgress(); if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) { /* With this append fsync policy we do background fsyncing. @@ -470,6 +490,7 @@ void flushAppendOnlyFile(int force) { server.aof_buf = sdsempty(); } +try_fsync: /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are * children doing I/O in the background. */ if (server.aof_no_fsync_on_rewrite && @@ -484,10 +505,14 @@ void flushAppendOnlyFile(int force) { redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */ latencyEndMonitor(latency); latencyAddSampleIfNeeded("aof-fsync-always",latency); + server.aof_fsync_offset = server.aof_current_size; server.aof_last_fsync = server.unixtime; } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC && server.unixtime > server.aof_last_fsync)) { - if (!sync_in_progress) aof_background_fsync(server.aof_fd); + if (!sync_in_progress) { + aof_background_fsync(server.aof_fd); + server.aof_fsync_offset = server.aof_current_size; + } server.aof_last_fsync = server.unixtime; } } @@ -694,6 +719,7 @@ int loadAppendOnlyFile(char *filename) { * operation is received. */ if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) { server.aof_current_size = 0; + server.aof_fsync_offset = server.aof_current_size; fclose(fp); return C_ERR; } @@ -832,6 +858,7 @@ int loadAppendOnlyFile(char *filename) { stopLoading(); aofUpdateCurrentSize(); server.aof_rewrite_base_size = server.aof_current_size; + server.aof_fsync_offset = server.aof_current_size; return C_OK; readerr: /* Read error. If feof(fp) is true, fall through to unexpected EOF. */ @@ -1741,6 +1768,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { server.aof_selected_db = -1; /* Make sure SELECT is re-issued */ aofUpdateCurrentSize(); server.aof_rewrite_base_size = server.aof_current_size; + server.aof_current_size = server.aof_current_size; /* Clear regular AOF buffer since its contents was just written to * the new AOF from the background rewrite buffer. */ diff --git a/src/server.h b/src/server.h index dfd9f7698..e7f01b2ef 100644 --- a/src/server.h +++ b/src/server.h @@ -1140,6 +1140,7 @@ struct redisServer { off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */ off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */ off_t aof_current_size; /* AOF current size. */ + off_t aof_fsync_offset; /* AOF offset which is already synced to disk. */ int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */ pid_t aof_child_pid; /* PID if rewriting process */ list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */ From ba809f26d4bd81d23fa929d0c018f235ab298564 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 5 May 2019 08:19:52 +0300 Subject: [PATCH 20/99] make replication tests more stable on slow machines solving few replication related tests race conditions which fail on slow machines bugfix in slave buffers test: since the test is executed twice, each time with a different commands count, the threshold for the delta can't be a constant. --- tests/integration/psync2.tcl | 5 ++++- tests/integration/replication-psync.tcl | 26 +++++++++++++++++++++++++ tests/unit/maxmemory.tcl | 7 ++++--- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 8663d6fcc..d1212b640 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -166,12 +166,15 @@ start_server {} { # Pick a random slave set slave_id [expr {($master_id+1)%5}] set sync_count [status $R($master_id) sync_full] + set sync_partial [status $R($master_id) sync_partial_ok] catch { $R($slave_id) config rewrite $R($slave_id) debug restart } + # note: just waiting for connected_slaves==4 has a race condition since + # we might do the check before the master realized that the slave disconnected wait_for_condition 50 1000 { - [status $R($master_id) connected_slaves] == 4 + [status $R($master_id) sync_partial_ok] == $sync_partial + 1 } else { fail "Replica not reconnecting" } diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl index a3bce2a4c..bf8682446 100644 --- a/tests/integration/replication-psync.tcl +++ b/tests/integration/replication-psync.tcl @@ -79,6 +79,32 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec stop_bg_complex_data $load_handle0 stop_bg_complex_data $load_handle1 stop_bg_complex_data $load_handle2 + + # Wait for the slave to reach the "online" + # state from the POV of the master. + set retry 5000 + while {$retry} { + set info [$master info] + if {[string match {*slave0:*state=online*} $info]} { + break + } else { + incr retry -1 + after 100 + } + } + if {$retry == 0} { + error "assertion:Slave not correctly synchronized" + } + + # Wait that slave acknowledge it is online so + # we are sure that DBSIZE and DEBUG DIGEST will not + # fail because of timing issues. (-LOADING error) + wait_for_condition 5000 100 { + [lindex [$slave role] 3] eq {connected} + } else { + fail "Slave still not connected after some time" + } + set retry 10 while {$retry && ([$master debug digest] ne [$slave debug digest])}\ { diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl index 1def57af5..0f64ddc18 100644 --- a/tests/unit/maxmemory.tcl +++ b/tests/unit/maxmemory.tcl @@ -161,7 +161,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} } # make sure master doesn't disconnect slave because of timeout - $master config set repl-timeout 300 ;# 5 minutes + $master config set repl-timeout 1200 ;# 20 minutes (for valgrind and slow machines) $master config set maxmemory-policy allkeys-random $master config set client-output-buffer-limit "replica 100000000 100000000 300" $master config set repl-backlog-size [expr {10*1024}] @@ -212,7 +212,8 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} assert {[$master dbsize] == 100} assert {$slave_buf > 2*1024*1024} ;# some of the data may have been pushed to the OS buffers - assert {$delta < 50*1024 && $delta > -50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB + set delta_max [expr {$cmd_count / 2}] ;# 1 byte unaccounted for, with 1M commands will consume some 1MB + assert {$delta < $delta_max && $delta > -$delta_max} $master client kill type slave set killed_used [s -1 used_memory] @@ -221,7 +222,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} set killed_used_no_repl [expr {$killed_used - $killed_mem_not_counted_for_evict}] set delta_no_repl [expr {$killed_used_no_repl - $used_no_repl}] assert {$killed_slave_buf == 0} - assert {$delta_no_repl > -50*1024 && $delta_no_repl < 50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB + assert {$delta_no_repl > -$delta_max && $delta_no_repl < $delta_max} } # unfreeze slave process (after the 'test' succeeded or failed, but before we attempt to terminate the server From 9f3679880a7cebc3ce73142e2e19ae3e1150f457 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 5 May 2019 20:32:53 +0300 Subject: [PATCH 21/99] Preserve client->id for blocked clients. --- src/module.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index c29521670..7dee7e776 100644 --- a/src/module.c +++ b/src/module.c @@ -3866,7 +3866,10 @@ RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) { * in order to keep things like the currently selected database and similar * things. */ ctx->client = createClient(-1); - if (bc) selectDb(ctx->client,bc->dbid); + if (bc) { + selectDb(ctx->client,bc->dbid); + ctx->client->id = bc->client->id; + } return ctx; } From 1c0913dc4e22701726b3a39386a17a83058ad24c Mon Sep 17 00:00:00 2001 From: WuYunlong Date: Mon, 6 May 2019 11:46:07 +0800 Subject: [PATCH 22/99] Do not active expire keys in the background when the switch is off. --- src/server.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/server.c b/src/server.c index fb5d679cd..eaa7172a7 100644 --- a/src/server.c +++ b/src/server.c @@ -1674,10 +1674,12 @@ void clientsCron(void) { void databasesCron(void) { /* Expire keys by random sampling. Not required for slaves * as master will synthesize DELs for us. */ - if (server.active_expire_enabled && server.masterhost == NULL) { - activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW); - } else if (server.masterhost != NULL) { - expireSlaveKeys(); + if (server.active_expire_enabled) { + if (server.masterhost == NULL) { + activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW); + } else { + expireSlaveKeys(); + } } /* Defrag keys gradually. */ From b1c7e3393d8c6bfd357c981b11b7a84426ccff0d Mon Sep 17 00:00:00 2001 From: liaotonglang Date: Mon, 6 May 2019 17:15:49 +0800 Subject: [PATCH 23/99] delete sdsTest() from REDIS_TEST sdsTest() defined in sds.c dit not match the call in server.c. remove it from REDIS_TEST, since test-sds defined in Makefile. --- src/server.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/server.c b/src/server.c index fb5d679cd..674eef206 100644 --- a/src/server.c +++ b/src/server.c @@ -4718,8 +4718,6 @@ int main(int argc, char **argv) { return sha1Test(argc, argv); } else if (!strcasecmp(argv[2], "util")) { return utilTest(argc, argv); - } else if (!strcasecmp(argv[2], "sds")) { - return sdsTest(argc, argv); } else if (!strcasecmp(argv[2], "endianconv")) { return endianconvTest(argc, argv); } else if (!strcasecmp(argv[2], "crc64")) { From f468e653b5e683f945b0a4a6665c3155cc768a45 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Oct 2017 08:35:05 +0200 Subject: [PATCH 24/99] Threaded IO: implement handleClientsWithPendingWritesUsingThreads(). This is just an experiment for now, there are a couple of race conditions, mostly harmless for the performance gain experiment that this commit represents so far. The general idea here is to take Redis single threaded and instead fan-out on expansive kernel calls: write(2) in this case, but the same concept could be easily implemented for read(2) and protcol parsing. However just threading writes like in this commit, is enough to evaluate if the approach is sounding. --- src/networking.c | 156 +++++++++++++++++++++++++++++++++++++++++++++-- src/server.c | 11 ++-- src/server.h | 4 ++ 3 files changed, 162 insertions(+), 9 deletions(-) diff --git a/src/networking.c b/src/networking.c index ffb435625..3958e4f5e 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1065,9 +1065,17 @@ void freeClient(client *c) { * a context where calling freeClient() is not possible, because the client * should be valid for the continuation of the flow of the program. */ void freeClientAsync(client *c) { + /* We need to handle concurrent access to the server.clients_to_close list + * only in the freeClientAsync() function, since it's the only function that + * may access the list while Redis uses I/O threads. All the other accesses + * are in the context of the main thread while the other threads are + * idle. */ + static pthread_mutex_t async_free_queue_mutex = PTHREAD_MUTEX_INITIALIZER; if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_LUA) return; c->flags |= CLIENT_CLOSE_ASAP; + pthread_mutex_lock(&async_free_queue_mutex); listAddNodeTail(server.clients_to_close,c); + pthread_mutex_unlock(&async_free_queue_mutex); } void freeClientsInAsyncFreeQueue(void) { @@ -1091,7 +1099,12 @@ client *lookupClientByID(uint64_t id) { } /* Write data in output buffers to client. Return C_OK if the client - * is still valid after the call, C_ERR if it was freed. */ + * is still valid after the call, C_ERR if it was freed because of some + * error. + * + * This function is called by threads, but always with handler_installed + * set to 0. So when handler_installed is set to 0 the function must be + * thread safe. */ int writeToClient(int fd, client *c, int handler_installed) { ssize_t nwritten = 0, totwritten = 0; size_t objlen; @@ -1153,14 +1166,15 @@ int writeToClient(int fd, client *c, int handler_installed) { zmalloc_used_memory() < server.maxmemory) && !(c->flags & CLIENT_SLAVE)) break; } + /* FIXME: Fixme, use atomic var for this. */ server.stat_net_output_bytes += totwritten; if (nwritten == -1) { if (errno == EAGAIN) { nwritten = 0; } else { - serverLog(LL_VERBOSE, - "Error writing to client: %s", strerror(errno)); - freeClient(c); + // serverLog(LL_VERBOSE, + // "Error writing to client: %s", strerror(errno)); + freeClientAsync(c); return C_ERR; } } @@ -1173,11 +1187,15 @@ int writeToClient(int fd, client *c, int handler_installed) { } if (!clientHasPendingReplies(c)) { c->sentlen = 0; + /* Note that writeToClient() is called in a threaded way, but + * adDeleteFileEvent() is not thread safe: however writeToClient() + * is always called with handler_installed set to 0 from threads + * so we are fine. */ if (handler_installed) aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); /* Close connection after entire reply has been sent. */ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) { - freeClient(c); + freeClientAsync(c); return C_ERR; } } @@ -2452,3 +2470,131 @@ int processEventsWhileBlocked(void) { } return count; } + +/* ============================================================================= + * Threaded I/O + * =========================================================================== */ + +#define SERVER_MAX_IO_THREADS 32 + +pthread_t io_threads[SERVER_MAX_IO_THREADS]; +pthread_mutex_t io_threads_done_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t io_threads_done_cond = PTHREAD_COND_INITIALIZER; +pthread_mutex_t io_threads_idle_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t io_threads_idle_cond = PTHREAD_COND_INITIALIZER; +pthread_cond_t io_threads_start_cond = PTHREAD_COND_INITIALIZER; +int io_threads_done = 0; /* Number of threads that completed the work. */ +int io_threads_idle = 0; /* Number of threads in idle state ready to go. */ +list *io_threads_list[SERVER_MAX_IO_THREADS]; + +void *IOThreadMain(void *myid) { + /* The ID is the thread number (from 0 to server.iothreads_num-1), and is + * used by the thread to just manipulate a single sub-array of clients. */ + long id = (unsigned long)myid; + + while(1) { + /* ... Wait for start ... */ + pthread_mutex_lock(&io_threads_idle_mutex); + io_threads_idle++; + pthread_cond_signal(&io_threads_idle_cond); + printf("[%ld] Waiting start...\n", id); + pthread_cond_wait(&io_threads_start_cond,&io_threads_idle_mutex); + printf("[%ld] Started\n", id); + pthread_mutex_unlock(&io_threads_idle_mutex); + printf("%d to handle\n", (int)listLength(io_threads_list[id])); + + /* ... Process ... */ + listIter li; + listNode *ln; + listRewind(io_threads_list[id],&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + writeToClient(c->fd,c,0); + } + listEmpty(io_threads_list[id]); + + /* Report success. */ + pthread_mutex_lock(&io_threads_done_mutex); + io_threads_done++; + pthread_cond_signal(&io_threads_done_cond); + pthread_mutex_unlock(&io_threads_done_mutex); + printf("[%ld] Done\n", id); + } +} + +/* Initialize the data structures needed for threaded I/O. */ +void initThreadedIO(void) { + pthread_t tid; + + server.io_threads_num = 4; + for (int i = 0; i < server.io_threads_num; i++) { + if (pthread_create(&tid,NULL,IOThreadMain,(void*)(long)i) != 0) { + serverLog(LL_WARNING,"Fatal: Can't initialize IO thread."); + exit(1); + } + io_threads[i] = tid; + io_threads_list[i] = listCreate(); + } +} + +int handleClientsWithPendingWritesUsingThreads(void) { + int processed = listLength(server.clients_pending_write); + if (processed == 0) return 0; /* Return ASAP if there are no clients. */ + + printf("%d TOTAL\n", processed); + + /* Wait for all threads to be ready. */ + pthread_mutex_lock(&io_threads_idle_mutex); + while(io_threads_idle < server.io_threads_num) { + pthread_cond_wait(&io_threads_idle_cond,&io_threads_idle_mutex); + } + printf("All threads are idle: %d\n", io_threads_idle); + io_threads_idle = 0; + pthread_mutex_unlock(&io_threads_idle_mutex); + + /* Distribute the clients across N different lists. */ + listIter li; + listNode *ln; + listRewind(server.clients_pending_write,&li); + int item_id = 0; + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + c->flags &= ~CLIENT_PENDING_WRITE; + int target_id = item_id % server.io_threads_num; + listAddNodeTail(io_threads_list[target_id],c); + item_id++; + } + + /* Start all threads. */ + printf("Send start condition\n"); + pthread_mutex_lock(&io_threads_done_mutex); + io_threads_done = 0; + pthread_cond_broadcast(&io_threads_start_cond); + pthread_mutex_unlock(&io_threads_done_mutex); + + /* Wait for all threads to end their work. */ + pthread_mutex_lock(&io_threads_done_mutex); + while(io_threads_done < server.io_threads_num) { + pthread_cond_wait(&io_threads_done_cond,&io_threads_done_mutex); + } + pthread_mutex_unlock(&io_threads_done_mutex); + printf("All threads finshed\n"); + + /* Run the list of clients again to install the write handler where + * needed. */ + listRewind(server.clients_pending_write,&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + + /* Install the write handler if there are pending writes in some + * of the clients. */ + if (clientHasPendingReplies(c) && + aeCreateFileEvent(server.el, c->fd, AE_WRITABLE, + sendReplyToClient, c) == AE_ERR) + { + freeClientAsync(c); + } + } + listEmpty(server.clients_pending_write); + return processed; +} diff --git a/src/server.c b/src/server.c index fb5d679cd..c437880d5 100644 --- a/src/server.c +++ b/src/server.c @@ -1981,9 +1981,6 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { flushAppendOnlyFile(0); } - /* Close clients that need to be closed asynchronous */ - freeClientsInAsyncFreeQueue(); - /* Clear the paused clients flag if needed. */ clientsArePaused(); /* Don't check return value, just use the side effect.*/ @@ -2075,7 +2072,12 @@ void beforeSleep(struct aeEventLoop *eventLoop) { flushAppendOnlyFile(0); /* Handle writes with pending output buffers. */ - handleClientsWithPendingWrites(); + /* XXX: Put a condition based on number of waiting clients: if we + * have less than a given number of clients, use non threaded code. */ + handleClientsWithPendingWritesUsingThreads(); + + /* Close clients that need to be closed asynchronous */ + freeClientsInAsyncFreeQueue(); /* Before we are going to sleep, let the threads access the dataset by * releasing the GIL. Redis main thread will not touch anything at this @@ -2861,6 +2863,7 @@ void initServer(void) { slowlogInit(); latencyMonitorInit(); bioInit(); + initThreadedIO(); server.initial_memory_usage = zmalloc_used_memory(); } diff --git a/src/server.h b/src/server.h index dfd9f7698..d2a563c96 100644 --- a/src/server.h +++ b/src/server.h @@ -1062,6 +1062,8 @@ struct redisServer { int protected_mode; /* Don't accept external connections. */ int gopher_enabled; /* If true the server will reply to gopher queries. Will still serve RESP2 queries. */ + int io_threads_num; /* Number of IO threads to use. */ + /* RDB / AOF loading information */ int loading; /* We are loading data from disk if true */ off_t loading_total_bytes; @@ -1576,12 +1578,14 @@ void pauseClients(mstime_t duration); int clientsArePaused(void); int processEventsWhileBlocked(void); int handleClientsWithPendingWrites(void); +int handleClientsWithPendingWritesUsingThreads(void); int clientHasPendingReplies(client *c); void unlinkClient(client *c); int writeToClient(int fd, client *c, int handler_installed); void linkClient(client *c); void protectClient(client *c); void unprotectClient(client *c); +void initThreadedIO(void); #ifdef __GNUC__ void addReplyErrorFormat(client *c, const char *fmt, ...) From a2dbd9bd977b814ed69500538c3125c51c4963b5 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 Mar 2019 12:16:13 +0100 Subject: [PATCH 25/99] Threaded IO: allow to disable debug printf. --- src/networking.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/networking.c b/src/networking.c index 3958e4f5e..5dfaaf8ba 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2471,9 +2471,11 @@ int processEventsWhileBlocked(void) { return count; } -/* ============================================================================= +/* ========================================================================== * Threaded I/O - * =========================================================================== */ + * ========================================================================== */ + +int tio_debug = 0; #define SERVER_MAX_IO_THREADS 32 @@ -2497,11 +2499,11 @@ void *IOThreadMain(void *myid) { pthread_mutex_lock(&io_threads_idle_mutex); io_threads_idle++; pthread_cond_signal(&io_threads_idle_cond); - printf("[%ld] Waiting start...\n", id); + if (tio_debug) printf("[%ld] Waiting start...\n", id); pthread_cond_wait(&io_threads_start_cond,&io_threads_idle_mutex); - printf("[%ld] Started\n", id); + if (tio_debug) printf("[%ld] Started\n", id); pthread_mutex_unlock(&io_threads_idle_mutex); - printf("%d to handle\n", (int)listLength(io_threads_list[id])); + if (tio_debug) printf("%d to handle\n", (int)listLength(io_threads_list[id])); /* ... Process ... */ listIter li; @@ -2518,7 +2520,7 @@ void *IOThreadMain(void *myid) { io_threads_done++; pthread_cond_signal(&io_threads_done_cond); pthread_mutex_unlock(&io_threads_done_mutex); - printf("[%ld] Done\n", id); + if (tio_debug) printf("[%ld] Done\n", id); } } @@ -2541,14 +2543,14 @@ int handleClientsWithPendingWritesUsingThreads(void) { int processed = listLength(server.clients_pending_write); if (processed == 0) return 0; /* Return ASAP if there are no clients. */ - printf("%d TOTAL\n", processed); + if (tio_debug) printf("%d TOTAL\n", processed); /* Wait for all threads to be ready. */ pthread_mutex_lock(&io_threads_idle_mutex); while(io_threads_idle < server.io_threads_num) { pthread_cond_wait(&io_threads_idle_cond,&io_threads_idle_mutex); } - printf("All threads are idle: %d\n", io_threads_idle); + if (tio_debug) printf("All threads are idle: %d\n", io_threads_idle); io_threads_idle = 0; pthread_mutex_unlock(&io_threads_idle_mutex); @@ -2566,7 +2568,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { } /* Start all threads. */ - printf("Send start condition\n"); + if (tio_debug) printf("Send start condition\n"); pthread_mutex_lock(&io_threads_done_mutex); io_threads_done = 0; pthread_cond_broadcast(&io_threads_start_cond); @@ -2578,7 +2580,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { pthread_cond_wait(&io_threads_done_cond,&io_threads_done_mutex); } pthread_mutex_unlock(&io_threads_done_mutex); - printf("All threads finshed\n"); + if (tio_debug) printf("All threads finshed\n"); /* Run the list of clients again to install the write handler where * needed. */ From 6f4f36c0fb9498cee4289655036f6dd12a0bbebb Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 Mar 2019 12:56:48 +0100 Subject: [PATCH 26/99] Threaded IO: second attempt without signaling conditions. --- src/networking.c | 106 +++++++++++++++++++++++++---------------------- src/server.c | 2 - 2 files changed, 56 insertions(+), 52 deletions(-) diff --git a/src/networking.c b/src/networking.c index 5dfaaf8ba..cd241dac2 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2480,13 +2480,9 @@ int tio_debug = 0; #define SERVER_MAX_IO_THREADS 32 pthread_t io_threads[SERVER_MAX_IO_THREADS]; -pthread_mutex_t io_threads_done_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t io_threads_done_cond = PTHREAD_COND_INITIALIZER; -pthread_mutex_t io_threads_idle_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t io_threads_idle_cond = PTHREAD_COND_INITIALIZER; -pthread_cond_t io_threads_start_cond = PTHREAD_COND_INITIALIZER; -int io_threads_done = 0; /* Number of threads that completed the work. */ -int io_threads_idle = 0; /* Number of threads in idle state ready to go. */ +pthread_mutex_t io_threads_mutex[SERVER_MAX_IO_THREADS]; +_Atomic unsigned long io_threads_pending[SERVER_MAX_IO_THREADS]; +int io_threads_active; list *io_threads_list[SERVER_MAX_IO_THREADS]; void *IOThreadMain(void *myid) { @@ -2496,30 +2492,23 @@ void *IOThreadMain(void *myid) { while(1) { /* ... Wait for start ... */ - pthread_mutex_lock(&io_threads_idle_mutex); - io_threads_idle++; - pthread_cond_signal(&io_threads_idle_cond); - if (tio_debug) printf("[%ld] Waiting start...\n", id); - pthread_cond_wait(&io_threads_start_cond,&io_threads_idle_mutex); - if (tio_debug) printf("[%ld] Started\n", id); - pthread_mutex_unlock(&io_threads_idle_mutex); - if (tio_debug) printf("%d to handle\n", (int)listLength(io_threads_list[id])); - - /* ... Process ... */ - listIter li; - listNode *ln; - listRewind(io_threads_list[id],&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - writeToClient(c->fd,c,0); + pthread_mutex_lock(&io_threads_mutex[id]); + if (io_threads_pending[id]) { + if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id])); + + /* ... Process ... */ + listIter li; + listNode *ln; + listRewind(io_threads_list[id],&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + writeToClient(c->fd,c,0); + io_threads_pending[id]--; + } + listEmpty(io_threads_list[id]); } - listEmpty(io_threads_list[id]); - /* Report success. */ - pthread_mutex_lock(&io_threads_done_mutex); - io_threads_done++; - pthread_cond_signal(&io_threads_done_cond); - pthread_mutex_unlock(&io_threads_done_mutex); + pthread_mutex_unlock(&io_threads_mutex[id]); if (tio_debug) printf("[%ld] Done\n", id); } } @@ -2529,30 +2518,50 @@ void initThreadedIO(void) { pthread_t tid; server.io_threads_num = 4; + io_threads_active = 0; /* We start with threads not active. */ for (int i = 0; i < server.io_threads_num; i++) { + pthread_mutex_init(&io_threads_mutex[i],NULL); + io_threads_pending[i] = 0; + io_threads_list[i] = listCreate(); + pthread_mutex_lock(&io_threads_mutex[i]); /* Thread will be stopped. */ if (pthread_create(&tid,NULL,IOThreadMain,(void*)(long)i) != 0) { serverLog(LL_WARNING,"Fatal: Can't initialize IO thread."); exit(1); } io_threads[i] = tid; - io_threads_list[i] = listCreate(); } } +void startThreadedIO(void) { + if (tio_debug) printf("--- STARTING THREADED IO ---\n"); + serverAssert(io_threads_active == 0); + for (int j = 0; j < server.io_threads_num; j++) + pthread_mutex_unlock(&io_threads_mutex[j]); + io_threads_active = 1; +} + +void stopThreadedIO(void) { + if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); + serverAssert(io_threads_active == 1); + for (int j = 0; j < server.io_threads_num; j++) + pthread_mutex_lock(&io_threads_mutex[j]); + io_threads_active = 0; +} + int handleClientsWithPendingWritesUsingThreads(void) { int processed = listLength(server.clients_pending_write); if (processed == 0) return 0; /* Return ASAP if there are no clients. */ - if (tio_debug) printf("%d TOTAL\n", processed); - - /* Wait for all threads to be ready. */ - pthread_mutex_lock(&io_threads_idle_mutex); - while(io_threads_idle < server.io_threads_num) { - pthread_cond_wait(&io_threads_idle_cond,&io_threads_idle_mutex); + /* If we have just a few clients to serve, don't use I/O threads, but the + * boring synchronous code. */ + if (processed < (server.io_threads_num*2)) { + if (io_threads_active) stopThreadedIO(); + return handleClientsWithPendingWrites(); + } else { + if (!io_threads_active) startThreadedIO(); } - if (tio_debug) printf("All threads are idle: %d\n", io_threads_idle); - io_threads_idle = 0; - pthread_mutex_unlock(&io_threads_idle_mutex); + + if (tio_debug) printf("%d TOTAL pending clients\n", processed); /* Distribute the clients across N different lists. */ listIter li; @@ -2563,23 +2572,20 @@ int handleClientsWithPendingWritesUsingThreads(void) { client *c = listNodeValue(ln); c->flags &= ~CLIENT_PENDING_WRITE; int target_id = item_id % server.io_threads_num; + pthread_mutex_lock(&io_threads_mutex[target_id]); listAddNodeTail(io_threads_list[target_id],c); + io_threads_pending[target_id]++; + pthread_mutex_unlock(&io_threads_mutex[target_id]); item_id++; } - /* Start all threads. */ - if (tio_debug) printf("Send start condition\n"); - pthread_mutex_lock(&io_threads_done_mutex); - io_threads_done = 0; - pthread_cond_broadcast(&io_threads_start_cond); - pthread_mutex_unlock(&io_threads_done_mutex); - /* Wait for all threads to end their work. */ - pthread_mutex_lock(&io_threads_done_mutex); - while(io_threads_done < server.io_threads_num) { - pthread_cond_wait(&io_threads_done_cond,&io_threads_done_mutex); + while(1) { + unsigned long pending = 0; + for (int j = 0; j < server.io_threads_num; j++) + pending += io_threads_pending[j]; + if (pending == 0) break; } - pthread_mutex_unlock(&io_threads_done_mutex); if (tio_debug) printf("All threads finshed\n"); /* Run the list of clients again to install the write handler where diff --git a/src/server.c b/src/server.c index c437880d5..de5a814d0 100644 --- a/src/server.c +++ b/src/server.c @@ -2072,8 +2072,6 @@ void beforeSleep(struct aeEventLoop *eventLoop) { flushAppendOnlyFile(0); /* Handle writes with pending output buffers. */ - /* XXX: Put a condition based on number of waiting clients: if we - * have less than a given number of clients, use non threaded code. */ handleClientsWithPendingWritesUsingThreads(); /* Close clients that need to be closed asynchronous */ From ea35a81c42a738a73ec4505b69e1b0d16e31fb34 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 Mar 2019 16:33:23 +0100 Subject: [PATCH 27/99] Threaded IO: 3rd version: use the mutex only to stop the thread. --- src/networking.c | 52 ++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/src/networking.c b/src/networking.c index cd241dac2..17d6b1866 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2491,24 +2491,34 @@ void *IOThreadMain(void *myid) { long id = (unsigned long)myid; while(1) { - /* ... Wait for start ... */ - pthread_mutex_lock(&io_threads_mutex[id]); - if (io_threads_pending[id]) { - if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id])); - - /* ... Process ... */ - listIter li; - listNode *ln; - listRewind(io_threads_list[id],&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - writeToClient(c->fd,c,0); - io_threads_pending[id]--; - } - listEmpty(io_threads_list[id]); + /* Wait for start */ + for (int j = 0; j < 1000000; j++) { + if (io_threads_pending[id] != 0) break; + } + + /* Give the main thread a chance to stop this thread. */ + if (io_threads_pending[id] == 0) { + pthread_mutex_lock(&io_threads_mutex[id]); + pthread_mutex_unlock(&io_threads_mutex[id]); + continue; } - pthread_mutex_unlock(&io_threads_mutex[id]); + serverAssert(io_threads_pending[id] != 0); + + if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id])); + + /* Process: note that the main thread will never touch our list + * before we drop the pending count to 0. */ + listIter li; + listNode *ln; + listRewind(io_threads_list[id],&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + writeToClient(c->fd,c,0); + } + listEmpty(io_threads_list[id]); + io_threads_pending[id] = 0; + if (tio_debug) printf("[%ld] Done\n", id); } } @@ -2572,13 +2582,17 @@ int handleClientsWithPendingWritesUsingThreads(void) { client *c = listNodeValue(ln); c->flags &= ~CLIENT_PENDING_WRITE; int target_id = item_id % server.io_threads_num; - pthread_mutex_lock(&io_threads_mutex[target_id]); listAddNodeTail(io_threads_list[target_id],c); - io_threads_pending[target_id]++; - pthread_mutex_unlock(&io_threads_mutex[target_id]); item_id++; } + /* Give the start condition to the waiting threads, by setting the + * start condition atomic var. */ + for (int j = 0; j < server.io_threads_num; j++) { + int count = listLength(io_threads_list[j]); + io_threads_pending[j] = count; + } + /* Wait for all threads to end their work. */ while(1) { unsigned long pending = 0; From 9bf7f302a77e69bad40c3d13639537049ece433c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 25 Mar 2019 17:05:06 +0000 Subject: [PATCH 28/99] Threaded IO: stop threads when no longer needed + C11 in Makefile. Now threads are stopped even when the connections drop immediately to zero, not allowing the networking code to detect the condition and stop the threads. serverCron() will handle that. --- src/Makefile | 2 +- src/networking.c | 29 ++++++++++++++++++++++++----- src/server.c | 3 +++ src/server.h | 1 + 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/Makefile b/src/Makefile index 93cfdc28e..1c80e547f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,7 +20,7 @@ DEPENDENCY_TARGETS=hiredis linenoise lua NODEPS:=clean distclean # Default settings -STD=-std=c99 -pedantic -DREDIS_STATIC='' +1TD=-std=c11 -pedantic -DREDIS_STATIC='' ifneq (,$(findstring clang,$(CC))) ifneq (,$(findstring FreeBSD,$(uname_S))) STD+=-Wno-c11-extensions diff --git a/src/networking.c b/src/networking.c index 17d6b1866..d61e1f044 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2527,7 +2527,7 @@ void *IOThreadMain(void *myid) { void initThreadedIO(void) { pthread_t tid; - server.io_threads_num = 4; + server.io_threads_num = 8; io_threads_active = 0; /* We start with threads not active. */ for (int i = 0; i < server.io_threads_num; i++) { pthread_mutex_init(&io_threads_mutex[i],NULL); @@ -2543,6 +2543,7 @@ void initThreadedIO(void) { } void startThreadedIO(void) { + printf("S"); fflush(stdout); if (tio_debug) printf("--- STARTING THREADED IO ---\n"); serverAssert(io_threads_active == 0); for (int j = 0; j < server.io_threads_num; j++) @@ -2551,6 +2552,7 @@ void startThreadedIO(void) { } void stopThreadedIO(void) { + printf("E"); fflush(stdout); if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); serverAssert(io_threads_active == 1); for (int j = 0; j < server.io_threads_num; j++) @@ -2558,19 +2560,36 @@ void stopThreadedIO(void) { io_threads_active = 0; } +/* This function checks if there are not enough pending clients to justify + * taking the I/O threads active: in that case I/O threads are stopped if + * currently active. + * + * The function returns 0 if the I/O threading should be used becuase there + * are enough active threads, otherwise 1 is returned and the I/O threads + * could be possibly stopped (if already active) as a side effect. */ +int stopThreadedIOIfNeeded(void) { + int pending = listLength(server.clients_pending_write); + if (pending < (server.io_threads_num*2)) { + if (io_threads_active) stopThreadedIO(); + return 1; + } else { + return 0; + } +} + int handleClientsWithPendingWritesUsingThreads(void) { int processed = listLength(server.clients_pending_write); if (processed == 0) return 0; /* Return ASAP if there are no clients. */ /* If we have just a few clients to serve, don't use I/O threads, but the * boring synchronous code. */ - if (processed < (server.io_threads_num*2)) { - if (io_threads_active) stopThreadedIO(); + if (stopThreadedIOIfNeeded()) { return handleClientsWithPendingWrites(); - } else { - if (!io_threads_active) startThreadedIO(); } + /* Start threads if needed. */ + if (!io_threads_active) startThreadedIO(); + if (tio_debug) printf("%d TOTAL pending clients\n", processed); /* Distribute the clients across N different lists. */ diff --git a/src/server.c b/src/server.c index de5a814d0..325c9010c 100644 --- a/src/server.c +++ b/src/server.c @@ -2001,6 +2001,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { migrateCloseTimedoutSockets(); } + /* Stop the I/O threads if we don't have enough pending work. */ + stopThreadedIOIfNeeded(); + /* Start a scheduled BGSAVE if the corresponding flag is set. This is * useful when we are forced to postpone a BGSAVE because an AOF * rewrite is in progress. diff --git a/src/server.h b/src/server.h index d2a563c96..96ee37887 100644 --- a/src/server.h +++ b/src/server.h @@ -1579,6 +1579,7 @@ int clientsArePaused(void); int processEventsWhileBlocked(void); int handleClientsWithPendingWrites(void); int handleClientsWithPendingWritesUsingThreads(void); +int stopThreadedIOIfNeeded(void); int clientHasPendingReplies(client *c); void unlinkClient(client *c); int writeToClient(int fd, client *c, int handler_installed); From 30091dc29f5a1aa9e751ab5cbec0b525cd4d0f49 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Mar 2019 18:39:13 +0100 Subject: [PATCH 29/99] Threaded IO: use main thread if num of threads is 1. --- src/networking.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index d61e1f044..916f29ebc 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2525,11 +2525,16 @@ void *IOThreadMain(void *myid) { /* Initialize the data structures needed for threaded I/O. */ void initThreadedIO(void) { - pthread_t tid; - server.io_threads_num = 8; io_threads_active = 0; /* We start with threads not active. */ + + /* Don't spawn any thread if the user selected a single thread: + * we'll handle I/O directly from the main thread. */ + if (server.io_threads_num == 1) return; + + /* Spawn the I/O threads. */ for (int i = 0; i < server.io_threads_num; i++) { + pthread_t tid; pthread_mutex_init(&io_threads_mutex[i],NULL); io_threads_pending[i] = 0; io_threads_list[i] = listCreate(); @@ -2569,6 +2574,10 @@ void stopThreadedIO(void) { * could be possibly stopped (if already active) as a side effect. */ int stopThreadedIOIfNeeded(void) { int pending = listLength(server.clients_pending_write); + + /* Return ASAP if IO threads are disabled (single threaded mode). */ + if (server.io_threads_num == 1) return 0; + if (pending < (server.io_threads_num*2)) { if (io_threads_active) stopThreadedIO(); return 1; From 9814b2a5f3e91eafb21ff1fe865a161abf71045f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Mar 2019 18:58:45 +0100 Subject: [PATCH 30/99] Threaded IO: make num of I/O threads configurable. --- src/config.c | 7 +++++++ src/networking.c | 3 +-- src/server.c | 1 + src/server.h | 1 + 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/config.c b/src/config.c index 1e0525594..c4a18f3bb 100644 --- a/src/config.c +++ b/src/config.c @@ -313,6 +313,11 @@ void loadServerConfigFromString(char *config) { if (server.dbnum < 1) { err = "Invalid number of databases"; goto loaderr; } + } else if (!strcasecmp(argv[0],"io-threads") && argc == 2) { + server.io_threads_num = atoi(argv[1]); + if (server.io_threads_num < 1 || server.io_threads_num > 512) { + err = "Invalid number of I/O threads"; goto loaderr; + } } else if (!strcasecmp(argv[0],"include") && argc == 2) { loadServerConfig(argv[1],NULL); } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) { @@ -1426,6 +1431,7 @@ void configGetCommand(client *c) { config_get_numerical_field("cluster-announce-bus-port",server.cluster_announce_bus_port); config_get_numerical_field("tcp-backlog",server.tcp_backlog); config_get_numerical_field("databases",server.dbnum); + config_get_numerical_field("io-threads",server.io_threads_num); config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period); config_get_numerical_field("repl-ping-replica-period",server.repl_ping_slave_period); config_get_numerical_field("repl-timeout",server.repl_timeout); @@ -2239,6 +2245,7 @@ int rewriteConfig(char *path) { rewriteConfigSaveOption(state); rewriteConfigUserOption(state); rewriteConfigNumericalOption(state,"databases",server.dbnum,CONFIG_DEFAULT_DBNUM); + rewriteConfigNumericalOption(state,"io-threads",server.dbnum,CONFIG_DEFAULT_IO_THREADS_NUM); rewriteConfigYesNoOption(state,"stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err,CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR); rewriteConfigYesNoOption(state,"rdbcompression",server.rdb_compression,CONFIG_DEFAULT_RDB_COMPRESSION); rewriteConfigYesNoOption(state,"rdbchecksum",server.rdb_checksum,CONFIG_DEFAULT_RDB_CHECKSUM); diff --git a/src/networking.c b/src/networking.c index 916f29ebc..275338a6f 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2525,7 +2525,6 @@ void *IOThreadMain(void *myid) { /* Initialize the data structures needed for threaded I/O. */ void initThreadedIO(void) { - server.io_threads_num = 8; io_threads_active = 0; /* We start with threads not active. */ /* Don't spawn any thread if the user selected a single thread: @@ -2576,7 +2575,7 @@ int stopThreadedIOIfNeeded(void) { int pending = listLength(server.clients_pending_write); /* Return ASAP if IO threads are disabled (single threaded mode). */ - if (server.io_threads_num == 1) return 0; + if (server.io_threads_num == 1) return 1; if (pending < (server.io_threads_num*2)) { if (io_threads_active) stopThreadedIO(); diff --git a/src/server.c b/src/server.c index 325c9010c..f6d2b47f3 100644 --- a/src/server.c +++ b/src/server.c @@ -2317,6 +2317,7 @@ void initServerConfig(void) { server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL; server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO; server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; + server.io_threads_num = CONFIG_DEFAULT_IO_THREADS_NUM; unsigned int lruclock = getLRUClock(); atomicSet(server.lruclock,lruclock); diff --git a/src/server.h b/src/server.h index 96ee37887..2e4de2bb1 100644 --- a/src/server.h +++ b/src/server.h @@ -87,6 +87,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_TCP_BACKLOG 511 /* TCP listen backlog. */ #define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* Default client timeout: infinite */ #define CONFIG_DEFAULT_DBNUM 16 +#define CONFIG_DEFAULT_IO_THREADS_NUM 1 /* Single threaded by default */ #define CONFIG_MAX_LINE 1024 #define CRON_DBS_PER_CALL 16 #define NET_MAX_WRITES_PER_EVENT (1024*64) From 74591fb5bddc995dfaa51c05b3362e7675187b0f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Mar 2019 18:59:39 +0100 Subject: [PATCH 31/99] Threaded IO: hide more debugging printfs under conditional. --- src/networking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 275338a6f..caffd3be7 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2547,7 +2547,7 @@ void initThreadedIO(void) { } void startThreadedIO(void) { - printf("S"); fflush(stdout); + if (tio_debug) printf("S"); fflush(stdout); if (tio_debug) printf("--- STARTING THREADED IO ---\n"); serverAssert(io_threads_active == 0); for (int j = 0; j < server.io_threads_num; j++) @@ -2556,7 +2556,7 @@ void startThreadedIO(void) { } void stopThreadedIO(void) { - printf("E"); fflush(stdout); + if (tio_debug) printf("E"); fflush(stdout); if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); serverAssert(io_threads_active == 1); for (int j = 0; j < server.io_threads_num; j++) From dd5b105c73a02389987e457cebbeaa801ba16977 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 30 Mar 2019 11:26:58 +0100 Subject: [PATCH 32/99] Threaded IO: read side WIP. --- src/evict.c | 2 +- src/networking.c | 61 ++++++++++++++++++++++++++++++++++++++---------- src/server.c | 30 ++++++++++-------------- src/server.h | 28 +++++++++++----------- 4 files changed, 75 insertions(+), 46 deletions(-) diff --git a/src/evict.c b/src/evict.c index 773916ce8..176f4c362 100644 --- a/src/evict.c +++ b/src/evict.c @@ -78,7 +78,7 @@ unsigned int getLRUClock(void) { unsigned int LRU_CLOCK(void) { unsigned int lruclock; if (1000/server.hz <= LRU_CLOCK_RESOLUTION) { - atomicGet(server.lruclock,lruclock); + lruclock = server.lruclock; } else { lruclock = getLRUClock(); } diff --git a/src/networking.c b/src/networking.c index caffd3be7..fd4e990f4 100644 --- a/src/networking.c +++ b/src/networking.c @@ -35,6 +35,7 @@ #include static void setProtocolError(const char *errstr, client *c); +int postponeClientRead(client *c); /* Return the size consumed from the allocator, for the specified SDS string, * including internal fragmentation. This function is used in order to compute @@ -105,8 +106,7 @@ client *createClient(int fd) { } selectDb(c,0); - uint64_t client_id; - atomicGetIncr(server.next_client_id,client_id,1); + uint64_t client_id = ++server.next_client_id; c->id = client_id; c->resp = 2; c->fd = fd; @@ -950,6 +950,14 @@ void unlinkClient(client *c) { c->flags &= ~CLIENT_PENDING_WRITE; } + /* Remove from the list of pending reads if needed. */ + if (c->flags & CLIENT_PENDING_READ) { + ln = listSearchKey(server.clients_pending_read,c); + serverAssert(ln != NULL); + listDelNode(server.clients_pending_read,ln); + c->flags &= ~CLIENT_PENDING_READ; + } + /* When client was just unblocked because of a blocking operation, * remove it from the list of unblocked clients. */ if (c->flags & CLIENT_UNBLOCKED) { @@ -1642,13 +1650,19 @@ void processInputBuffer(client *c) { } /* This is a wrapper for processInputBuffer that also cares about handling - * the replication forwarding to the sub-slaves, in case the client 'c' + * the replication forwarding to the sub-replicas, in case the client 'c' * is flagged as master. Usually you want to call this instead of the * raw processInputBuffer(). */ void processInputBufferAndReplicate(client *c) { if (!(c->flags & CLIENT_MASTER)) { processInputBuffer(c); } else { + /* If the client is a master we need to compute the difference + * between the applied offset before and after processing the buffer, + * to understand how much of the replication stream was actually + * applied to the master state: this quantity, and its corresponding + * part of the replication stream, will be propagated to the + * sub-replicas and to the replication backlog. */ size_t prev_offset = c->reploff; processInputBuffer(c); size_t applied = c->reploff - prev_offset; @@ -1667,6 +1681,10 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { UNUSED(el); UNUSED(mask); + /* Check if we want to read from the client later when exiting from + * the event loop. This is the case if threaded I/O is enabled. */ + if (postponeClientRead(c)) return; + readlen = PROTO_IOBUF_LEN; /* If this is a multi bulk request, and we are processing a bulk reply * that is large enough, try to maximize the probability that the query @@ -1716,20 +1734,21 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); bytes = sdscatrepr(bytes,c->querybuf,64); - serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); +// FIXME: This may be called from an I/O thread and it is not safe to +// log from there for now. +// serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); freeClient(c); return; } - /* Time to process the buffer. If the client is a master we need to - * compute the difference between the applied offset before and after - * processing the buffer, to understand how much of the replication stream - * was actually applied to the master state: this quantity, and its - * corresponding part of the replication stream, will be propagated to - * the sub-slaves and to the replication backlog. */ - processInputBufferAndReplicate(c); + /* There is more data in the client input buffer, continue parsing it + * in case to check if there is a full command to execute. + * Don't do it if the client is flagged as CLIENT_PENDING_READ: it means + * we are currently in the context of an I/O thread. */ + if (!(c->flags & CLIENT_PENDING_READ)) + processInputBufferAndReplicate(c); } void getClientsMaxBuffers(unsigned long *longest_output_list, @@ -2566,7 +2585,9 @@ void stopThreadedIO(void) { /* This function checks if there are not enough pending clients to justify * taking the I/O threads active: in that case I/O threads are stopped if - * currently active. + * currently active. We track the pending writes as a measure of clients + * we need to handle in parallel, however the I/O threading is disabled + * globally for reads as well if we have too little pending clients. * * The function returns 0 if the I/O threading should be used becuase there * are enough active threads, otherwise 1 is returned and the I/O threads @@ -2647,3 +2668,19 @@ int handleClientsWithPendingWritesUsingThreads(void) { listEmpty(server.clients_pending_write); return processed; } + +/* Return 1 if we want to handle the client read later using threaded I/O. + * This is called by the readable handler of the event loop. + * As a side effect of calling this function the client is put in the + * pending read clients and flagged as such. */ +int postponeClientRead(client *c) { + if (io_threads_active && + !(c->flags & (CLIENT_MASTER|CLIENT_SLAVE|CLIENT_PENDING_READ))) + { + c->flags |= CLIENT_PENDING_READ; + listAddNodeHead(server.clients_pending_read,c); + return 1; + } else { + return 0; + } +} diff --git a/src/server.c b/src/server.c index f6d2b47f3..ef6b85c44 100644 --- a/src/server.c +++ b/src/server.c @@ -1728,16 +1728,17 @@ void databasesCron(void) { * every object access, and accuracy is not needed. To access a global var is * a lot faster than calling time(NULL) */ void updateCachedTime(void) { - time_t unixtime = time(NULL); - atomicSet(server.unixtime,unixtime); + server.unixtime = time(NULL); server.mstime = mstime(); - /* To get information about daylight saving time, we need to call localtime_r - * and cache the result. However calling localtime_r in this context is safe - * since we will never fork() while here, in the main thread. The logging - * function will call a thread safe version of localtime that has no locks. */ + /* To get information about daylight saving time, we need to call + * localtime_r and cache the result. However calling localtime_r in this + * context is safe since we will never fork() while here, in the main + * thread. The logging function will call a thread safe version of + * localtime that has no locks. */ struct tm tm; - localtime_r(&server.unixtime,&tm); + time_t ut = server.unixtime; + localtime_r(&ut,&tm); server.daylight_active = tm.tm_isdst; } @@ -1807,8 +1808,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { * * Note that you can change the resolution altering the * LRU_CLOCK_RESOLUTION define. */ - unsigned long lruclock = getLRUClock(); - atomicSet(server.lruclock,lruclock); + server.lruclock = getLRUClock(); /* Record the max memory used since the server was started. */ if (zmalloc_used_memory() > server.stat_peak_memory) @@ -2202,10 +2202,6 @@ void createSharedObjects(void) { void initServerConfig(void) { int j; - pthread_mutex_init(&server.next_client_id_mutex,NULL); - pthread_mutex_init(&server.lruclock_mutex,NULL); - pthread_mutex_init(&server.unixtime_mutex,NULL); - updateCachedTime(); getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE); server.runid[CONFIG_RUN_ID_SIZE] = '\0'; @@ -2319,8 +2315,7 @@ void initServerConfig(void) { server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; server.io_threads_num = CONFIG_DEFAULT_IO_THREADS_NUM; - unsigned int lruclock = getLRUClock(); - atomicSet(server.lruclock,lruclock); + server.lruclock = getLRUClock(); resetServerSaveParams(); appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */ @@ -2718,6 +2713,7 @@ void initServer(void) { server.slaves = listCreate(); server.monitors = listCreate(); server.clients_pending_write = listCreate(); + server.clients_pending_read = listCreate(); server.slaveseldb = -1; /* Force to emit the first SELECT command. */ server.unblocked_clients = listCreate(); server.ready_keys = listCreate(); @@ -3821,8 +3817,6 @@ sds genRedisInfoString(char *section) { call_uname = 0; } - unsigned int lruclock; - atomicGet(server.lruclock,lruclock); info = sdscatprintf(info, "# Server\r\n" "redis_version:%s\r\n" @@ -3866,7 +3860,7 @@ sds genRedisInfoString(char *section) { (intmax_t)(uptime/(3600*24)), server.hz, server.config_hz, - (unsigned long) lruclock, + (unsigned long) server.lruclock, server.executable ? server.executable : "", server.configfile ? server.configfile : ""); } diff --git a/src/server.h b/src/server.h index 2e4de2bb1..dcfcb55fb 100644 --- a/src/server.h +++ b/src/server.h @@ -285,6 +285,9 @@ typedef long long mstime_t; /* millisecond time type. */ #define CLIENT_LUA_DEBUG_SYNC (1<<26) /* EVAL debugging without fork() */ #define CLIENT_MODULE (1<<27) /* Non connected client used by some module. */ #define CLIENT_PROTECTED (1<<28) /* Client should not be freed for now. */ +#define CLIENT_PENDING_READ (1<<29) /* The client has pending reads and was put + in the list of clients we can read + from. */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -1018,7 +1021,7 @@ struct redisServer { dict *commands; /* Command table */ dict *orig_commands; /* Command table before command renaming. */ aeEventLoop *el; - unsigned int lruclock; /* Clock for LRU eviction */ + _Atomic unsigned int lruclock; /* Clock for LRU eviction */ int shutdown_asap; /* SHUTDOWN needed ASAP */ int activerehashing; /* Incremental rehash in serverCron() */ int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */ @@ -1052,6 +1055,7 @@ struct redisServer { list *clients; /* List of active clients */ list *clients_to_close; /* Clients to close asynchronously */ list *clients_pending_write; /* There is to write or install handler. */ + list *clients_pending_read; /* Client has pending read socket buffers. */ list *slaves, *monitors; /* List of slaves and MONITORs */ client *current_client; /* Current client, only used on crash report */ rax *clients_index; /* Active clients dictionary by client ID. */ @@ -1059,7 +1063,7 @@ struct redisServer { mstime_t clients_pause_end_time; /* Time when we undo clients_paused */ char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */ dict *migrate_cached_sockets;/* MIGRATE cached sockets */ - uint64_t next_client_id; /* Next client unique ID. Incremental. */ + _Atomic uint64_t next_client_id; /* Next client unique ID. Incremental. */ int protected_mode; /* Don't accept external connections. */ int gopher_enabled; /* If true the server will reply to gopher queries. Will still serve RESP2 queries. */ @@ -1104,8 +1108,8 @@ struct redisServer { long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */ unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */ struct malloc_stats cron_malloc_stats; /* sampled in serverCron(). */ - long long stat_net_input_bytes; /* Bytes read from network. */ - long long stat_net_output_bytes; /* Bytes written to network. */ + _Atomic long long stat_net_input_bytes; /* Bytes read from network. */ + _Atomic long long stat_net_output_bytes; /* Bytes written to network. */ size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */ size_t stat_aof_cow_bytes; /* Copy on write bytes during AOF rewrite. */ /* The following two are used to track instantaneous metrics, like @@ -1128,7 +1132,7 @@ struct redisServer { int active_defrag_cycle_min; /* minimal effort for defrag in CPU percentage */ int active_defrag_cycle_max; /* maximal effort for defrag in CPU percentage */ unsigned long active_defrag_max_scan_fields; /* maximum number of fields of set/hash/zset/list to process from within the main dict scan */ - size_t client_max_querybuf_len; /* Limit for client query buffer length */ + _Atomic size_t client_max_querybuf_len; /* Limit for client query buffer length */ int dbnum; /* Total number of configured DBs */ int supervised; /* 1 if supervised, 0 otherwise. */ int supervised_mode; /* See SUPERVISED_* */ @@ -1297,10 +1301,10 @@ struct redisServer { int list_max_ziplist_size; int list_compress_depth; /* time cache */ - time_t unixtime; /* Unix time sampled every cron cycle. */ - time_t timezone; /* Cached timezone. As set by tzset(). */ - int daylight_active; /* Currently in daylight saving time. */ - long long mstime; /* Like 'unixtime' but with milliseconds resolution. */ + _Atomic time_t unixtime; /* Unix time sampled every cron cycle. */ + time_t timezone; /* Cached timezone. As set by tzset(). */ + int daylight_active; /* Currently in daylight saving time. */ + long long mstime; /* 'unixtime' with milliseconds resolution. */ /* Pubsub */ dict *pubsub_channels; /* Map channels to list of subscribed clients */ list *pubsub_patterns; /* A list of pubsub_patterns */ @@ -1360,12 +1364,6 @@ struct redisServer { int watchdog_period; /* Software watchdog period in ms. 0 = off */ /* System hardware info */ size_t system_memory_size; /* Total memory in system as reported by OS */ - - /* Mutexes used to protect atomic variables when atomic builtins are - * not available. */ - pthread_mutex_t lruclock_mutex; - pthread_mutex_t next_client_id_mutex; - pthread_mutex_t unixtime_mutex; }; typedef struct pubsubPattern { From a2245f8ff146629159d8c52d60713a262fa1b69a Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 31 Mar 2019 15:58:54 +0200 Subject: [PATCH 33/99] Threaded IO: read side WIP 2. --- src/networking.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/networking.c b/src/networking.c index fd4e990f4..7d8470489 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2496,13 +2496,16 @@ int processEventsWhileBlocked(void) { int tio_debug = 0; -#define SERVER_MAX_IO_THREADS 32 +#define IO_THREADS_MAX_NUM 128 +#define IO_THREADS_OP_READ 0 +#define IO_THREADS_OP_WRITE 1 -pthread_t io_threads[SERVER_MAX_IO_THREADS]; -pthread_mutex_t io_threads_mutex[SERVER_MAX_IO_THREADS]; -_Atomic unsigned long io_threads_pending[SERVER_MAX_IO_THREADS]; -int io_threads_active; -list *io_threads_list[SERVER_MAX_IO_THREADS]; +pthread_t io_threads[IO_THREADS_MAX_NUM]; +pthread_mutex_t io_threads_mutex[IO_THREADS_MAX_NUM]; +_Atomic unsigned long io_threads_pending[IO_THREADS_MAX_NUM]; +int io_threads_active; /* Are the threads currently spinning waiting I/O? */ +int io_threads_op; /* IO_THREADS_OP_WRITE or IO_THREADS_OP_READ. */ +list *io_threads_list[IO_THREADS_MAX_NUM]; void *IOThreadMain(void *myid) { /* The ID is the thread number (from 0 to server.iothreads_num-1), and is @@ -2533,7 +2536,11 @@ void *IOThreadMain(void *myid) { listRewind(io_threads_list[id],&li); while((ln = listNext(&li))) { client *c = listNodeValue(ln); - writeToClient(c->fd,c,0); + if (io_threads_op == IO_THREADS_OP_WRITE) { + writeToClient(c->fd,c,0); + } else { + readQueryFromClient(NULL,c->fd,c,0); + } } listEmpty(io_threads_list[id]); io_threads_pending[id] = 0; @@ -2550,6 +2557,12 @@ void initThreadedIO(void) { * we'll handle I/O directly from the main thread. */ if (server.io_threads_num == 1) return; + if (server.io_threads_num > IO_THREADS_MAX_NUM) { + serverLog(LL_WARNING,"Fatal: too many I/O threads configured. " + "The maximum number is %d.", IO_THREADS_MAX_NUM); + exit(1); + } + /* Spawn the I/O threads. */ for (int i = 0; i < server.io_threads_num; i++) { pthread_t tid; @@ -2684,3 +2697,6 @@ int postponeClientRead(client *c) { return 0; } } + +int handleClientsWithPendingReadsUsingThreads(void) { +} From 63a0ffd36a99083b909e2110a7604fe335656a8d Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 31 Mar 2019 21:59:50 +0200 Subject: [PATCH 34/99] Threaded IO: read side WIP 3. --- src/networking.c | 59 +++++++++++++++++++++++++++++++++++++++++++----- src/server.c | 1 + src/server.h | 1 + 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/src/networking.c b/src/networking.c index 7d8470489..3a36badb8 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1711,12 +1711,12 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { return; } else { serverLog(LL_VERBOSE, "Reading from client: %s",strerror(errno)); - freeClient(c); + freeClientAsync(c); return; } } else if (nread == 0) { serverLog(LL_VERBOSE, "Client closed connection"); - freeClient(c); + freeClientAsync(c); return; } else if (c->flags & CLIENT_MASTER) { /* Append the query buffer to the pending (not applied) buffer @@ -1739,7 +1739,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { // serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); - freeClient(c); + freeClientAsync(c); return; } @@ -2538,8 +2538,10 @@ void *IOThreadMain(void *myid) { client *c = listNodeValue(ln); if (io_threads_op == IO_THREADS_OP_WRITE) { writeToClient(c->fd,c,0); - } else { + } else if (io_threads_op == IO_THREADS_OP_READ) { readQueryFromClient(NULL,c->fd,c,0); + } else { + serverPanic("io_threads_op value is unknown"); } } listEmpty(io_threads_list[id]); @@ -2632,7 +2634,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { /* Start threads if needed. */ if (!io_threads_active) startThreadedIO(); - if (tio_debug) printf("%d TOTAL pending clients\n", processed); + if (tio_debug) printf("%d TOTAL WRITE pending clients\n", processed); /* Distribute the clients across N different lists. */ listIter li; @@ -2649,6 +2651,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { /* Give the start condition to the waiting threads, by setting the * start condition atomic var. */ + io_threads_op = IO_THREADS_OP_WRITE; for (int j = 0; j < server.io_threads_num; j++) { int count = listLength(io_threads_list[j]); io_threads_pending[j] = count; @@ -2661,7 +2664,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { pending += io_threads_pending[j]; if (pending == 0) break; } - if (tio_debug) printf("All threads finshed\n"); + if (tio_debug) printf("I/O WRITE All threads finshed\n"); /* Run the list of clients again to install the write handler where * needed. */ @@ -2699,4 +2702,48 @@ int postponeClientRead(client *c) { } int handleClientsWithPendingReadsUsingThreads(void) { + if (!io_threads_active) return 0; + int processed = listLength(server.clients_pending_read); + if (processed == 0) return 0; + + if (tio_debug) printf("%d TOTAL READ pending clients\n", processed); + + /* Distribute the clients across N different lists. */ + listIter li; + listNode *ln; + listRewind(server.clients_pending_read,&li); + int item_id = 0; + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + int target_id = item_id % server.io_threads_num; + listAddNodeTail(io_threads_list[target_id],c); + item_id++; + } + + /* Give the start condition to the waiting threads, by setting the + * start condition atomic var. */ + io_threads_op = IO_THREADS_OP_READ; + for (int j = 0; j < server.io_threads_num; j++) { + int count = listLength(io_threads_list[j]); + io_threads_pending[j] = count; + } + + /* Wait for all threads to end their work. */ + while(1) { + unsigned long pending = 0; + for (int j = 0; j < server.io_threads_num; j++) + pending += io_threads_pending[j]; + if (pending == 0) break; + } + if (tio_debug) printf("I/O READ All threads finshed\n"); + + /* Run the list of clients again to process the new buffers. */ + listRewind(server.clients_pending_read,&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + c->flags &= ~CLIENT_PENDING_READ; + processInputBufferAndReplicate(c); + } + listEmpty(server.clients_pending_read); + return processed; } diff --git a/src/server.c b/src/server.c index ef6b85c44..e0c48b097 100644 --- a/src/server.c +++ b/src/server.c @@ -2092,6 +2092,7 @@ void beforeSleep(struct aeEventLoop *eventLoop) { void afterSleep(struct aeEventLoop *eventLoop) { UNUSED(eventLoop); if (moduleCount()) moduleAcquireGIL(); + handleClientsWithPendingReadsUsingThreads(); } /* =========================== Server initialization ======================== */ diff --git a/src/server.h b/src/server.h index dcfcb55fb..0d7882419 100644 --- a/src/server.h +++ b/src/server.h @@ -1578,6 +1578,7 @@ int clientsArePaused(void); int processEventsWhileBlocked(void); int handleClientsWithPendingWrites(void); int handleClientsWithPendingWritesUsingThreads(void); +int handleClientsWithPendingReadsUsingThreads(void); int stopThreadedIOIfNeeded(void); int clientHasPendingReplies(client *c); void unlinkClient(client *c); From 8d7d2be24fb74234603667e8da4de2d2f466aff1 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 31 Mar 2019 22:06:00 +0200 Subject: [PATCH 35/99] Threaded IO: process read queue before stopping threads. --- src/networking.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 3a36badb8..29a56e983 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2590,8 +2590,13 @@ void startThreadedIO(void) { } void stopThreadedIO(void) { + /* We may have still clients with pending reads when this function + * is called: handle them before stopping the threads. */ + handleClientsWithPendingReadsUsingThreads(); if (tio_debug) printf("E"); fflush(stdout); - if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); + if (tio_debug) printf("--- STOPPING THREADED IO [R%d] [W%d] ---\n", + (int) listLength(server.clients_pending_read), + (int) listLength(server.clients_pending_write)); serverAssert(io_threads_active == 1); for (int j = 0; j < server.io_threads_num; j++) pthread_mutex_lock(&io_threads_mutex[j]); From 463ccf86642ae35e18cf0c84be4e8e9e7c905c70 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 13:12:10 +0200 Subject: [PATCH 36/99] Threaded IO: logging should be safe in I/O threads. Potentially it is possible that we get interleaved writes, even if serverLog() makes sure to write into a buffer and then use printf(), so even this should be ok. However in general POSIX guarantees that writing to the same file pointer object from multiple threads is safe. Anyway currently we *reopen* the file at each call, but for the standard output logging. The logging functions actually also access global configuration while performing the log (for instance in order to check the log level, the log filename and so forth), however dunring the I/O threads execution we cannot alter such shared state in any way. --- src/networking.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/networking.c b/src/networking.c index 29a56e983..0e11e1f3f 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1174,14 +1174,13 @@ int writeToClient(int fd, client *c, int handler_installed) { zmalloc_used_memory() < server.maxmemory) && !(c->flags & CLIENT_SLAVE)) break; } - /* FIXME: Fixme, use atomic var for this. */ server.stat_net_output_bytes += totwritten; if (nwritten == -1) { if (errno == EAGAIN) { nwritten = 0; } else { - // serverLog(LL_VERBOSE, - // "Error writing to client: %s", strerror(errno)); + serverLog(LL_VERBOSE, + "Error writing to client: %s", strerror(errno)); freeClientAsync(c); return C_ERR; } From 647a66ebba5d12d461e830f174a1c90a4e96c5cd Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 12 Apr 2019 17:18:10 +0200 Subject: [PATCH 37/99] Threaded IO: parsing WIP 1: set current_client in a better scoped way. --- src/networking.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/networking.c b/src/networking.c index 0e11e1f3f..3faaf4a12 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1563,7 +1563,7 @@ int processMultibulkBuffer(client *c) { * or because a client was blocked and later reactivated, so there could be * pending query buffer, already representing a full command, to process. */ void processInputBuffer(client *c) { - server.current_client = c; + int deadclient = 0; /* Keep processing while there is something in the input buffer */ while(c->qb_pos < sdslen(c->querybuf)) { @@ -1619,6 +1619,7 @@ void processInputBuffer(client *c) { resetClient(c); } else { /* Only reset the client when the command was executed. */ + server.current_client = c; if (processCommand(c) == C_OK) { if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { /* Update the applied replication offset of our master. */ @@ -1629,23 +1630,26 @@ void processInputBuffer(client *c) { * module blocking command, so that the reply callback will * still be able to access the client argv and argc field. * The client will be reset in unblockClientFromModule(). */ - if (!(c->flags & CLIENT_BLOCKED) || c->btype != BLOCKED_MODULE) + if (!(c->flags & CLIENT_BLOCKED) || + c->btype != BLOCKED_MODULE) + { resetClient(c); + } } + if (server.current_client == NULL) deadclient = 1; + server.current_client = NULL; /* freeMemoryIfNeeded may flush slave output buffers. This may * result into a slave, that may be the active client, to be * freed. */ - if (server.current_client == NULL) break; + if (deadclient) break; } } /* Trim to pos */ - if (server.current_client != NULL && c->qb_pos) { + if (!deadclient && c->qb_pos) { sdsrange(c->querybuf,c->qb_pos,-1); c->qb_pos = 0; } - - server.current_client = NULL; } /* This is a wrapper for processInputBuffer that also cares about handling @@ -1743,11 +1747,8 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { } /* There is more data in the client input buffer, continue parsing it - * in case to check if there is a full command to execute. - * Don't do it if the client is flagged as CLIENT_PENDING_READ: it means - * we are currently in the context of an I/O thread. */ - if (!(c->flags & CLIENT_PENDING_READ)) - processInputBufferAndReplicate(c); + * in case to check if there is a full command to execute. */ + processInputBufferAndReplicate(c); } void getClientsMaxBuffers(unsigned long *longest_output_list, From 6ab6a97fe6991d1496a3c8efa52280db3a3df3eb Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 26 Apr 2019 19:29:50 +0200 Subject: [PATCH 38/99] Threaded IO: parsing WIP 2: refactoring to parse from thread. --- src/networking.c | 87 ++++++++++++++++++++++++++++++++---------------- src/server.h | 1 + 2 files changed, 60 insertions(+), 28 deletions(-) diff --git a/src/networking.c b/src/networking.c index 3faaf4a12..4361ab1af 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1558,13 +1558,47 @@ int processMultibulkBuffer(client *c) { return C_ERR; } +/* This function calls processCommand(), but also performs a few sub tasks + * that are useful in that context: + * + * 1. It sets the current client to the client 'c'. + * 2. In the case of master clients, the replication offset is updated. + * 3. The client is reset unless there are reasons to avoid doing it. + * + * The function returns C_ERR in case the client was freed as a side effect + * of processing the command, otherwise C_OK is returned. */ +int processCommandAndResetClient(client *c) { + int deadclient = 0; + server.current_client = c; + if (processCommand(c) == C_OK) { + if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { + /* Update the applied replication offset of our master. */ + c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; + } + + /* Don't reset the client structure for clients blocked in a + * module blocking command, so that the reply callback will + * still be able to access the client argv and argc field. + * The client will be reset in unblockClientFromModule(). */ + if (!(c->flags & CLIENT_BLOCKED) || + c->btype != BLOCKED_MODULE) + { + resetClient(c); + } + } + if (server.current_client == NULL) deadclient = 1; + server.current_client = NULL; + /* freeMemoryIfNeeded may flush slave output buffers. This may + * result into a slave, that may be the active client, to be + * freed. */ + return deadclient ? C_ERR : C_OK; +} + /* This function is called every time, in the client structure 'c', there is * more query buffer to process, because we read more data from the socket * or because a client was blocked and later reactivated, so there could be * pending query buffer, already representing a full command, to process. */ void processInputBuffer(client *c) { - int deadclient = 0; - /* Keep processing while there is something in the input buffer */ while(c->qb_pos < sdslen(c->querybuf)) { /* Return if clients are paused. */ @@ -1573,6 +1607,10 @@ void processInputBuffer(client *c) { /* Immediately abort if the client is in the middle of something. */ if (c->flags & CLIENT_BLOCKED) break; + /* Don't process more buffers from clients that have already pending + * commands to execute in c->argv. */ + if (c->flags & CLIENT_PENDING_COMMAND) break; + /* Don't process input from the master while there is a busy script * condition on the slave. We want just to accumulate the replication * stream (instead of replying -BUSY like we do with other clients) and @@ -1618,35 +1656,26 @@ void processInputBuffer(client *c) { if (c->argc == 0) { resetClient(c); } else { - /* Only reset the client when the command was executed. */ - server.current_client = c; - if (processCommand(c) == C_OK) { - if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { - /* Update the applied replication offset of our master. */ - c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; - } + /* If we are in the context of an I/O thread, we can't really + * execute the command here. All we can do is to flag the client + * as one that needs to process the command. */ + if (c->flags & CLIENT_PENDING_READ) { + c->flags |= CLIENT_PENDING_COMMAND; + break; + } - /* Don't reset the client structure for clients blocked in a - * module blocking command, so that the reply callback will - * still be able to access the client argv and argc field. - * The client will be reset in unblockClientFromModule(). */ - if (!(c->flags & CLIENT_BLOCKED) || - c->btype != BLOCKED_MODULE) - { - resetClient(c); - } + /* We are finally ready to execute the command. */ + if (processCommandAndResetClient(c) == C_ERR) { + /* If the client is no longer valid, we avoid exiting this + * loop and trimming the client buffer later. So we return + * ASAP in that case. */ + return; } - if (server.current_client == NULL) deadclient = 1; - server.current_client = NULL; - /* freeMemoryIfNeeded may flush slave output buffers. This may - * result into a slave, that may be the active client, to be - * freed. */ - if (deadclient) break; } } /* Trim to pos */ - if (!deadclient && c->qb_pos) { + if (c->qb_pos) { sdsrange(c->querybuf,c->qb_pos,-1); c->qb_pos = 0; } @@ -1737,9 +1766,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); bytes = sdscatrepr(bytes,c->querybuf,64); -// FIXME: This may be called from an I/O thread and it is not safe to -// log from there for now. -// serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); + serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); freeClientAsync(c); @@ -2747,6 +2774,10 @@ int handleClientsWithPendingReadsUsingThreads(void) { while((ln = listNext(&li))) { client *c = listNodeValue(ln); c->flags &= ~CLIENT_PENDING_READ; + if (c->flags & CLIENT_PENDING_COMMAND) { + c->flags &= ~ CLIENT_PENDING_COMMAND; + processCommandAndResetClient(c); + } processInputBufferAndReplicate(c); } listEmpty(server.clients_pending_read); diff --git a/src/server.h b/src/server.h index 0d7882419..c088d356a 100644 --- a/src/server.h +++ b/src/server.h @@ -288,6 +288,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CLIENT_PENDING_READ (1<<29) /* The client has pending reads and was put in the list of clients we can read from. */ +#define CLIENT_PENDING_COMMAND (1<<30) /* */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ From 90d720e7a5777ec34c93258d97592d8c6b439988 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 29 Apr 2019 12:46:23 +0200 Subject: [PATCH 39/99] Threaded IO: put fflush() inside tio_debug conditional. --- src/networking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 4361ab1af..74bd0f13d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2608,7 +2608,7 @@ void initThreadedIO(void) { } void startThreadedIO(void) { - if (tio_debug) printf("S"); fflush(stdout); + if (tio_debug) { printf("S"); fflush(stdout); } if (tio_debug) printf("--- STARTING THREADED IO ---\n"); serverAssert(io_threads_active == 0); for (int j = 0; j < server.io_threads_num; j++) @@ -2620,7 +2620,7 @@ void stopThreadedIO(void) { /* We may have still clients with pending reads when this function * is called: handle them before stopping the threads. */ handleClientsWithPendingReadsUsingThreads(); - if (tio_debug) printf("E"); fflush(stdout); + if (tio_debug) { printf("E"); fflush(stdout); } if (tio_debug) printf("--- STOPPING THREADED IO [R%d] [W%d] ---\n", (int) listLength(server.clients_pending_read), (int) listLength(server.clients_pending_write)); From 1c0c436757f278565b400c36e763531d073ef4bb Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 30 Apr 2019 15:39:27 +0200 Subject: [PATCH 40/99] Threaded IO: ability to disable reads from threaded path. --- src/networking.c | 3 ++- src/server.c | 1 + src/server.h | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 74bd0f13d..651dbdb8a 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2723,6 +2723,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { * pending read clients and flagged as such. */ int postponeClientRead(client *c) { if (io_threads_active && + server.io_threads_do_reads && !(c->flags & (CLIENT_MASTER|CLIENT_SLAVE|CLIENT_PENDING_READ))) { c->flags |= CLIENT_PENDING_READ; @@ -2734,7 +2735,7 @@ int postponeClientRead(client *c) { } int handleClientsWithPendingReadsUsingThreads(void) { - if (!io_threads_active) return 0; + if (!io_threads_active || !server.io_threads_do_reads) return 0; int processed = listLength(server.clients_pending_read); if (processed == 0) return 0; diff --git a/src/server.c b/src/server.c index e0c48b097..2643d7266 100644 --- a/src/server.c +++ b/src/server.c @@ -2315,6 +2315,7 @@ void initServerConfig(void) { server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO; server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; server.io_threads_num = CONFIG_DEFAULT_IO_THREADS_NUM; + server.io_threads_do_reads = CONFIG_DEFAULT_IO_THREADS_DO_READS; server.lruclock = getLRUClock(); resetServerSaveParams(); diff --git a/src/server.h b/src/server.h index c088d356a..3987ab5fc 100644 --- a/src/server.h +++ b/src/server.h @@ -88,6 +88,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* Default client timeout: infinite */ #define CONFIG_DEFAULT_DBNUM 16 #define CONFIG_DEFAULT_IO_THREADS_NUM 1 /* Single threaded by default */ +#define CONFIG_DEFAULT_IO_THREADS_DO_READS 0 /* Read + parse from threads? */ #define CONFIG_MAX_LINE 1024 #define CRON_DBS_PER_CALL 16 #define NET_MAX_WRITES_PER_EVENT (1024*64) @@ -1069,6 +1070,7 @@ struct redisServer { int gopher_enabled; /* If true the server will reply to gopher queries. Will still serve RESP2 queries. */ int io_threads_num; /* Number of IO threads to use. */ + int io_threads_do_reads; /* Read and parse from IO threads? */ /* RDB / AOF loading information */ int loading; /* We are loading data from disk if true */ From 5baeb14cf3ca8eb345a7a7352bf482542168728e Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 30 Apr 2019 15:55:02 +0200 Subject: [PATCH 41/99] Threaded IO: configuration directive for turning on/off reads. --- src/config.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/config.c b/src/config.c index c4a18f3bb..1686743a0 100644 --- a/src/config.c +++ b/src/config.c @@ -318,6 +318,10 @@ void loadServerConfigFromString(char *config) { if (server.io_threads_num < 1 || server.io_threads_num > 512) { err = "Invalid number of I/O threads"; goto loaderr; } + } else if (!strcasecmp(argv[0],"io-threads-do-reads") && argc == 2) { + if ((server.io_threads_do_reads = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"include") && argc == 2) { loadServerConfig(argv[1],NULL); } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) { @@ -1485,6 +1489,7 @@ void configGetCommand(client *c) { config_get_bool_field("activedefrag", server.active_defrag_enabled); config_get_bool_field("protected-mode", server.protected_mode); config_get_bool_field("gopher-enabled", server.gopher_enabled); + config_get_bool_field("io-threads-do-reads", server.io_threads_do_reads); config_get_bool_field("repl-disable-tcp-nodelay", server.repl_disable_tcp_nodelay); config_get_bool_field("repl-diskless-sync", @@ -2316,6 +2321,7 @@ int rewriteConfig(char *path) { rewriteConfigYesNoOption(state,"activedefrag",server.active_defrag_enabled,CONFIG_DEFAULT_ACTIVE_DEFRAG); rewriteConfigYesNoOption(state,"protected-mode",server.protected_mode,CONFIG_DEFAULT_PROTECTED_MODE); rewriteConfigYesNoOption(state,"gopher-enabled",server.gopher_enabled,CONFIG_DEFAULT_GOPHER_ENABLED); + rewriteConfigYesNoOption(state,"io-threads-do-reads",server.io_threads_do_reads,CONFIG_DEFAULT_IO_THREADS_DO_READS); rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigNumericalOption(state,"hz",server.config_hz,CONFIG_DEFAULT_HZ); rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC); From 3d053dbb6dea51d59709913e9c0e9f96cc1d24f8 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 30 Apr 2019 15:59:23 +0200 Subject: [PATCH 42/99] Threaded IO: handleClientsWithPendingReadsUsingThreads top comment. --- src/networking.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/networking.c b/src/networking.c index 651dbdb8a..6fec97605 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2734,6 +2734,12 @@ int postponeClientRead(client *c) { } } +/* When threaded I/O is also enabled for the reading + parsing side, the + * readable handler will just put normal clients into a queue of clients to + * process (instead of serving them synchronously). This function runs + * the queue using the I/O threads, and process them in order to accumulate + * the reads in the buffers, and also parse the first command available + * rendering it in the client structures. */ int handleClientsWithPendingReadsUsingThreads(void) { if (!io_threads_active || !server.io_threads_do_reads) return 0; int processed = listLength(server.clients_pending_read); From 340a723b87eff9df140bfb1de239ea65e318fee1 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Tue, 7 May 2019 13:35:27 +0800 Subject: [PATCH 43/99] Makefile: 1TD -> STD --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 1c80e547f..f35685eff 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,7 +20,7 @@ DEPENDENCY_TARGETS=hiredis linenoise lua NODEPS:=clean distclean # Default settings -1TD=-std=c11 -pedantic -DREDIS_STATIC='' +STD=-std=c11 -pedantic -DREDIS_STATIC='' ifneq (,$(findstring clang,$(CC))) ifneq (,$(findstring FreeBSD,$(uname_S))) STD+=-Wno-c11-extensions From d9d3d3065ba3f6fc941fc6886da8392193d0cb41 Mon Sep 17 00:00:00 2001 From: stan011 Date: Tue, 7 May 2019 14:22:40 +0800 Subject: [PATCH 44/99] change the comments there may have a mis type --- src/t_list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_list.c b/src/t_list.c index 45d2e3317..54e4959b9 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -617,7 +617,7 @@ void rpoplpushCommand(client *c) { * the AOF and replication channel. * * The argument 'where' is LIST_TAIL or LIST_HEAD, and indicates if the - * 'value' element was popped fron the head (BLPOP) or tail (BRPOP) so that + * 'value' element was popped from the head (BLPOP) or tail (BRPOP) so that * we can propagate the command properly. * * The function returns C_OK if we are able to serve the client, otherwise From 48d591a010bbe4cf0c09a4d84a55ee3f31e5664f Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Tue, 7 May 2019 15:59:16 +0800 Subject: [PATCH 45/99] fix memory leak when rewrite config file --- src/config.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/config.c b/src/config.c index 1686743a0..7f0e9af89 100644 --- a/src/config.c +++ b/src/config.c @@ -1711,12 +1711,11 @@ void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char * * If the old file does not exist at all, an empty state is returned. */ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) { FILE *fp = fopen(path,"r"); - struct rewriteConfigState *state = zmalloc(sizeof(*state)); - char buf[CONFIG_MAX_LINE+1]; - int linenum = -1; - if (fp == NULL && errno != ENOENT) return NULL; + char buf[CONFIG_MAX_LINE+1]; + int linenum = -1; + struct rewriteConfigState *state = zmalloc(sizeof(*state)); state->option_to_line = dictCreate(&optionToLineDictType,NULL); state->rewritten = dictCreate(&optionSetDictType,NULL); state->numlines = 0; From 842dd85b264f7d77a12273f8b2e7700ce99dd610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=94=90=E6=9D=83?= Date: Wed, 8 May 2019 12:53:56 +0800 Subject: [PATCH 46/99] Update ziplist.c Hi, @antirez In the code, to get the size of ziplist, "unsigned int bytes = ZIPLIST_HEADER_SIZE+1;" is correct, but why not make it more readable and easy to understand --- src/ziplist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ziplist.c b/src/ziplist.c index 1579d1109..ef40d6aa2 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -576,7 +576,7 @@ void zipEntry(unsigned char *p, zlentry *e) { /* Create a new empty ziplist. */ unsigned char *ziplistNew(void) { - unsigned int bytes = ZIPLIST_HEADER_SIZE+1; + unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE; unsigned char *zl = zmalloc(bytes); ZIPLIST_BYTES(zl) = intrev32ifbe(bytes); ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE); From fb4ee7f0c5ad81cae88a10587d5246a1b4f4dd84 Mon Sep 17 00:00:00 2001 From: yongman Date: Wed, 8 May 2019 16:13:42 +0800 Subject: [PATCH 47/99] Fix uint64_t hash value in active defrag --- src/defrag.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/defrag.c b/src/defrag.c index d67b6e253..ecf0255dc 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -47,7 +47,7 @@ int je_get_defrag_hint(void* ptr, int *bin_util, int *run_util); /* forward declarations*/ void defragDictBucketCallback(void *privdata, dictEntry **bucketref); -dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, long *defragged); +dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, uint64_t hash, long *defragged); /* Defrag helper for generic allocations. * @@ -355,7 +355,7 @@ long activeDefragSdsListAndDict(list *l, dict *d, int dict_val_type) { sdsele = ln->value; if ((newsds = activeDefragSds(sdsele))) { /* When defragging an sds value, we need to update the dict key */ - unsigned int hash = dictGetHash(d, sdsele); + uint64_t hash = dictGetHash(d, sdsele); replaceSateliteDictKeyPtrAndOrDefragDictEntry(d, sdsele, newsds, hash, &defragged); ln->value = newsds; defragged++; @@ -392,7 +392,7 @@ long activeDefragSdsListAndDict(list *l, dict *d, int dict_val_type) { * moved. Return value is the the dictEntry if found, or NULL if not found. * NOTE: this is very ugly code, but it let's us avoid the complication of * doing a scan on another dict. */ -dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, long *defragged) { +dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, uint64_t hash, long *defragged) { dictEntry **deref = dictFindEntryRefByPtrAndHash(d, oldkey, hash); if (deref) { dictEntry *de = *deref; From bea09a7fa6c2d332d6b298fb7a91cc04099faf47 Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Wed, 8 May 2019 11:36:31 +0100 Subject: [PATCH 48/99] Add include to deps/hiredis/read.c to fix Implicit Declaration of strcasecmp warning --- deps/hiredis/read.c | 1 + 1 file changed, 1 insertion(+) diff --git a/deps/hiredis/read.c b/deps/hiredis/read.c index c75c3435f..cc0f3cc72 100644 --- a/deps/hiredis/read.c +++ b/deps/hiredis/read.c @@ -31,6 +31,7 @@ #include "fmacros.h" #include +#include #include #ifndef _MSC_VER #include From a1fb0be1d7c1228c7b1c076426cbdf30f9489077 Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Wed, 8 May 2019 12:13:45 +0100 Subject: [PATCH 49/99] Enlarge error buffer in redis-check-aof.c to remove compiler warning of output truncation through snprintf format string --- src/redis-check-aof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index 54ed85f0d..eedb09db5 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -37,7 +37,7 @@ snprintf(error, sizeof(error), "0x%16llx: %s", (long long)epos, __buf); \ } -static char error[1024]; +static char error[1044]; static off_t epos; int consumeNewline(char *buf) { From c64aec9ce7ead321f7b29f6a2a29649b36d4a4a0 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Fri, 10 May 2019 16:27:25 +0800 Subject: [PATCH 50/99] test cases: skiptill -> skip-till --- tests/test_helper.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 568eacdee..1442067f5 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -503,7 +503,7 @@ for {set j 0} {$j < [llength $argv]} {incr j} { } elseif {$opt eq {--only}} { lappend ::only_tests $arg incr j - } elseif {$opt eq {--skiptill}} { + } elseif {$opt eq {--skip-till}} { set ::skip_till $arg incr j } elseif {$opt eq {--list-tests}} { From 09186514381019ce6d07f5d0d78d19e229b00c84 Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 10 May 2019 23:47:52 -0400 Subject: [PATCH 51/99] Add byline to logo --- src/asciilogo.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/asciilogo.h b/src/asciilogo.h index 5264a6919..c8b18a0ac 100644 --- a/src/asciilogo.h +++ b/src/asciilogo.h @@ -36,4 +36,7 @@ const char *ascii_logo = " Port: %d\n" " PID: %ld\n" " \n" +" Like KeyDB? Star us on GitHub! \n" +" \n" +" https://github.com/JohnSully/KeyDB \n" " \n\n"; From 54d6eb851bbea48e858d84bca818cb14c65b6403 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 11 May 2019 17:57:35 -0400 Subject: [PATCH 52/99] Make lazy free test more reliable --- tests/unit/lazyfree.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/lazyfree.tcl b/tests/unit/lazyfree.tcl index 1e568ed78..b9b617085 100644 --- a/tests/unit/lazyfree.tcl +++ b/tests/unit/lazyfree.tcl @@ -2,11 +2,11 @@ start_server {tags {"lazyfree"}} { test "UNLINK can reclaim memory in background" { set orig_mem [s used_memory] set args {} - for {set i 0} {$i < 100000} {incr i} { + for {set i 0} {$i < 200000} {incr i} { lappend args $i } r sadd myset {*}$args - assert {[r scard myset] == 100000} + assert {[r scard myset] == 200000} set peak_mem [s used_memory] assert {[r unlink myset] == 1} assert {$peak_mem > $orig_mem+1000000} From aa274960fff095c4c3cd570cdbaaabb9ca005651 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 11 May 2019 19:26:59 -0400 Subject: [PATCH 53/99] Fix a few issues from the merge --- src/server.cpp | 2 ++ src/server.h | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/server.cpp b/src/server.cpp index 4d10b478f..f7b7dba3f 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -2146,8 +2146,10 @@ void beforeSleepLite(struct aeEventLoop *eventLoop) /* Handle writes with pending output buffers. */ handleClientsWithPendingWrites(iel); + aeAcquireLock(); /* Close clients that need to be closed asynchronous */ freeClientsInAsyncFreeQueue(iel); + aeReleaseLock(); /* Before we are going to sleep, let the threads access the dataset by * releasing the GIL. Redis main thread will not touch anything at this diff --git a/src/server.h b/src/server.h index f315bdcd8..e2b0fd089 100644 --- a/src/server.h +++ b/src/server.h @@ -151,8 +151,6 @@ class robj_roptr #define CONFIG_DEFAULT_TCP_BACKLOG 511 /* TCP listen backlog. */ #define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* Default client timeout: infinite */ #define CONFIG_DEFAULT_DBNUM 16 -#define CONFIG_DEFAULT_IO_THREADS_NUM 1 /* Single threaded by default */ -#define CONFIG_DEFAULT_IO_THREADS_DO_READS 0 /* Read + parse from threads? */ #define CONFIG_MAX_LINE 1024 #define CRON_DBS_PER_CALL 16 #define NET_MAX_WRITES_PER_EVENT (1024*64) @@ -1765,7 +1763,6 @@ int writeToClient(int fd, client *c, int handler_installed); void linkClient(client *c); void protectClient(client *c); void unprotectClient(client *c); -void initThreadedIO(void); // Special Thread-safe addReply() commands for posting messages to clients from a different thread void addReplyAsync(client *c, robj_roptr obj); From 866b8ead9147e454fe577ec26bb6cf3a502b091f Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 11 May 2019 21:53:30 -0400 Subject: [PATCH 54/99] Update Active Replication wiki links --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 13c2de99e..7917a3ad8 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ What is KeyDB? -------------- -KeyDB is a high performance fork of Redis focusing on multithreading, memory efficiency, and high throughput. In addition to multithreading KeyDB also has features only available in Redis Enterprise such as [Active Replication](https://github.com/JohnSully/KeyDB/wiki/KeyDB-(Redis-Fork):-Active-Replica-Support), [FLASH storage](https://github.com/JohnSully/KeyDB/wiki/FLASH-Storage) support, and some not available at all such as direct backup to AWS S3. +KeyDB is a high performance fork of Redis focusing on multithreading, memory efficiency, and high throughput. In addition to multithreading KeyDB also has features only available in Redis Enterprise such as [Active Replication](https://github.com/JohnSully/KeyDB/wiki/Active-Replication), [FLASH storage](https://github.com/JohnSully/KeyDB/wiki/FLASH-Storage) support, and some not available at all such as direct backup to AWS S3. On the same hardware KeyDB can perform twice as many queries per second as Redis, with 60% lower latency. @@ -19,7 +19,7 @@ Management GUI: We recommend [FastoNoSQL](https://fastonosql.com/) which has off New: Active Replica Support --------------------------- -New! KeyDB now has support for Active Replicas. This feature greatly simplifies hot-spare failover and allows you to distribute writes over replicas instead of just a single master. For more information [see the wiki page](https://github.com/JohnSully/KeyDB/wiki/KeyDB-(Redis-Fork):-Active-Replica-Support). +New! KeyDB now has support for Active Replicas. This feature greatly simplifies hot-spare failover and allows you to distribute writes over replicas instead of just a single master. For more information [see the wiki page](https://github.com/JohnSully/KeyDB/wiki/Active-Replication). Why fork Redis? --------------- @@ -249,4 +249,3 @@ source distribution. Please see the CONTRIBUTING file in this source distribution for more information. - From 3d0de946bb73a34f96cd773e85c102c2743e6480 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 18 May 2019 19:52:43 -0400 Subject: [PATCH 55/99] Fix: Active replication falls behind under high load --- src/replication.cpp | 84 ++++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 32 deletions(-) diff --git a/src/replication.cpp b/src/replication.cpp index eef679ea7..f37473fc5 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -229,7 +229,7 @@ void feedReplicationBacklogWithObject(robj *o) { feedReplicationBacklog(p,len); } -void replicationFeedSlave(client *slave, int dictid, robj **argv, int argc) +void replicationFeedSlave(client *slave, int dictid, robj **argv, int argc, bool fSendRaw) { char llstr[LONG_STR_SIZE]; std::unique_locklock)> lock(slave->lock); @@ -252,7 +252,8 @@ void replicationFeedSlave(client *slave, int dictid, robj **argv, int argc) } /* Add the SELECT command into the backlog. */ - if (g_pserver->repl_backlog) feedReplicationBacklogWithObject(selectcmd); + /* We don't do this for advanced replication because this will be done later when it adds the whole RREPLAY command */ + if (g_pserver->repl_backlog && fSendRaw) feedReplicationBacklogWithObject(selectcmd); /* Send it to slaves */ addReply(slave,selectcmd); @@ -302,34 +303,9 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { client *fake = createClient(-1, serverTL - g_pserver->rgthreadvar); fake->flags |= CLIENT_FORCE_REPLY; - replicationFeedSlave(fake, dictid, argv, argc); // Note: updates the repl log, keep above the repl update code below + bool fSendRaw = !g_pserver->fActiveReplica; + replicationFeedSlave(fake, dictid, argv, argc, fSendRaw); // Note: updates the repl log, keep above the repl update code below - /* Write the command to the replication backlog if any. */ - if (g_pserver->repl_backlog) { - char aux[LONG_STR_SIZE+3]; - - /* Add the multi bulk reply length. */ - aux[0] = '*'; - len = ll2string(aux+1,sizeof(aux)-1,argc); - aux[len+1] = '\r'; - aux[len+2] = '\n'; - feedReplicationBacklog(aux,len+3); - - for (j = 0; j < argc; j++) { - long objlen = stringObjectLen(argv[j]); - - /* We need to feed the buffer with the object as a bulk reply - * not just as a plain string, so create the $..CRLF payload len - * and add the final CRLF */ - aux[0] = '$'; - len = ll2string(aux+1,sizeof(aux)-1,objlen); - aux[len+1] = '\r'; - aux[len+2] = '\n'; - feedReplicationBacklog(aux,len+3); - feedReplicationBacklogWithObject(argv[j]); - feedReplicationBacklog(aux+len+1,2); - } - } long long cchbuf = fake->bufpos; listRewind(fake->reply, &liReply); @@ -339,8 +315,6 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { cchbuf += reply->used; } - bool fSendRaw = !g_pserver->fActiveReplica; - serverAssert(argc > 0); serverAssert(cchbuf > 0); @@ -348,7 +322,51 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { uuid_unparse(cserver.uuid, uuid); char proto[1024]; int cchProto = snprintf(proto, sizeof(proto), "*3\r\n$7\r\nRREPLAY\r\n$%d\r\n%s\r\n$%lld\r\n", (int)strlen(uuid), uuid, cchbuf); - cchProto = std::min((int)sizeof(proto), cchProto); + cchProto = std::min((int)sizeof(proto), cchProto); + + /* Write the command to the replication backlog if any. */ + if (g_pserver->repl_backlog) + { + if (fSendRaw) + { + char aux[LONG_STR_SIZE+3]; + + /* Add the multi bulk reply length. */ + aux[0] = '*'; + len = ll2string(aux+1,sizeof(aux)-1,argc); + aux[len+1] = '\r'; + aux[len+2] = '\n'; + feedReplicationBacklog(aux,len+3); + + for (j = 0; j < argc; j++) { + long objlen = stringObjectLen(argv[j]); + + /* We need to feed the buffer with the object as a bulk reply + * not just as a plain string, so create the $..CRLF payload len + * and add the final CRLF */ + aux[0] = '$'; + len = ll2string(aux+1,sizeof(aux)-1,objlen); + aux[len+1] = '\r'; + aux[len+2] = '\n'; + feedReplicationBacklog(aux,len+3); + feedReplicationBacklogWithObject(argv[j]); + feedReplicationBacklog(aux+len+1,2); + } + } + else + { + feedReplicationBacklog(proto, cchProto); + feedReplicationBacklog(fake->buf, fake->bufpos); + listRewind(fake->reply, &liReply); + while ((lnReply = listNext(&liReply))) + { + clientReplyBlock* reply = (clientReplyBlock*)listNodeValue(lnReply); + feedReplicationBacklog(reply->buf(), reply->used); + } + const char *crlf = "\r\n"; + feedReplicationBacklog(crlf, 2); + } + } /* Write the command to every slave. */ listRewind(slaves,&li); @@ -3235,6 +3253,7 @@ void replicaReplayCommand(client *c) return; // OK We've recieved a command lets execute + client *current_clientSave = serverTL->current_client; client *cFake = createClient(-1, c->iel); cFake->lock.lock(); cFake->authenticated = c->authenticated; @@ -3245,6 +3264,7 @@ void replicaReplayCommand(client *c) cFake->lock.unlock(); addReply(c, shared.ok); freeClient(cFake); + serverTL->current_client = current_clientSave; // call() will not propogate this for us, so we do so here if (!s_pstate->FCancelled() && s_pstate->FFirst()) From 23b0268a7fc1f7116e96905502895a445d2caf76 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 18 May 2019 20:22:16 -0400 Subject: [PATCH 56/99] Print nicer error messages for uncaught exceptions --- src/Makefile | 2 +- src/server.cpp | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index aa7b204ff..c197d0927 100644 --- a/src/Makefile +++ b/src/Makefile @@ -21,7 +21,7 @@ NODEPS:=clean distclean # Default settings STD=-std=c11 -pedantic -DREDIS_STATIC='' -CXX_STD=-std=c++14 -pedantic -fno-rtti -fno-exceptions -D__STDC_FORMAT_MACROS +CXX_STD=-std=c++14 -pedantic -fno-rtti -D__STDC_FORMAT_MACROS ifneq (,$(findstring clang,$(CC))) ifneq (,$(findstring FreeBSD,$(uname_S))) STD+=-Wno-c11-extensions diff --git a/src/server.cpp b/src/server.cpp index f7b7dba3f..af62c78c3 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -4919,6 +4919,16 @@ void incrementMvccTstamp() } } +void OnTerminate() +{ + /* Any uncaught exception will call std::terminate(). + We want this handled like a segfault (printing the stack trace etc). + The easiest way to achieve that is to acutally segfault, so we assert + here. + */ + serverAssert(false); +} + void *workerThreadMain(void *parg) { int iel = (int)((int64_t)parg); @@ -4939,6 +4949,8 @@ int main(int argc, char **argv) { struct timeval tv; int j; + std::set_terminate(OnTerminate); + #ifdef USE_MEMKIND storage_init(NULL, 0); #endif From c57d8760bd9f824555366d1685b12e959ef36f4c Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 18 May 2019 20:41:05 -0400 Subject: [PATCH 57/99] Fix build break on some compilers --- src/new.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/new.h b/src/new.h index e2ec0032f..7ea65e979 100644 --- a/src/new.h +++ b/src/new.h @@ -12,12 +12,12 @@ inline void *operator new(size_t size, enum MALLOC_CLASS mclass) return zmalloc(size, mclass); } -inline void operator delete(void * p) +inline void operator delete(void * p) noexcept { zfree(p); } -inline void operator delete(void *p, std::size_t) +inline void operator delete(void *p, std::size_t) noexcept { zfree(p); } \ No newline at end of file From 39a60c8120ac4a7c2aad83df00a4fbea07a801cc Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 18 May 2019 20:58:48 -0400 Subject: [PATCH 58/99] Fix build breaks on some older compilers --- src/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.h b/src/server.h index e2b0fd089..68fde108e 100644 --- a/src/server.h +++ b/src/server.h @@ -2465,7 +2465,7 @@ struct redisMaster *MasterInfoFromClient(client *c); uint64_t getMvccTstamp(); void incrementMvccTstamp(); -#if defined(__GNUC__) && !defined(NO_DEPRECATE_FREE) +#if __GNUC__ >= 7 && !defined(NO_DEPRECATE_FREE) [[deprecated]] void *calloc(size_t count, size_t size); [[deprecated]] From a78169e9e043aea83c3f8b8fbb14e605575f8575 Mon Sep 17 00:00:00 2001 From: Qix Date: Sun, 19 May 2019 22:18:10 +0200 Subject: [PATCH 59/99] cast void data pointer to const char * to fix gcc --- src/modules/hellotimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/hellotimer.c b/src/modules/hellotimer.c index 57b111b7c..27da7ec1b 100644 --- a/src/modules/hellotimer.c +++ b/src/modules/hellotimer.c @@ -40,7 +40,7 @@ /* Timer callback. */ void timerHandler(RedisModuleCtx *ctx, void *data) { REDISMODULE_NOT_USED(ctx); - printf("Fired %s!\n", data); + printf("Fired %s!\n", (const char *) data); RedisModule_Free(data); } From 8c56dbf971aced9e18d170dfffb0c074b545f718 Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 20 May 2019 10:57:37 -0400 Subject: [PATCH 60/99] Protocol regression from RESP3 work --- src/t_zset.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/t_zset.cpp b/src/t_zset.cpp index ec0c764dd..2f1032fe2 100644 --- a/src/t_zset.cpp +++ b/src/t_zset.cpp @@ -2595,7 +2595,10 @@ void genericZrangebyscoreCommand(client *c, int reverse) { /* No "first" element in the specified interval. */ if (eptr == NULL) { - addReplyNull(c); + if (c->resp < 3) + addReply(c, shared.emptyarray); + else + addReplyNull(c); return; } From 87dc8ce9b3e376e4a6264aee1d2787b9f8d6437f Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 20 May 2019 15:51:36 -0400 Subject: [PATCH 61/99] Fix double unlock --- src/networking.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/networking.cpp b/src/networking.cpp index 0c70922cd..4f737ec8e 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -1546,7 +1546,6 @@ int writeToClient(int fd, client *c, int handler_installed) { } else { - lock.unlock(); freeClientAsync(c); } return C_ERR; From d7f21f02b30c3f9092dbf9aec75481baf45c3311 Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 20 May 2019 16:12:26 -0400 Subject: [PATCH 62/99] The unstable branch doesn't have a version # --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 489a16290..0c56a20a9 100644 --- a/src/version.h +++ b/src/version.h @@ -1,2 +1,2 @@ -#define KEYDB_REAL_VERSION "0.9.2" +#define KEYDB_REAL_VERSION "0.0.0" extern const char *KEYDB_SET_VERSION; // Unlike real version, this can be overriden by the config \ No newline at end of file From c5cbb77fc7e3b2b5124b9432e144d8c74190c755 Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 20 May 2019 23:39:44 -0400 Subject: [PATCH 63/99] Fix regressions from RESP3 changes in commit 317f8b9d383f1b7f171aef7ea29f9e05abf0ba83 --- src/networking.cpp | 14 ++++++++++---- src/object.cpp | 2 +- src/pubsub.cpp | 4 ++-- src/server.cpp | 2 ++ src/server.h | 4 ++-- src/t_list.cpp | 4 ++-- src/t_zset.cpp | 2 +- 7 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/networking.cpp b/src/networking.cpp index 4f737ec8e..923535bd2 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -839,8 +839,11 @@ void addReplyNullCore(client *c, bool fAsync) { } } -void addReplyNull(client *c) { - addReplyNullCore(c, false); +void addReplyNull(client *c, robj_roptr objOldProtocol) { + if (c->resp < 3 && objOldProtocol != nullptr) + addReply(c, objOldProtocol); + else + addReplyNullCore(c, false); } void addReplyNullAsync(client *c) { @@ -932,7 +935,10 @@ void addReplyBulkSdsAsync(client *c, sds s) { /* Add a C null term string as bulk reply */ void addReplyBulkCStringCore(client *c, const char *s, bool fAsync) { if (s == NULL) { - addReplyNullCore(c,fAsync); + if (c->resp < 3) + addReplyCore(c,shared.nullbulk, fAsync); + else + addReplyNullCore(c,fAsync); } else { addReplyBulkCBufferCore(c,s,strlen(s),fAsync); } @@ -2540,7 +2546,7 @@ NULL if (c->name) addReplyBulk(c,c->name); else - addReplyNull(c); + addReplyNull(c, shared.nullbulk); } else if (!strcasecmp((const char*)ptrFromObj(c->argv[1]),"pause") && c->argc == 3) { long long duration; diff --git a/src/object.cpp b/src/object.cpp index 0ca578de1..de770e47e 100644 --- a/src/object.cpp +++ b/src/object.cpp @@ -1341,7 +1341,7 @@ NULL } } if ((de = dictFind(c->db->pdict,ptrFromObj(c->argv[2]))) == NULL) { - addReplyNull(c); + addReplyNull(c, shared.nullbulk); return; } size_t usage = objectComputeSize((robj*)dictGetVal(de),samples); diff --git a/src/pubsub.cpp b/src/pubsub.cpp index 61a6dc373..6a9c2bdfc 100644 --- a/src/pubsub.cpp +++ b/src/pubsub.cpp @@ -84,7 +84,7 @@ void addReplyPubsubUnsubscribed(client *c, robj *channel) { if (channel) addReplyBulk(c,channel); else - addReplyNull(c); + addReplyNull(c, shared.nullbulk); addReplyLongLong(c,clientSubscriptionsCount(c)); } @@ -112,7 +112,7 @@ void addReplyPubsubPatUnsubscribed(client *c, robj *pattern) { if (pattern) addReplyBulk(c,pattern); else - addReplyNull(c); + addReplyNull(c, shared.nullbulk); addReplyLongLong(c,clientSubscriptionsCount(c)); } diff --git a/src/server.cpp b/src/server.cpp index af62c78c3..46e4aa9a8 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -2174,6 +2174,8 @@ void createSharedObjects(void) { shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n")); shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n")); shared.emptybulk = createObject(OBJ_STRING,sdsnew("$0\r\n\r\n")); + shared.emptymultibulk = createObject(OBJ_STRING,sdsnew("*0\r\n")); + shared.nullbulk = createObject(OBJ_STRING,sdsnew("$0\r\n\r\n")); shared.czero = createObject(OBJ_STRING,sdsnew(":0\r\n")); shared.cone = createObject(OBJ_STRING,sdsnew(":1\r\n")); shared.emptyarray = createObject(OBJ_STRING,sdsnew("*0\r\n")); diff --git a/src/server.h b/src/server.h index 68fde108e..72c2bf634 100644 --- a/src/server.h +++ b/src/server.h @@ -978,7 +978,7 @@ struct moduleLoadQueueEntry { }; struct sharedObjectsStruct { - robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space, + robj *crlf, *ok, *err, *emptybulk, *emptymultibulk, *nullbulk, *czero, *cone, *pong, *space, *colon, *queued, *null[4], *nullarray[4], *emptyarray, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr, *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr, @@ -1701,7 +1701,7 @@ void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask); void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask); void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask); -void addReplyNull(client *c); +void addReplyNull(client *c, robj_roptr objOldProtocol = nullptr); void addReplyNullArray(client *c); void addReplyBool(client *c, int b); void addReplyVerbatim(client *c, const char *s, size_t len, const char *ext); diff --git a/src/t_list.cpp b/src/t_list.cpp index b072bf957..ecef14452 100644 --- a/src/t_list.cpp +++ b/src/t_list.cpp @@ -331,7 +331,7 @@ void lindexCommand(client *c) { addReplyBulk(c,value); decrRefCount(value); } else { - addReplyNull(c); + addReplyNull(c,shared.nullbulk); } } else { serverPanic("Unknown list encoding"); @@ -414,7 +414,7 @@ void lrangeCommand(client *c) { /* Invariant: start >= 0, so this test will be true when end < 0. * The range is empty when start > end or start >= length. */ if (start > end || start >= llen) { - addReplyNull(c); + addReplyNull(c,shared.emptymultibulk); return; } if (end >= llen) end = llen-1; diff --git a/src/t_zset.cpp b/src/t_zset.cpp index 2f1032fe2..1c965813d 100644 --- a/src/t_zset.cpp +++ b/src/t_zset.cpp @@ -2439,7 +2439,7 @@ void zrangeGenericCommand(client *c, int reverse) { /* Invariant: start >= 0, so this test will be true when end < 0. * The range is empty when start > end or start >= length. */ if (start > end || start >= llen) { - addReplyNull(c); + addReplyNull(c,shared.emptymultibulk); return; } if (end >= llen) end = llen-1; From cd3c482d5e36bc82fcef653e8020f68e2ac21938 Mon Sep 17 00:00:00 2001 From: John Sully Date: Tue, 21 May 2019 00:04:12 -0400 Subject: [PATCH 64/99] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 7917a3ad8..37e735faf 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,8 @@ Try our docker container: https://hub.docker.com/r/eqalpha/keydb Talk on Gitter: https://gitter.im/KeyDB +[Subscribe to the KeyDB mailing list](https://eqalpha.us20.list-manage.com/subscribe/post?u=978f486c2f95589b24591a9cc&id=4ab9220500) + Management GUI: We recommend [FastoNoSQL](https://fastonosql.com/) which has official KeyDB support. New: Active Replica Support From a6c56416fc1bf09f1ecbae45195290209aa93c89 Mon Sep 17 00:00:00 2001 From: John Sully Date: Tue, 21 May 2019 15:12:12 -0400 Subject: [PATCH 65/99] Implement MOTD feature in keydb-cli --- src/Makefile | 2 +- src/redis-cli.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index c197d0927..0d5801d45 100644 --- a/src/Makefile +++ b/src/Makefile @@ -264,7 +264,7 @@ $(REDIS_CHECK_AOF_NAME): $(REDIS_SERVER_NAME) # keydb-cli $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ) - $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o $(FINAL_LIBS) + $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o $(FINAL_LIBS) -lcurl # keydb-benchmark $(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ) diff --git a/src/redis-cli.c b/src/redis-cli.c index ab3de2e73..50e2e8480 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -45,6 +45,8 @@ #include #include #include +#include +#include #include #include /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */ @@ -6532,6 +6534,116 @@ static void intrinsicLatencyMode(void) { } } +/*------------------------------------------------------------------------------ + * Message of the day + *--------------------------------------------------------------------------- */ +#ifndef NO_MOTD +#include + +static const char *szMotdCachePath() +{ + static sds sdsMotdCachePath = NULL; + if (sdsMotdCachePath != NULL) + return sdsMotdCachePath; + + struct passwd *pw = getpwuid(getuid()); + if (pw == NULL) + return ""; + const char *homedir = pw->pw_dir; + sdsMotdCachePath = sdsnew(homedir); + sdsMotdCachePath = sdscat(sdsMotdCachePath, "/.keydb-cli-motd"); + return sdsMotdCachePath; +} +static size_t motd_write_callback(void *ptr, size_t size, size_t nmemb, sds *str) +{ + *str = sdscatlen(*str, ptr, size*nmemb); + return (size*nmemb); +} + +static char *fetchMOTDFromCache() +{ + struct stat attrib; + if (stat(szMotdCachePath(), &attrib) != 0) + return NULL; + time_t t = attrib.st_mtim.tv_sec; + time_t now = time(NULL); + if ((now - t) < 14400) + { + // If our cache was updated no more than 4 hours ago use it instead of fetching the MOTD + FILE *pf = fopen(szMotdCachePath(), "rb"); + if (pf == NULL) + return NULL; + fseek(pf, 0L, SEEK_END); + long cb = ftell(pf); + fseek(pf, 0L, SEEK_SET); // rewind + sds str = sdsnewlen(NULL, cb); + size_t cbRead = fread(str, 1, cb, pf); + fclose(pf); + if ((long)cbRead != cb) + { + sdsfree(str); + return NULL; + } + return str; + } + return NULL; +} + +static void setMOTDCache(const char *sz) +{ + FILE *pf = fopen(szMotdCachePath(), "wb"); + size_t celem = fwrite(sz, strlen(sz), 1, pf); + (void)celem; // best effort + fclose(pf); +} + +static char *fetchMOTD() +{ + sds str; + CURL *curl; + CURLcode res; + + /* First try and get the string from the cache */ + str = fetchMOTDFromCache(); + if (str != NULL) + return str; + + str = sdsnew(""); + curl = curl_easy_init(); + if(curl) { + curl_easy_setopt(curl, CURLOPT_URL, "http://api.keydb.dev/motd/motd.txt"); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // follow redirects + curl_easy_setopt(curl, CURLOPT_TIMEOUT, 2); // take no more than two seconds + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, motd_write_callback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &str); + + /* Perform the request, res will get the return code */ + res = curl_easy_perform(curl); + /* Check for errors */ + if(res != CURLE_OK) + { + sdsfree(str); + str = NULL; + } + + /* always cleanup */ + curl_easy_cleanup(curl); + + if (str != NULL) + setMOTDCache(str); + } + return str; +} + +#else + +static char *fetchMOTD() +{ + return NULL; +} + +#endif + /*------------------------------------------------------------------------------ * Program main() *--------------------------------------------------------------------------- */ @@ -6700,6 +6812,15 @@ int main(int argc, char **argv) { /* Start interactive mode when no command is provided */ if (argc == 0 && !config.eval) { + /* Show the message of the day if we are interactive */ + if (config.output == OUTPUT_STANDARD) { + char *szMotd = fetchMOTD(); + if (szMotd != NULL) { + printf("Message of the day:\n %s\n", szMotd); + sdsfree(szMotd); + } + } + /* Ignore SIGPIPE in interactive mode to force a reconnect */ signal(SIGPIPE, SIG_IGN); @@ -6711,6 +6832,7 @@ int main(int argc, char **argv) { /* Otherwise, we have some arguments to execute */ if (cliConnect(0) != REDIS_OK) exit(1); + if (config.eval) { return evalMode(argc,argv); } else { From 97b65ce19c09188acd0d662aad8d774a55531437 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 2 Jun 2019 15:50:50 -0400 Subject: [PATCH 66/99] Fix crash adding duplicate replica. Just send an error instead --- src/replication.cpp | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/src/replication.cpp b/src/replication.cpp index f37473fc5..e9e37f191 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -2240,7 +2240,8 @@ struct redisMaster *replicationAddMaster(char *ip, int port) { while ((ln = listNext(&li))) { redisMaster *miCheck = (redisMaster*)listNodeValue(ln); - serverAssert(strcasecmp(miCheck->masterhost, ip) || miCheck->masterport != port); + if (strcasecmp(miCheck->masterhost, ip)==0 && miCheck->masterport == port) + return nullptr; } // Pre-req satisfied, lets continue @@ -2380,26 +2381,18 @@ void replicaofCommand(client *c) { if ((getLongFromObjectOrReply(c, c->argv[2], &port, NULL) != C_OK)) return; - /* Check if we are already attached to the specified slave */ - listIter li; - listNode *ln; - listRewind(g_pserver->masters, &li); - while ((ln = listNext(&li))) + redisMaster *miNew = replicationAddMaster((char*)ptrFromObj(c->argv[1]), port); + if (miNew == nullptr) { - redisMaster *mi = (redisMaster*)listNodeValue(ln); - if (!strcasecmp(mi->masterhost,(const char*)ptrFromObj(c->argv[1])) - && mi->masterport == port) { - serverLog(LL_NOTICE,"REPLICAOF would result into synchronization " - "with the master we are already connected " - "with. No operation performed."); - addReplySds(c,sdsnew("+OK Already connected to specified " - "master\r\n")); - return; - } + // We have a duplicate + serverLog(LL_NOTICE,"REPLICAOF would result into synchronization " + "with the master we are already connected " + "with. No operation performed."); + addReplySds(c,sdsnew("+OK Already connected to specified " + "master\r\n")); + return; } - /* There was no previous master or the user specified a different one, - * we can continue. */ - redisMaster *miNew = replicationAddMaster((char*)ptrFromObj(c->argv[1]), port); + sds client = catClientInfoString(sdsempty(),c); serverLog(LL_NOTICE,"REPLICAOF %s:%d enabled (user request from '%s')", miNew->masterhost, miNew->masterport, client); From 46d6caa47c57fa13055615dfc2376b0962468b81 Mon Sep 17 00:00:00 2001 From: John Sully Date: Tue, 11 Jun 2019 01:34:36 -0400 Subject: [PATCH 67/99] Enable load balancing of UNIX sockets across threads --- src/networking.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/networking.cpp b/src/networking.cpp index 923535bd2..c5a921247 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -1175,7 +1175,17 @@ void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) { serverLog(LL_VERBOSE,"Accepted connection to %s", g_pserver->unixsocket); aeAcquireLock(); - acceptCommonHandler(cfd,CLIENT_UNIX_SOCKET,NULL, ielCur); + int ielTarget = rand() % cserver.cthreads; + if (ielTarget == ielCur) + { + acceptCommonHandler(cfd,CLIENT_UNIX_SOCKET,NULL, ielCur); + } + else + { + aePostFunction(g_pserver->rgthreadvar[ielTarget].el, [cfd, ielTarget]{ + acceptCommonHandler(cfd,CLIENT_UNIX_SOCKET,NULL, ielTarget); + }); + } aeReleaseLock(); } From ec57b4b0248bba671e388a2257b1bd65ed8d0f44 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 15 Jun 2019 23:53:34 -0400 Subject: [PATCH 68/99] Fallback to a futex if we spin for a long time --- src/fastlock.cpp | 47 +++++++++++++++++++++++++++++----- src/fastlock.h | 16 ++++++++++-- src/fastlock_x64.asm | 61 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 104 insertions(+), 20 deletions(-) diff --git a/src/fastlock.cpp b/src/fastlock.cpp index f1e13a279..4d3327318 100644 --- a/src/fastlock.cpp +++ b/src/fastlock.cpp @@ -36,6 +36,8 @@ #include #include #include +#include +#include #ifdef __APPLE__ #include @@ -64,6 +66,14 @@ uint64_t fastlock_getlongwaitcount() return g_longwaits; } +#ifndef ASM_SPINLOCK +static int futex(volatile unsigned *uaddr, int futex_op, int val, + const struct timespec *timeout, int val3) +{ + return syscall(SYS_futex, uaddr, futex_op, val, + timeout, uaddr, val3); +} +#endif extern "C" pid_t gettid() { @@ -88,6 +98,7 @@ extern "C" void fastlock_init(struct fastlock *lock) lock->m_ticket.m_avail = 0; lock->m_depth = 0; lock->m_pidOwner = -1; + lock->futex = 0; } #ifndef ASM_SPINLOCK @@ -100,18 +111,24 @@ extern "C" void fastlock_lock(struct fastlock *lock) } unsigned myticket = __atomic_fetch_add(&lock->m_ticket.m_avail, 1, __ATOMIC_RELEASE); - + unsigned mask = (1U << (myticket % 32)); int cloops = 0; - while (__atomic_load_2(&lock->m_ticket.m_active, __ATOMIC_ACQUIRE) != myticket) + ticket ticketT; + while (((ticketT.u = __atomic_load_4(&lock->m_ticket.m_active, __ATOMIC_ACQUIRE)) & 0xffff) != myticket) { +#if defined(__i386__) || defined(__amd64__) + __asm__ ("pause"); +#endif if ((++cloops % 1024*1024) == 0) { - sched_yield(); + if (static_cast(ticketT.m_active+1U) != myticket) + { + __atomic_fetch_or(&lock->futex, mask, __ATOMIC_ACQUIRE); + futex(&lock->m_ticket.u, FUTEX_WAIT_BITSET_PRIVATE, ticketT.u, nullptr, mask); + __atomic_fetch_and(&lock->futex, ~mask, __ATOMIC_RELEASE); + } ++g_longwaits; } -#if defined(__i386__) || defined(__amd64__) - __asm__ ("pause"); -#endif } lock->m_depth = 1; @@ -145,6 +162,21 @@ extern "C" int fastlock_trylock(struct fastlock *lock) return false; } +#define ROL32(v, shift) ((v << shift) | (v >> (32-shift))) +void unlock_futex(struct fastlock *lock, uint16_t ifutex) +{ + unsigned mask = (1U << (ifutex % 32)); + unsigned futexT = __atomic_load_4(&lock->futex, __ATOMIC_RELAXED) & mask; + + if (futexT == 0) + return; + + while (__atomic_load_4(&lock->futex, __ATOMIC_ACQUIRE) & mask) + { + if (futex(&lock->m_ticket.u, FUTEX_WAKE_BITSET_PRIVATE, INT_MAX, nullptr, mask) == 1) + break; + } +} extern "C" void fastlock_unlock(struct fastlock *lock) { --lock->m_depth; @@ -153,7 +185,8 @@ extern "C" void fastlock_unlock(struct fastlock *lock) assert((int)__atomic_load_4(&lock->m_pidOwner, __ATOMIC_RELAXED) >= 0); // unlock after free lock->m_pidOwner = -1; std::atomic_thread_fence(std::memory_order_acquire); - __atomic_fetch_add(&lock->m_ticket.m_active, 1, __ATOMIC_ACQ_REL); // on x86 the atomic is not required here, but ASM handles that case + uint16_t activeNew = __atomic_add_fetch(&lock->m_ticket.m_active, 1, __ATOMIC_ACQ_REL); // on x86 the atomic is not required here, but ASM handles that case + unlock_futex(lock, activeNew); } } #endif diff --git a/src/fastlock.h b/src/fastlock.h index b8027de54..81c5807cd 100644 --- a/src/fastlock.h +++ b/src/fastlock.h @@ -20,17 +20,29 @@ uint64_t fastlock_getlongwaitcount(); // this is a global value } #endif +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" struct ticket { - uint16_t m_active; - uint16_t m_avail; + union + { + struct + { + uint16_t m_active; + uint16_t m_avail; + }; + unsigned u; + }; }; +#pragma GCC diagnostic pop + struct fastlock { volatile struct ticket m_ticket; volatile int m_pidOwner; volatile int m_depth; + unsigned futex; #ifdef __cplusplus fastlock() diff --git a/src/fastlock_x64.asm b/src/fastlock_x64.asm index 7645d3baa..f7d1a3093 100644 --- a/src/fastlock_x64.asm +++ b/src/fastlock_x64.asm @@ -22,7 +22,7 @@ fastlock_lock: push rdi ; we need our struct pointer (also balance the stack for the call) call gettid ; get our thread ID (TLS is nasty in ASM so don't bother inlining) mov esi, eax ; back it up in esi - mov rdi, [rsp] ; get our pointer back + pop rdi ; get our pointer back cmp [rdi+4], esi ; Is the TID we got back the owner of the lock? je .LLocked ; Don't spin in that case @@ -30,11 +30,11 @@ fastlock_lock: xor eax, eax ; eliminate partial register dependency inc eax ; we want to add one lock xadd [rdi+2], ax ; do the xadd, ax contains the value before the addition - ; eax now contains the ticket - xor ecx, ecx + ; ax now contains the ticket ALIGN 16 .LLoop: - cmp [rdi], ax ; is our ticket up? + mov edx, [rdi] + cmp dx, ax ; is our ticket up? je .LLocked ; leave the loop pause add ecx, 1000h ; Have we been waiting a long time? (oflow if we have) @@ -44,22 +44,38 @@ ALIGN 16 ; But the compiler doesn't know that we rarely hit this, and when we do we know the lock is ; taking a long time to be released anyways. We optimize for the common case of short ; lock intervals. That's why we're using a spinlock in the first place + inc edx + cmp dx, ax + je .LLoop + dec edx ; restore the current ticket +.LFutexWait: push rsi push rax - mov rax, 24 ; sys_sched_yield - syscall ; give up our timeslice we'll be here a while - pop rax - pop rsi + ; Setup the syscall args + ; rdi ARG1 futex (already in rdi) + mov esi, (9 | 128) ; rsi ARG2 FUTEX_WAIT_BITSET_PRIVATE + ; rdx ARG3 ticketT.u (already in edx) + xor r10d, r10d ; r10 ARG4 NULL + mov r8, rdi ; r8 ARG5 dup rdi + xor r9d, r9d + bts r9d, eax ; r9 ARG6 mask + mov eax, 202 ; sys_futex + ; Do the syscall + lock or [rdi+12], r9d ; inform the unlocking thread we're waiting + syscall ; wait for the futex + not r9d ; convert our flag into a mask of bits not to touch + lock and [rdi+12], r9d ; clear the flag in the futex control mask + ; cleanup and continue mov rcx, g_longwaits inc qword [rcx] ; increment our long wait counter - mov rdi, [rsp] ; our struct pointer is on the stack already + pop rax + pop rsi xor ecx, ecx ; Reset our loop counter jmp .LLoop ; Get back in the game ALIGN 16 .LLocked: mov [rdi+4], esi ; lock->m_pidOwner = gettid() inc dword [rdi+8] ; lock->m_depth++ - add rsp, 8 ; fix stack ret ALIGN 16 @@ -114,9 +130,32 @@ fastlock_unlock: ; uint16_t avail ; int32_t m_pidOwner ; int32_t m_depth + push r11 sub dword [rdi+8], 1 ; decrement m_depth, don't use dec because it partially writes the flag register and we don't know its state jnz .LDone ; if depth is non-zero this is a recursive unlock, and we still hold it mov dword [rdi+4], -1 ; pidOwner = -1 (we don't own it anymore) - inc word [rdi] ; give up our ticket (note: lock is not required here because the spinlock itself guards this variable) + mov ecx, [rdi] ; get current active (this one) + inc ecx ; bump it to the next thread + mov [rdi], cx ; give up our ticket (note: lock is not required here because the spinlock itself guards this variable) + ; At this point the lock is removed, however we must wake up any pending futexs + mov r9d, 1 ; eax is the bitmask for 2 threads + rol r9d, cl ; place the mask in the right spot for the next 2 threads +ALIGN 16 +.LRetryWake: + mov r11d, [rdi+12] ; load the futex mask + and r11d, r9d ; are any threads waiting on a futex? + jz .LDone ; if not we're done. + ; we have to wake the futexs + ; rdi ARG1 futex (already in rdi) + mov esi, (10 | 128) ; rsi ARG2 FUTEX_WAKE_BITSET_PRIVATE + mov edx, 0x7fffffff ; rdx ARG3 INT_MAX (number of threads to wake) + xor r10d, r10d ; r10 ARG4 NULL + mov r8, rdi ; r8 ARG5 dup rdi + ; r9 ARG6 mask (already set above) + mov eax, 202 ; sys_futex + syscall + cmp eax, 1 ; did we wake as many as we expected? + jnz .LRetryWake .LDone: + pop r11 ret From 8b76b01bfe710603bcdc101da6eb27afcee7e1b1 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 16 Jun 2019 00:00:34 -0400 Subject: [PATCH 69/99] Even the next up thread should sleep in the futex --- src/fastlock.cpp | 9 +++------ src/fastlock_x64.asm | 6 +----- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/fastlock.cpp b/src/fastlock.cpp index 4d3327318..6be725f6d 100644 --- a/src/fastlock.cpp +++ b/src/fastlock.cpp @@ -121,12 +121,9 @@ extern "C" void fastlock_lock(struct fastlock *lock) #endif if ((++cloops % 1024*1024) == 0) { - if (static_cast(ticketT.m_active+1U) != myticket) - { - __atomic_fetch_or(&lock->futex, mask, __ATOMIC_ACQUIRE); - futex(&lock->m_ticket.u, FUTEX_WAIT_BITSET_PRIVATE, ticketT.u, nullptr, mask); - __atomic_fetch_and(&lock->futex, ~mask, __ATOMIC_RELEASE); - } + __atomic_fetch_or(&lock->futex, mask, __ATOMIC_ACQUIRE); + futex(&lock->m_ticket.u, FUTEX_WAIT_BITSET_PRIVATE, ticketT.u, nullptr, mask); + __atomic_fetch_and(&lock->futex, ~mask, __ATOMIC_RELEASE); ++g_longwaits; } } diff --git a/src/fastlock_x64.asm b/src/fastlock_x64.asm index f7d1a3093..baf33654f 100644 --- a/src/fastlock_x64.asm +++ b/src/fastlock_x64.asm @@ -44,11 +44,7 @@ ALIGN 16 ; But the compiler doesn't know that we rarely hit this, and when we do we know the lock is ; taking a long time to be released anyways. We optimize for the common case of short ; lock intervals. That's why we're using a spinlock in the first place - inc edx - cmp dx, ax - je .LLoop - dec edx ; restore the current ticket -.LFutexWait: + ; If we get here we're going to sleep in the kernel with a futex push rsi push rax ; Setup the syscall args From 8eb4e355b934e394608b082c7d1973152238fa1f Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 16 Jun 2019 00:16:36 -0400 Subject: [PATCH 70/99] Remove an unnecessary lock that eats performance --- src/networking.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/networking.cpp b/src/networking.cpp index c5a921247..92b41d277 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -1685,9 +1685,12 @@ int handleClientsWithPendingWrites(int iel) { } } - AeLocker locker; - locker.arm(nullptr); - ProcessPendingAsyncWrites(); + if (listLength(serverTL->clients_pending_asyncwrite)) + { + AeLocker locker; + locker.arm(nullptr); + ProcessPendingAsyncWrites(); + } return processed; } @@ -2205,8 +2208,11 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { * corresponding part of the replication stream, will be propagated to * the sub-slaves and to the replication backlog. */ processInputBufferAndReplicate(c); - aelock.arm(c); - ProcessPendingAsyncWrites(); + if (listLength(serverTL->clients_pending_asyncwrite)) + { + aelock.arm(c); + ProcessPendingAsyncWrites(); + } } void getClientsMaxBuffers(unsigned long *longest_output_list, From 59dbd625c143c1a890d4387f7a32c997f0d64f5f Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 16 Jun 2019 16:30:32 -0400 Subject: [PATCH 71/99] Move more code out of the GIL --- src/module.cpp | 6 +++++ src/networking.cpp | 59 +--------------------------------------------- src/server.cpp | 25 ++++++++++++++------ src/server.h | 1 + 4 files changed, 26 insertions(+), 65 deletions(-) diff --git a/src/module.cpp b/src/module.cpp index 56a951977..66c62b696 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -4988,6 +4988,12 @@ int RM_UnregisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilter *fi return REDISMODULE_OK; } +int moduleHasCommandFilters() +{ + // Note: called outside the global lock + return listLength(moduleCommandFilters); +} + void moduleCallCommandFilters(client *c) { if (listLength(moduleCommandFilters) == 0) return; diff --git a/src/networking.cpp b/src/networking.cpp index 92b41d277..fa6740048 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -36,66 +36,12 @@ #include #include #include +#include "aelocker.h" static void setProtocolError(const char *errstr, client *c); void addReplyLongLongWithPrefixCore(client *c, long long ll, char prefix, bool fAsync); void addReplyBulkCStringCore(client *c, const char *s, bool fAsync); -class AeLocker -{ - bool m_fArmed = false; - -public: - AeLocker() - { - } - - void arm(client *c) // if a client is passed, then the client is already locked - { - if (c != nullptr) - { - serverAssert(!m_fArmed); - serverAssert(c->lock.fOwnLock()); - - bool fClientLocked = true; - while (!aeTryAcquireLock()) - { - if (fClientLocked) c->lock.unlock(); - fClientLocked = false; - aeAcquireLock(); - if (!c->lock.try_lock()) - { - aeReleaseLock(); - } - else - { - break; - } - } - - m_fArmed = true; - } - else if (!m_fArmed) - { - m_fArmed = true; - aeAcquireLock(); - } - } - - void disarm() - { - serverAssert(m_fArmed); - m_fArmed = false; - aeReleaseLock(); - } - - ~AeLocker() - { - if (m_fArmed) - aeReleaseLock(); - } -}; - /* Return the size consumed from the allocator, for the specified SDS string, * including internal fragmentation. This function is used in order to compute * the client output buffer size. */ @@ -2078,9 +2024,6 @@ void processInputBuffer(client *c, int callFlags) { if (c->argc == 0) { resetClient(c); } else { - AeLocker locker; - locker.arm(c); - /* We are finally ready to execute the command. */ if (processCommandAndResetClient(c, callFlags) == C_ERR) { /* If the client is no longer valid, we avoid exiting this diff --git a/src/server.cpp b/src/server.cpp index 46e4aa9a8..443ff53ee 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -59,6 +59,7 @@ #include #include #include +#include "aelocker.h" /* Our shared "common" objects */ @@ -3434,8 +3435,14 @@ void call(client *c, int flags) { * other operations can be performed by the caller. Otherwise * if C_ERR is returned the client was destroyed (i.e. after QUIT). */ int processCommand(client *c, int callFlags) { - serverAssert(GlobalLocksAcquired()); - moduleCallCommandFilters(c); + AeLocker locker; + AssertCorrectThread(c); + + if (moduleHasCommandFilters()) + { + locker.arm(c); + moduleCallCommandFilters(c); + } /* The QUIT command is handled separately. Normal command procs will * go through checking for replication and QUIT will cause trouble @@ -3447,10 +3454,6 @@ int processCommand(client *c, int callFlags) { return C_ERR; } - AssertCorrectThread(c); - serverAssert(GlobalLocksAcquired()); - incrementMvccTstamp(); - /* Now lookup the command and check ASAP about trivial error conditions * such as wrong arity, bad command name and so forth. */ c->cmd = c->lastcmd = lookupCommand((sds)ptrFromObj(c->argv[0])); @@ -3487,6 +3490,8 @@ int processCommand(client *c, int callFlags) { /* Check if the user can run this command according to the current * ACLs. */ + if (c->puser && !(c->puser->flags & USER_FLAG_ALLCOMMANDS)) + locker.arm(c); // ACLs require the lock int acl_retval = ACLCheckCommandPerm(c); if (acl_retval != ACL_OK) { flagTransaction(c); @@ -3512,6 +3517,7 @@ int processCommand(client *c, int callFlags) { !(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0 && c->cmd->proc != execCommand)) { + locker.arm(c); int hashslot; int error_code; clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc, @@ -3527,6 +3533,11 @@ int processCommand(client *c, int callFlags) { } } + incrementMvccTstamp(); + + if (!locker.isArmed()) + locker.arm(c); + /* Handle the maxmemory directive. * * Note that we do not want to reclaim memory if we are here re-entering @@ -4917,7 +4928,7 @@ void incrementMvccTstamp() } else { - g_pserver->mvcc_tstamp = ((uint64_t)g_pserver->mstime) << 20; + atomicSet(g_pserver->mvcc_tstamp, ((uint64_t)g_pserver->mstime) << 20); } } diff --git a/src/server.h b/src/server.h index 72c2bf634..d3765a000 100644 --- a/src/server.h +++ b/src/server.h @@ -1671,6 +1671,7 @@ void moduleAcquireGIL(int fServerThread); void moduleReleaseGIL(int fServerThread); void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid); void moduleCallCommandFilters(client *c); +int moduleHasCommandFilters(); /* Utils */ long long ustime(void); From ae33ff3a6ef6d47bab63cf8362055c1ed210dbad Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 16 Jun 2019 20:26:03 -0400 Subject: [PATCH 72/99] convert the pending write list to a vector for better performance and less cache thrashing --- src/module.cpp | 2 +- src/networking.cpp | 23 ++++++++++------------- src/server.cpp | 3 +-- src/server.h | 4 +++- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/module.cpp b/src/module.cpp index 66c62b696..568205f90 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -3806,7 +3806,7 @@ void moduleHandleBlockedClients(int iel) { AssertCorrectThread(c); fastlock_lock(&g_pserver->rgthreadvar[c->iel].lockPendingWrite); - listAddNodeHead(g_pserver->rgthreadvar[c->iel].clients_pending_write,c); + g_pserver->rgthreadvar[c->iel].clients_pending_write.push_back(c); fastlock_unlock(&g_pserver->rgthreadvar[c->iel].lockPendingWrite); } } diff --git a/src/networking.cpp b/src/networking.cpp index fa6740048..0159ba9f9 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -208,7 +208,7 @@ void clientInstallWriteHandler(client *c) { * we'll not be able to write the whole reply at once. */ c->flags |= CLIENT_PENDING_WRITE; std::unique_lock lockf(g_pserver->rgthreadvar[c->iel].lockPendingWrite); - listAddNodeHead(g_pserver->rgthreadvar[c->iel].clients_pending_write,c); + g_pserver->rgthreadvar[c->iel].clients_pending_write.push_back(c); } } @@ -1212,9 +1212,10 @@ void unlinkClient(client *c) { /* Remove from the list of pending writes if needed. */ if (c->flags & CLIENT_PENDING_WRITE) { std::unique_lock lockf(g_pserver->rgthreadvar[c->iel].lockPendingWrite); - ln = listSearchKey(g_pserver->rgthreadvar[c->iel].clients_pending_write,c); - serverAssert(ln != NULL); - listDelNode(g_pserver->rgthreadvar[c->iel].clients_pending_write,ln); + auto itr = std::find(g_pserver->rgthreadvar[c->iel].clients_pending_write.begin(), + g_pserver->rgthreadvar[c->iel].clients_pending_write.end(), c); + serverAssert(itr != g_pserver->rgthreadvar[c->iel].clients_pending_write.end()); + g_pserver->rgthreadvar[c->iel].clients_pending_write.erase(itr); c->flags &= ~CLIENT_PENDING_WRITE; } @@ -1584,21 +1585,17 @@ void ProcessPendingAsyncWrites() * need to use a syscall in order to install the writable event handler, * get it called, and so forth. */ int handleClientsWithPendingWrites(int iel) { - listIter li; - listNode *ln; - std::unique_lock lockf(g_pserver->rgthreadvar[iel].lockPendingWrite); - list *list = g_pserver->rgthreadvar[iel].clients_pending_write; - int processed = listLength(list); + auto &vec = g_pserver->rgthreadvar[iel].clients_pending_write; + int processed = (int)vec.size(); serverAssert(iel == (serverTL - g_pserver->rgthreadvar)); - listRewind(list,&li); - while((ln = listNext(&li))) { - client *c = (client*)listNodeValue(ln); + while(!vec.empty()) { + client *c = vec.back(); std::unique_locklock)> lock(c->lock); c->flags &= ~CLIENT_PENDING_WRITE; - listDelNode(list,ln); + vec.pop_back(); AssertCorrectThread(c); /* If a client is protected, don't do anything, diff --git a/src/server.cpp b/src/server.cpp index 443ff53ee..fdd1de4d2 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -2859,7 +2859,6 @@ static void initNetworking(int fReusePort) static void initServerThread(struct redisServerThreadVars *pvar, int fMain) { - pvar->clients_pending_write = listCreate(); pvar->unblocked_clients = listCreate(); pvar->clients_pending_asyncwrite = listCreate(); pvar->ipfd_count = 0; @@ -5135,7 +5134,7 @@ int main(int argc, char **argv) { if (background) daemonize(); initServer(); - initNetworking(cserver.cthreads > 1 /* fReusePort */); + initNetworking(/*cserver.cthreads >*/ 1 /* fReusePort */); if (background || cserver.pidfile) createPidFile(); redisSetProcTitle(argv[0]); diff --git a/src/server.h b/src/server.h index d3765a000..80281fac4 100644 --- a/src/server.h +++ b/src/server.h @@ -50,6 +50,8 @@ #include #include #include +#include +#include #ifdef __cplusplus extern "C" { #include @@ -1141,7 +1143,7 @@ struct redisServerThreadVars { aeEventLoop *el; int ipfd[CONFIG_BINDADDR_MAX]; /* TCP socket file descriptors */ int ipfd_count; /* Used slots in ipfd[] */ - list *clients_pending_write; /* There is to write or install handler. */ + std::vector clients_pending_write; /* There is to write or install handler. */ list *unblocked_clients; /* list of clients to unblock before next loop NOT THREADSAFE */ list *clients_pending_asyncwrite; int cclients; From 9c901eb65f98008f0962c5edc33aadc15ec6f619 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 16 Jun 2019 20:57:12 -0400 Subject: [PATCH 73/99] Fix test failures due to assert --- src/multi.cpp | 1 - src/server.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/multi.cpp b/src/multi.cpp index 262a8f1d0..3383f5a49 100644 --- a/src/multi.cpp +++ b/src/multi.cpp @@ -82,7 +82,6 @@ void discardTransaction(client *c) { /* Flag the transacation as DIRTY_EXEC so that EXEC will fail. * Should be called every time there is an error while queueing a command. */ void flagTransaction(client *c) { - serverAssert(GlobalLocksAcquired()); if (c->flags & CLIENT_MULTI) c->flags |= CLIENT_DIRTY_EXEC; } diff --git a/src/server.cpp b/src/server.cpp index fdd1de4d2..6bb90a122 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -5134,7 +5134,7 @@ int main(int argc, char **argv) { if (background) daemonize(); initServer(); - initNetworking(/*cserver.cthreads >*/ 1 /* fReusePort */); + initNetworking(cserver.cthreads > 1 /* fReusePort */); if (background || cserver.pidfile) createPidFile(); redisSetProcTitle(argv[0]); From 19061fe04f22dc2dcb4547a4c74eccfb89cae785 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 16 Jun 2019 21:10:38 -0400 Subject: [PATCH 74/99] missing file :) --- src/aelocker.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 src/aelocker.h diff --git a/src/aelocker.h b/src/aelocker.h new file mode 100644 index 000000000..942f936c9 --- /dev/null +++ b/src/aelocker.h @@ -0,0 +1,61 @@ +#pragma once + +class AeLocker +{ + bool m_fArmed = false; + +public: + AeLocker() + { + } + + void arm(client *c) // if a client is passed, then the client is already locked + { + if (c != nullptr) + { + serverAssert(!m_fArmed); + serverAssert(c->lock.fOwnLock()); + + bool fClientLocked = true; + while (!aeTryAcquireLock()) + { + if (fClientLocked) c->lock.unlock(); + fClientLocked = false; + aeAcquireLock(); + if (!c->lock.try_lock()) + { + aeReleaseLock(); + } + else + { + break; + } + } + + m_fArmed = true; + } + else if (!m_fArmed) + { + m_fArmed = true; + aeAcquireLock(); + } + } + + void disarm() + { + serverAssert(m_fArmed); + m_fArmed = false; + aeReleaseLock(); + } + + bool isArmed() const + { + return m_fArmed; + } + + ~AeLocker() + { + if (m_fArmed) + aeReleaseLock(); + } +}; \ No newline at end of file From 0f78b8b91729374f76f562049e16c0556da00959 Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 17 Jun 2019 02:21:02 -0400 Subject: [PATCH 75/99] Remove unnecessary recursive lock --- src/server.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/server.cpp b/src/server.cpp index 6bb90a122..54e26d41b 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -2055,12 +2055,10 @@ int serverCronLite(struct aeEventLoop *eventLoop, long long id, void *clientData int iel = ielFromEventLoop(eventLoop); serverAssert(iel != IDX_EVENT_LOOP_MAIN); - aeAcquireLock(); ProcessPendingAsyncWrites(); // A bug but leave for now, events should clean up after themselves clientsCron(iel); freeClientsInAsyncFreeQueue(iel); - aeReleaseLock(); return 1000/g_pserver->hz; } From e37bc4ad40e852b67ee14e5aa87fd2f398f00eed Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 17 Jun 2019 21:53:04 -0400 Subject: [PATCH 76/99] Fix issues with relaxed memory model architectures --- src/ae.cpp | 4 ++-- src/ae.h | 2 +- src/aelocker.h | 28 +++++++++++++++++----------- src/fastlock.cpp | 8 ++++---- src/fastlock.h | 6 +++--- src/networking.cpp | 4 ++-- 6 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/ae.cpp b/src/ae.cpp index 99f09d49f..a78c4a7c2 100644 --- a/src/ae.cpp +++ b/src/ae.cpp @@ -803,9 +803,9 @@ void aeAcquireLock() g_lock.lock(); } -int aeTryAcquireLock() +int aeTryAcquireLock(bool fWeak) { - return g_lock.try_lock(); + return g_lock.try_lock(fWeak); } void aeReleaseLock() diff --git a/src/ae.h b/src/ae.h index f08c49dd8..6fd230da9 100644 --- a/src/ae.h +++ b/src/ae.h @@ -159,7 +159,7 @@ int aeGetSetSize(aeEventLoop *eventLoop); int aeResizeSetSize(aeEventLoop *eventLoop, int setsize); void aeAcquireLock(); -int aeTryAcquireLock(); +int aeTryAcquireLock(bool fWeak); void aeReleaseLock(); int aeThreadOwnsLock(); diff --git a/src/aelocker.h b/src/aelocker.h index 942f936c9..562ac55ef 100644 --- a/src/aelocker.h +++ b/src/aelocker.h @@ -16,19 +16,25 @@ class AeLocker serverAssert(!m_fArmed); serverAssert(c->lock.fOwnLock()); - bool fClientLocked = true; - while (!aeTryAcquireLock()) + if (!aeTryAcquireLock(true /*fWeak*/)) // avoid locking the client if we can { - if (fClientLocked) c->lock.unlock(); - fClientLocked = false; - aeAcquireLock(); - if (!c->lock.try_lock()) + bool fOwnClientLock = true; + for (;;) { - aeReleaseLock(); - } - else - { - break; + if (fOwnClientLock) + { + c->lock.unlock(); + fOwnClientLock = false; + } + aeAcquireLock(); + if (!c->lock.try_lock(false)) // ensure a strong try because aeAcquireLock is expensive + { + aeReleaseLock(); + } + else + { + break; + } } } diff --git a/src/fastlock.cpp b/src/fastlock.cpp index 6be725f6d..1340e087c 100644 --- a/src/fastlock.cpp +++ b/src/fastlock.cpp @@ -133,7 +133,7 @@ extern "C" void fastlock_lock(struct fastlock *lock) std::atomic_thread_fence(std::memory_order_acquire); } -extern "C" int fastlock_trylock(struct fastlock *lock) +extern "C" int fastlock_trylock(struct fastlock *lock, int fWeak) { if ((int)__atomic_load_4(&lock->m_pidOwner, __ATOMIC_ACQUIRE) == gettid()) { @@ -150,7 +150,7 @@ extern "C" int fastlock_trylock(struct fastlock *lock) struct ticket ticket_expect { active, active }; struct ticket ticket_setiflocked { active, next }; - if (__atomic_compare_exchange(&lock->m_ticket, &ticket_expect, &ticket_setiflocked, false /*weak*/, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE)) + if (__atomic_compare_exchange(&lock->m_ticket, &ticket_expect, &ticket_setiflocked, fWeak /*weak*/, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { lock->m_depth = 1; __atomic_store_4(&lock->m_pidOwner, gettid(), __ATOMIC_RELEASE); @@ -181,8 +181,8 @@ extern "C" void fastlock_unlock(struct fastlock *lock) { assert((int)__atomic_load_4(&lock->m_pidOwner, __ATOMIC_RELAXED) >= 0); // unlock after free lock->m_pidOwner = -1; - std::atomic_thread_fence(std::memory_order_acquire); - uint16_t activeNew = __atomic_add_fetch(&lock->m_ticket.m_active, 1, __ATOMIC_ACQ_REL); // on x86 the atomic is not required here, but ASM handles that case + std::atomic_thread_fence(std::memory_order_release); + uint16_t activeNew = __atomic_add_fetch(&lock->m_ticket.m_active, 1, __ATOMIC_RELEASE); // on x86 the atomic is not required here, but ASM handles that case unlock_futex(lock, activeNew); } } diff --git a/src/fastlock.h b/src/fastlock.h index 81c5807cd..26a4b4d01 100644 --- a/src/fastlock.h +++ b/src/fastlock.h @@ -9,7 +9,7 @@ extern "C" { struct fastlock; void fastlock_init(struct fastlock *lock); void fastlock_lock(struct fastlock *lock); -int fastlock_trylock(struct fastlock *lock); +int fastlock_trylock(struct fastlock *lock, int fWeak); void fastlock_unlock(struct fastlock *lock); void fastlock_free(struct fastlock *lock); @@ -55,9 +55,9 @@ struct fastlock fastlock_lock(this); } - bool try_lock() + bool try_lock(bool fWeak = false) { - return !!fastlock_trylock(this); + return !!fastlock_trylock(this, fWeak); } void unlock() diff --git a/src/networking.cpp b/src/networking.cpp index 0159ba9f9..76d07a117 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -1475,7 +1475,7 @@ int writeToClient(int fd, client *c, int handler_installed) { serverLog(LL_VERBOSE, "Error writing to client: %s", strerror(errno)); lock.unlock(); - if (aeTryAcquireLock()) + if (aeTryAcquireLock(true /*fWeak*/)) { freeClient(c); aeReleaseLock(); @@ -1502,7 +1502,7 @@ int writeToClient(int fd, client *c, int handler_installed) { /* Close connection after entire reply has been sent. */ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) { lock.unlock(); - if (aeTryAcquireLock()) + if (aeTryAcquireLock(true /*fWeak*/)) { freeClient(c); aeReleaseLock(); From 8238d3207e484e5c9515d00ee7d2247168c03612 Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 17 Jun 2019 23:56:23 -0400 Subject: [PATCH 77/99] Fix Travis CI issue --- src/server.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server.h b/src/server.h index 80281fac4..1e854abca 100644 --- a/src/server.h +++ b/src/server.h @@ -40,6 +40,7 @@ #include #include +#include #include #include #include From 510e966dc0cae510f6687e8d5e35c8576ce62cef Mon Sep 17 00:00:00 2001 From: John Sully Date: Tue, 18 Jun 2019 00:02:36 -0400 Subject: [PATCH 78/99] Bump travis compiler version --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 90d8fc61c..64c040b38 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,11 +4,11 @@ matrix: include: - os: linux script: make - env: COMPILER_NAME=gcc CXX=g++-5 CC=gcc-5 + env: COMPILER_NAME=g++-6 CXX=g++-6 CC=gcc-6 addons: apt: packages: - - g++-5 + - g++-6 - nasm - uuid-dev sources: &sources From 79e5160e376cfedc690d351d49c3327d5f48c580 Mon Sep 17 00:00:00 2001 From: John Sully Date: Tue, 18 Jun 2019 00:06:42 -0400 Subject: [PATCH 79/99] Fix libcxxabi clang compile failure in travis, as well as bool issue due to old clang --- .travis.yml | 3 ++- src/ae.cpp | 4 ++-- src/ae.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 64c040b38..8d20ca462 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,10 +16,11 @@ matrix: - ubuntu-toolchain-r-test - os: linux script: make MALLOC=libc - env: COMPILER_NAME=clang CXX=clang++-3.8 CC=clang-3.8 CXXFLAGS="-stdlib=libc++" LDFLAGS="-stdlib=libc++" + env: COMPILER_NAME=clang CXX=clang++-3.8 CC=clang-3.8 CXXFLAGS="-I/usr/include/libcxxabi/" LDFLAGS="-lc++" addons: apt: packages: + - libc++abi-dev - clang-3.8 - libc++-dev - libc++abi-dev diff --git a/src/ae.cpp b/src/ae.cpp index a78c4a7c2..3f1ec9260 100644 --- a/src/ae.cpp +++ b/src/ae.cpp @@ -803,9 +803,9 @@ void aeAcquireLock() g_lock.lock(); } -int aeTryAcquireLock(bool fWeak) +int aeTryAcquireLock(int fWeak) { - return g_lock.try_lock(fWeak); + return g_lock.try_lock(!!fWeak); } void aeReleaseLock() diff --git a/src/ae.h b/src/ae.h index 6fd230da9..14ccc3dc8 100644 --- a/src/ae.h +++ b/src/ae.h @@ -159,7 +159,7 @@ int aeGetSetSize(aeEventLoop *eventLoop); int aeResizeSetSize(aeEventLoop *eventLoop, int setsize); void aeAcquireLock(); -int aeTryAcquireLock(bool fWeak); +int aeTryAcquireLock(int fWeak); void aeReleaseLock(); int aeThreadOwnsLock(); From 16a89f21dc0beb23b7eb8f2e378d076da918c0d6 Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 27 Jun 2019 15:04:09 -0400 Subject: [PATCH 80/99] Fix mac build breaks --- src/Makefile | 1 + src/fastlock.cpp | 11 +++++++++++ src/redis-cli.c | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 0d5801d45..6c4c4823b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -140,6 +140,7 @@ else # All the other OSes (notably Linux) FINAL_LDFLAGS+= -rdynamic FINAL_LIBS+=-ldl -pthread -lrt -luuid + FINAL_CFLAGS += -DMOTD endif endif endif diff --git a/src/fastlock.cpp b/src/fastlock.cpp index 1340e087c..00ad8884a 100644 --- a/src/fastlock.cpp +++ b/src/fastlock.cpp @@ -36,7 +36,9 @@ #include #include #include +#ifdef __linux__ #include +#endif #include #ifdef __APPLE__ @@ -67,6 +69,7 @@ uint64_t fastlock_getlongwaitcount() } #ifndef ASM_SPINLOCK +#ifdef __linux__ static int futex(volatile unsigned *uaddr, int futex_op, int val, const struct timespec *timeout, int val3) { @@ -74,6 +77,7 @@ static int futex(volatile unsigned *uaddr, int futex_op, int val, timeout, uaddr, val3); } #endif +#endif extern "C" pid_t gettid() { @@ -121,9 +125,11 @@ extern "C" void fastlock_lock(struct fastlock *lock) #endif if ((++cloops % 1024*1024) == 0) { +#ifdef __linux__ __atomic_fetch_or(&lock->futex, mask, __ATOMIC_ACQUIRE); futex(&lock->m_ticket.u, FUTEX_WAIT_BITSET_PRIVATE, ticketT.u, nullptr, mask); __atomic_fetch_and(&lock->futex, ~mask, __ATOMIC_RELEASE); +#endif ++g_longwaits; } } @@ -159,6 +165,7 @@ extern "C" int fastlock_trylock(struct fastlock *lock, int fWeak) return false; } +#ifdef __linux__ #define ROL32(v, shift) ((v << shift) | (v >> (32-shift))) void unlock_futex(struct fastlock *lock, uint16_t ifutex) { @@ -174,6 +181,8 @@ void unlock_futex(struct fastlock *lock, uint16_t ifutex) break; } } +#endif + extern "C" void fastlock_unlock(struct fastlock *lock) { --lock->m_depth; @@ -183,7 +192,9 @@ extern "C" void fastlock_unlock(struct fastlock *lock) lock->m_pidOwner = -1; std::atomic_thread_fence(std::memory_order_release); uint16_t activeNew = __atomic_add_fetch(&lock->m_ticket.m_active, 1, __ATOMIC_RELEASE); // on x86 the atomic is not required here, but ASM handles that case +#ifdef __linux__ unlock_futex(lock, activeNew); +#endif } } #endif diff --git a/src/redis-cli.c b/src/redis-cli.c index 50e2e8480..eae4a1d0e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -6537,7 +6537,7 @@ static void intrinsicLatencyMode(void) { /*------------------------------------------------------------------------------ * Message of the day *--------------------------------------------------------------------------- */ -#ifndef NO_MOTD +#ifdef MOTD #include static const char *szMotdCachePath() From 1531cfb7eea1824fb40da596b3446baa4312355c Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 27 Jun 2019 16:29:36 -0400 Subject: [PATCH 81/99] Fix additional warnings --- src/Makefile | 2 +- src/fastlock.cpp | 12 ++++++++++-- src/new.cpp | 15 +++++++++++++++ src/new.h | 14 ++++---------- src/redis-cli.h | 3 ++- src/version.h | 3 ++- 6 files changed, 34 insertions(+), 15 deletions(-) create mode 100644 src/new.cpp diff --git a/src/Makefile b/src/Makefile index 6c4c4823b..5e035fb17 100644 --- a/src/Makefile +++ b/src/Makefile @@ -198,7 +198,7 @@ endif REDIS_SERVER_NAME=keydb-server REDIS_SENTINEL_NAME=keydb-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o acl.o storage.o rdb-s3.o fastlock.o $(ASM_OBJ) +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o acl.o storage.o rdb-s3.o fastlock.o new.o $(ASM_OBJ) REDIS_CLI_NAME=keydb-cli REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o redis-cli-cpphelper.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o storage-lite.o fastlock.o $(ASM_OBJ) REDIS_BENCHMARK_NAME=keydb-benchmark diff --git a/src/fastlock.cpp b/src/fastlock.cpp index 00ad8884a..fdf85044a 100644 --- a/src/fastlock.cpp +++ b/src/fastlock.cpp @@ -53,6 +53,10 @@ #endif #endif +#ifndef UNUSED +#define UNUSED(x) ((void)x) +#endif + /**************************************************** * * Implementation of a fair spinlock. To promote fairness we @@ -115,7 +119,9 @@ extern "C" void fastlock_lock(struct fastlock *lock) } unsigned myticket = __atomic_fetch_add(&lock->m_ticket.m_avail, 1, __ATOMIC_RELEASE); +#ifdef __linux__ unsigned mask = (1U << (myticket % 32)); +#endif int cloops = 0; ticket ticketT; while (((ticketT.u = __atomic_load_4(&lock->m_ticket.m_active, __ATOMIC_ACQUIRE)) & 0xffff) != myticket) @@ -154,8 +160,8 @@ extern "C" int fastlock_trylock(struct fastlock *lock, int fWeak) uint16_t active = __atomic_load_2(&lock->m_ticket.m_active, __ATOMIC_RELAXED); uint16_t next = active + 1; - struct ticket ticket_expect { active, active }; - struct ticket ticket_setiflocked { active, next }; + struct ticket ticket_expect { { { active, active } } }; + struct ticket ticket_setiflocked { { { active, next } } }; if (__atomic_compare_exchange(&lock->m_ticket, &ticket_expect, &ticket_setiflocked, fWeak /*weak*/, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { lock->m_depth = 1; @@ -194,6 +200,8 @@ extern "C" void fastlock_unlock(struct fastlock *lock) uint16_t activeNew = __atomic_add_fetch(&lock->m_ticket.m_active, 1, __ATOMIC_RELEASE); // on x86 the atomic is not required here, but ASM handles that case #ifdef __linux__ unlock_futex(lock, activeNew); +#else + UNUSED(activeNew); #endif } } diff --git a/src/new.cpp b/src/new.cpp new file mode 100644 index 000000000..c94a3bc18 --- /dev/null +++ b/src/new.cpp @@ -0,0 +1,15 @@ +#include // std::size_t +#include "server.h" +#include "new.h" + +[[deprecated]] +void *operator new(size_t size) +{ + return zmalloc(size, MALLOC_LOCAL); +} + +void operator delete(void * p) noexcept +{ + zfree(p); +} + diff --git a/src/new.h b/src/new.h index 7ea65e979..6d33cf48d 100644 --- a/src/new.h +++ b/src/new.h @@ -2,22 +2,16 @@ #include // std::size_t [[deprecated]] -inline void *operator new(size_t size) -{ - return zmalloc(size, MALLOC_LOCAL); -} +void *operator new(size_t size); inline void *operator new(size_t size, enum MALLOC_CLASS mclass) { return zmalloc(size, mclass); -} - -inline void operator delete(void * p) noexcept -{ - zfree(p); } +void operator delete(void * p) noexcept; + inline void operator delete(void *p, std::size_t) noexcept { zfree(p); -} \ No newline at end of file +} diff --git a/src/redis-cli.h b/src/redis-cli.h index bda80c42b..33910c2ce 100644 --- a/src/redis-cli.h +++ b/src/redis-cli.h @@ -276,4 +276,5 @@ redisReply *sendScan(unsigned long long *it); #ifdef __cplusplus } -#endif \ No newline at end of file +#endif + diff --git a/src/version.h b/src/version.h index 0c56a20a9..db32931f1 100644 --- a/src/version.h +++ b/src/version.h @@ -1,2 +1,3 @@ #define KEYDB_REAL_VERSION "0.0.0" -extern const char *KEYDB_SET_VERSION; // Unlike real version, this can be overriden by the config \ No newline at end of file +extern const char *KEYDB_SET_VERSION; // Unlike real version, this can be overriden by the config + From 218111eb7244643bd13af47b878622c429535288 Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 5 Jul 2019 23:42:39 -0400 Subject: [PATCH 82/99] Fix additional warning related to new() overloads --- src/Makefile | 4 ++-- src/new.cpp | 9 +++++++++ src/new.h | 11 ++--------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Makefile b/src/Makefile index 5e035fb17..2966ec471 100644 --- a/src/Makefile +++ b/src/Makefile @@ -200,9 +200,9 @@ REDIS_SERVER_NAME=keydb-server REDIS_SENTINEL_NAME=keydb-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o acl.o storage.o rdb-s3.o fastlock.o new.o $(ASM_OBJ) REDIS_CLI_NAME=keydb-cli -REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o redis-cli-cpphelper.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o storage-lite.o fastlock.o $(ASM_OBJ) +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o redis-cli-cpphelper.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o storage-lite.o fastlock.o new.o $(ASM_OBJ) REDIS_BENCHMARK_NAME=keydb-benchmark -REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o siphash.o redis-benchmark.o storage-lite.o fastlock.o $(ASM_OBJ) +REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o siphash.o redis-benchmark.o storage-lite.o fastlock.o new.o $(ASM_OBJ) REDIS_CHECK_RDB_NAME=keydb-check-rdb REDIS_CHECK_AOF_NAME=keydb-check-aof diff --git a/src/new.cpp b/src/new.cpp index c94a3bc18..044257928 100644 --- a/src/new.cpp +++ b/src/new.cpp @@ -8,8 +8,17 @@ void *operator new(size_t size) return zmalloc(size, MALLOC_LOCAL); } +void *operator new(size_t size, enum MALLOC_CLASS mclass) +{ + return zmalloc(size, mclass); +} + void operator delete(void * p) noexcept { zfree(p); } +void operator delete(void *p, std::size_t) noexcept +{ + zfree(p); +} \ No newline at end of file diff --git a/src/new.h b/src/new.h index 6d33cf48d..69464f127 100644 --- a/src/new.h +++ b/src/new.h @@ -4,14 +4,7 @@ [[deprecated]] void *operator new(size_t size); -inline void *operator new(size_t size, enum MALLOC_CLASS mclass) -{ - return zmalloc(size, mclass); -} +void *operator new(size_t size, enum MALLOC_CLASS mclass); void operator delete(void * p) noexcept; - -inline void operator delete(void *p, std::size_t) noexcept -{ - zfree(p); -} +void operator delete(void *p, std::size_t) noexcept; From 852819180f6a281449a376b6b79a424af4e63c82 Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 5 Jul 2019 23:43:01 -0400 Subject: [PATCH 83/99] We can use less memory in the client output buffers. Its OK --- tests/unit/obuf-limits.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/obuf-limits.tcl b/tests/unit/obuf-limits.tcl index 5d625cf45..b205eb31b 100644 --- a/tests/unit/obuf-limits.tcl +++ b/tests/unit/obuf-limits.tcl @@ -15,7 +15,7 @@ start_server {tags {"obuf-limits"}} { if {![regexp {omem=([0-9]+)} $c - omem]} break if {$omem > 200000} break } - assert {$omem >= 90000 && $omem < 200000} + assert {$omem >= 80000 && $omem < 200000} $rd1 close } From d918ca6fa57a6149b86b4effc787dbdde7350133 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 6 Jul 2019 00:36:23 -0400 Subject: [PATCH 84/99] Add back user space buffering of RDB save --- src/aof.cpp | 4 ++-- src/rdb-s3.cpp | 15 +++++++++++---- src/rdb.cpp | 8 ++++---- src/rdb.h | 2 +- src/redis-check-rdb.cpp | 2 +- src/rio.cpp | 15 ++++++++------- src/rio.h | 4 ++-- 7 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/aof.cpp b/src/aof.cpp index bfebf5a5c..c7160489b 100644 --- a/src/aof.cpp +++ b/src/aof.cpp @@ -752,7 +752,7 @@ int loadAppendOnlyFile(char *filename) { serverLog(LL_NOTICE,"Reading RDB preamble from AOF file..."); if (fseek(fp,0,SEEK_SET) == -1) goto readerr; - rioInitWithFile(&rdb,fileno(fp)); + rioInitWithFile(&rdb,fp); rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; if (rdbLoadRio(&rdb,&rsi,1) != C_OK) { serverLog(LL_WARNING,"Error reading the RDB preamble of the AOF file, AOF loading aborted"); @@ -1400,7 +1400,7 @@ int rewriteAppendOnlyFile(char *filename) { } g_pserver->aof_child_diff = sdsempty(); - rioInitWithFile(&aof,fileno(fp)); + rioInitWithFile(&aof,fp); if (g_pserver->aof_rewrite_incremental_fsync) rioSetAutoSync(&aof,REDIS_AUTOSYNC_BYTES); diff --git a/src/rdb-s3.cpp b/src/rdb-s3.cpp index e32275ef2..39be87f83 100644 --- a/src/rdb-s3.cpp +++ b/src/rdb-s3.cpp @@ -33,12 +33,19 @@ int rdbSaveS3(char *s3bucket, rdbSaveInfo *rsi) else { close(fd[0]); - if (rdbSaveFd(fd[1], rsi) != C_OK) + FILE *fp = fdopen(fd[1], "w"); + if (fp == NULL) { - close(fd[1]); + close (fd[1]); return C_ERR; } - close(fd[1]); + + if (rdbSaveFp(fp, rsi) != C_OK) + { + fclose(fp); + return C_ERR; + } + fclose(fp); waitpid(pid, &status, 0); } @@ -59,7 +66,7 @@ int rdbLoadS3Core(int fd, rdbSaveInfo *rsi) if ((fp = fdopen(fd, "rb")) == NULL) return C_ERR; startLoading(fp); - rioInitWithFile(&rdb,fileno(fp)); + rioInitWithFile(&rdb,fp); retval = rdbLoadRio(&rdb,rsi,0); fclose(fp); stopLoading(); diff --git a/src/rdb.cpp b/src/rdb.cpp index a5f36b95a..97b20e24b 100644 --- a/src/rdb.cpp +++ b/src/rdb.cpp @@ -1220,12 +1220,12 @@ int rdbSaveRioWithEOFMark(rio *rdb, int *error, rdbSaveInfo *rsi) { return C_ERR; } -int rdbSaveFd(int fd, rdbSaveInfo *rsi) +int rdbSaveFp(FILE *fp, rdbSaveInfo *rsi) { int error = 0; rio rdb; - rioInitWithFile(&rdb,fd); + rioInitWithFile(&rdb,fp); if (g_pserver->rdb_save_incremental_fsync) rioSetAutoSync(&rdb,REDIS_AUTOSYNC_BYTES); @@ -1267,7 +1267,7 @@ int rdbSaveFile(char *filename, rdbSaveInfo *rsi) { return C_ERR; } - if (rdbSaveFd(fileno(fp), rsi) == C_ERR){ + if (rdbSaveFp(fp, rsi) == C_ERR){ goto werr; } @@ -2151,7 +2151,7 @@ int rdbLoadFile(char *filename, rdbSaveInfo *rsi) { if ((fp = fopen(filename,"r")) == NULL) return C_ERR; startLoading(fp); - rioInitWithFile(&rdb,fileno(fp)); + rioInitWithFile(&rdb,fp); retval = rdbLoadRio(&rdb,rsi,0); fclose(fp); stopLoading(); diff --git a/src/rdb.h b/src/rdb.h index 45cfa475a..0ee2cad92 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -141,7 +141,7 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi); void rdbRemoveTempFile(pid_t childpid); int rdbSave(rdbSaveInfo *rsi); int rdbSaveFile(char *filename, rdbSaveInfo *rsi); -int rdbSaveFd(int fd, rdbSaveInfo *rsi); +int rdbSaveFp(FILE *pf, rdbSaveInfo *rsi); int rdbSaveS3(char *path, rdbSaveInfo *rsi); int rdbLoadS3(char *path, rdbSaveInfo *rsi); ssize_t rdbSaveObject(rio *rdb, robj_roptr o, robj *key); diff --git a/src/redis-check-rdb.cpp b/src/redis-check-rdb.cpp index a1194799e..1e8c20428 100644 --- a/src/redis-check-rdb.cpp +++ b/src/redis-check-rdb.cpp @@ -186,7 +186,7 @@ int redis_check_rdb(const char *rdbfilename, FILE *fp) { int closefile = (fp == NULL); if (fp == NULL && (fp = fopen(rdbfilename,"r")) == NULL) return 1; - rioInitWithFile(&rdb,fileno(fp)); + rioInitWithFile(&rdb,fp); rdbstate.rio = &rdb; rdb.update_cksum = rdbLoadProgressCallback; if (rioRead(&rdb,buf,9) == 0) goto eoferr; diff --git a/src/rio.cpp b/src/rio.cpp index 3c0c7672a..d6d1937eb 100644 --- a/src/rio.cpp +++ b/src/rio.cpp @@ -109,13 +109,14 @@ void rioInitWithBuffer(rio *r, sds s) { static size_t rioFileWrite(rio *r, const void *buf, size_t len) { size_t retval; - retval = write(r->io.file.fd,buf,len); + retval = fwrite(buf,len,1,r->io.file.fp); r->io.file.buffered += len; if (r->io.file.autosync && r->io.file.buffered >= r->io.file.autosync) { - redis_fsync(r->io.file.fd); + fflush(r->io.file.fp); + redis_fsync(fileno(r->io.file.fp)); r->io.file.buffered = 0; } return retval; @@ -123,18 +124,18 @@ static size_t rioFileWrite(rio *r, const void *buf, size_t len) { /* Returns 1 or 0 for success/failure. */ static size_t rioFileRead(rio *r, void *buf, size_t len) { - return read(r->io.file.fd,buf,len); + return fread(buf,len,1,r->io.file.fp); } /* Returns read/write position in file. */ static off_t rioFileTell(rio *r) { - return lseek(r->io.file.fd, 0, SEEK_CUR); + return ftello(r->io.file.fp); } /* Flushes any buffer to target device if applicable. Returns 1 on success * and 0 on failures. */ static int rioFileFlush(rio *r) { - return (fsync(r->io.file.fd) == 0) ? 1 : 0; + return (fflush(r->io.file.fp) == 0) ? 1 : 0; } static const rio rioFileIO = { @@ -149,9 +150,9 @@ static const rio rioFileIO = { { { NULL, 0 } } /* union for io-specific vars */ }; -void rioInitWithFile(rio *r, int fd) { +void rioInitWithFile(rio *r, FILE *fp) { *r = rioFileIO; - r->io.file.fd = fd; + r->io.file.fp = fp; r->io.file.buffered = 0; r->io.file.autosync = 0; } diff --git a/src/rio.h b/src/rio.h index 172b7f9b2..3ec32263b 100644 --- a/src/rio.h +++ b/src/rio.h @@ -73,7 +73,7 @@ struct _rio { } buffer; /* Stdio file pointer target. */ struct { - int fd; + FILE *fp; off_t buffered; /* Bytes written since last fsync. */ off_t autosync; /* fsync after 'autosync' bytes written. */ } file; @@ -128,7 +128,7 @@ static inline int rioFlush(rio *r) { return r->flush(r); } -void rioInitWithFile(rio *r, int fd); +void rioInitWithFile(rio *r, FILE *fp); void rioInitWithBuffer(rio *r, sds s); void rioInitWithFdset(rio *r, int *fds, int numfds); From d013ad9ebaf3c2bf38121349ed9b98e16439c734 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 6 Jul 2019 00:53:20 -0400 Subject: [PATCH 85/99] Redis only does this in beforeSleep function - doing extra calls like we do causes test issues --- src/server.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/server.cpp b/src/server.cpp index 54e26d41b..a41e946c8 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -2001,9 +2001,6 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { flushAppendOnlyFile(0); } - /* Close clients that need to be closed asynchronous */ - freeClientsInAsyncFreeQueue(IDX_EVENT_LOOP_MAIN); - /* Clear the paused clients flag if needed. */ clientsArePaused(); /* Don't check return value, just use the side effect.*/ @@ -2058,8 +2055,6 @@ int serverCronLite(struct aeEventLoop *eventLoop, long long id, void *clientData ProcessPendingAsyncWrites(); // A bug but leave for now, events should clean up after themselves clientsCron(iel); - freeClientsInAsyncFreeQueue(iel); - return 1000/g_pserver->hz; } From 122ec25bf0db0b90e7789b1ab90695ac3f9258b7 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 6 Jul 2019 00:53:40 -0400 Subject: [PATCH 86/99] lazyfree needs to consume more memory to be detectable --- tests/unit/lazyfree.tcl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit/lazyfree.tcl b/tests/unit/lazyfree.tcl index b9b617085..8efb3aecd 100644 --- a/tests/unit/lazyfree.tcl +++ b/tests/unit/lazyfree.tcl @@ -19,14 +19,13 @@ start_server {tags {"lazyfree"}} { } test "FLUSHDB ASYNC can reclaim memory in background" { - after 500 # Sometimes Redis is busy with a prior operation set orig_mem [s used_memory] set args {} - for {set i 0} {$i < 100000} {incr i} { + for {set i 0} {$i < 200000} {incr i} { lappend args $i } r sadd myset {*}$args - assert {[r scard myset] == 100000} + assert {[r scard myset] == 200000} set peak_mem [s used_memory] r flushdb async assert {$peak_mem > $orig_mem+1000000} From a5784ef09e71a9a45780a8f3dbab875b1f1fe1a5 Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 11 Jul 2019 17:00:23 -0400 Subject: [PATCH 87/99] Fix active replication offset synchronization accounting, and enable the wait command --- src/networking.cpp | 1 + src/replication.cpp | 12 +++++++++--- src/server.h | 3 ++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/networking.cpp b/src/networking.cpp index 76d07a117..39282d5c6 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -144,6 +144,7 @@ client *createClient(int fd, int iel) { c->replstate = REPL_STATE_NONE; c->repl_put_online_on_ack = 0; c->reploff = 0; + c->reploff_skipped = 0; c->read_reploff = 0; c->repl_ack_off = 0; c->repl_ack_time = 0; diff --git a/src/replication.cpp b/src/replication.cpp index e9e37f191..c5d9fa6bc 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -323,6 +323,7 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { char proto[1024]; int cchProto = snprintf(proto, sizeof(proto), "*3\r\n$7\r\nRREPLAY\r\n$%d\r\n%s\r\n$%lld\r\n", (int)strlen(uuid), uuid, cchbuf); cchProto = std::min((int)sizeof(proto), cchProto); + long long master_repl_offset_start = g_pserver->master_repl_offset; /* Write the command to the replication backlog if any. */ if (g_pserver->repl_backlog) @@ -375,8 +376,12 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { /* Don't feed slaves that are still waiting for BGSAVE to start */ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue; - if (serverTL->current_client && FSameHost(serverTL->current_client, slave)) continue; std::unique_locklock)> lock(slave->lock); + if (serverTL->current_client && FSameHost(serverTL->current_client, slave)) + { + slave->reploff_skipped += g_pserver->master_repl_offset - master_repl_offset_start; + continue; + } if (!fSendRaw) addReplyProtoAsync(slave, proto, cchProto); @@ -987,7 +992,7 @@ void replconfCommand(client *c) { if ((getLongLongFromObject(c->argv[j+1], &offset) != C_OK)) return; if (offset > c->repl_ack_off) - c->repl_ack_off = offset; + c->repl_ack_off = offset + c->reploff_skipped; c->repl_ack_time = g_pserver->unixtime; /* If this was a diskless replication, we need to really put * the slave online when the first ACK is received (which @@ -1290,6 +1295,7 @@ void replicationCreateMasterClient(redisMaster *mi, int fd, int dbid) { mi->master->flags |= CLIENT_MASTER; mi->master->authenticated = 1; mi->master->reploff = mi->master_initial_offset; + mi->master->reploff_skipped = 0; mi->master->read_reploff = mi->master->reploff; mi->master->puser = NULL; /* This client can do everything. */ @@ -2786,7 +2792,7 @@ void waitCommand(client *c) { long numreplicas, ackreplicas; long long offset = c->woff; - if (listLength(g_pserver->masters)) { + if (listLength(g_pserver->masters) && !g_pserver->fActiveReplica) { addReplyError(c,"WAIT cannot be used with replica instances. Please also note that since Redis 4.0 if a replica is configured to be writable (which is not the default) writes to replicas are just local and are not propagated."); return; } diff --git a/src/server.h b/src/server.h index 1e854abca..8f45d39ed 100644 --- a/src/server.h +++ b/src/server.h @@ -932,6 +932,7 @@ typedef struct client { sds replpreamble; /* Replication DB preamble. */ long long read_reploff; /* Read replication offset if this is a master. */ long long reploff; /* Applied replication offset if this is a master. */ + long long reploff_skipped; /* Repl backlog we did not send to this client */ long long repl_ack_off; /* Replication ack offset, if this is a slave. */ long long repl_ack_time;/* Replication ack time, if this is a slave. */ long long psync_initial_offset; /* FULLRESYNC reply offset other slaves @@ -1160,7 +1161,7 @@ struct redisMaster { char *masterauth; /* AUTH with this password with master */ char *masterhost; /* Hostname of master */ int masterport; /* Port of master */ - client *cached_master; /* Cached master to be reused for PSYNC. */ + client *cached_master; /* Cached master to be reused for PSYNC. */ client *master; /* The following two fields is where we store master PSYNC replid/offset * while the PSYNC is in progress. At the end we'll copy the fields into From b5101a460dba56d6deac5b753d09d47d834eee4a Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 11 Jul 2019 18:51:20 -0400 Subject: [PATCH 88/99] Active Replicas are not slaves --- src/server.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/server.cpp b/src/server.cpp index a41e946c8..087821078 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -4337,7 +4337,8 @@ sds genRedisInfoString(const char *section) { info = sdscatprintf(info, "# Replication\r\n" "role:%s\r\n", - listLength(g_pserver->masters) == 0 ? "master" : "slave"); + listLength(g_pserver->masters) == 0 ? "master" + : g_pserver->fActiveReplica ? "active-replica" : "slave"); if (listLength(g_pserver->masters)) { listIter li; listNode *ln; From 6cf85d1d01e56de0bb6e5d34590623cd8dabd32b Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 11 Jul 2019 19:20:12 -0400 Subject: [PATCH 89/99] Fix more accounting errors with active replication and the replication offset. Specifically we don't update repl_ack_off as frequently as we do reploff_skipped --- src/replication.cpp | 6 +++--- src/server.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/replication.cpp b/src/replication.cpp index c5d9fa6bc..b37d1b655 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -992,7 +992,7 @@ void replconfCommand(client *c) { if ((getLongLongFromObject(c->argv[j+1], &offset) != C_OK)) return; if (offset > c->repl_ack_off) - c->repl_ack_off = offset + c->reploff_skipped; + c->repl_ack_off = offset; c->repl_ack_time = g_pserver->unixtime; /* If this was a diskless replication, we need to really put * the slave online when the first ACK is received (which @@ -2435,7 +2435,7 @@ void roleCommand(client *c) { addReplyArrayLen(c,3); addReplyBulkCString(c,slaveip); addReplyBulkLongLong(c,slave->slave_listening_port); - addReplyBulkLongLong(c,slave->repl_ack_off); + addReplyBulkLongLong(c,slave->repl_ack_off+slave->reploff_skipped); slaves++; } setDeferredArrayLen(c,mbcount,slaves); @@ -2780,7 +2780,7 @@ int replicationCountAcksByOffset(long long offset) { client *slave = (client*)ln->value; if (slave->replstate != SLAVE_STATE_ONLINE) continue; - if (slave->repl_ack_off >= offset) count++; + if ((slave->repl_ack_off + slave->reploff_skipped) >= offset) count++; } return count; } diff --git a/src/server.cpp b/src/server.cpp index 087821078..4d2735467 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -4448,7 +4448,7 @@ sds genRedisInfoString(const char *section) { "slave%d:ip=%s,port=%d,state=%s," "offset=%lld,lag=%ld\r\n", slaveid,slaveip,slave->slave_listening_port,state, - slave->repl_ack_off, lag); + (slave->repl_ack_off + slave->reploff_skipped), lag); slaveid++; } } From 1f08a0efb33511ddc75c2acc62199bfcd0860137 Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 11 Jul 2019 20:20:01 -0400 Subject: [PATCH 90/99] Fix compile errors on GCC v5 --- src/networking.cpp | 2 +- src/object.cpp | 4 ++-- src/rdb.cpp | 4 ++-- src/server.h | 2 +- src/sort.cpp | 2 +- src/t_hash.cpp | 2 +- src/t_string.cpp | 4 ++-- src/t_zset.cpp | 22 +++++++++++----------- 8 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/networking.cpp b/src/networking.cpp index 39282d5c6..7342a0fe4 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -623,7 +623,7 @@ void setDeferredPushLen(client *c, void *node, long length) { /* Add a double as a bulk reply */ void addReplyDoubleCore(client *c, double d, bool fAsync) { - if (isinf(d)) { + if (std::isinf(d)) { /* Libc in odd systems (Hi Solaris!) will format infinite in a * different way, so better to handle it in an explicit way. */ if (c->resp == 2) { diff --git a/src/object.cpp b/src/object.cpp index de770e47e..b57a09b7e 100644 --- a/src/object.cpp +++ b/src/object.cpp @@ -624,7 +624,7 @@ int getDoubleFromObject(const robj *o, double *target) { (size_t)(eptr-(char*)szFromObj(o)) != sdslen(szFromObj(o)) || (errno == ERANGE && (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) || - isnan(value)) + std::isnan(value)) return C_ERR; } else if (o->encoding == OBJ_ENCODING_INT) { value = (long)ptrFromObj(o); @@ -666,7 +666,7 @@ int getLongDoubleFromObject(robj *o, long double *target) { (size_t)(eptr-(char*)szFromObj(o)) != sdslen(szFromObj(o)) || (errno == ERANGE && (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) || - isnan(value)) + std::isnan(value)) return C_ERR; } else if (o->encoding == OBJ_ENCODING_INT) { value = (long)szFromObj(o); diff --git a/src/rdb.cpp b/src/rdb.cpp index 97b20e24b..2cac89178 100644 --- a/src/rdb.cpp +++ b/src/rdb.cpp @@ -543,10 +543,10 @@ int rdbSaveDoubleValue(rio *rdb, double val) { unsigned char buf[128]; int len; - if (isnan(val)) { + if (std::isnan(val)) { buf[0] = 253; len = 1; - } else if (!isfinite(val)) { + } else if (!std::isfinite(val)) { len = 1; buf[0] = (val < 0) ? 255 : 254; } else { diff --git a/src/server.h b/src/server.h index 8f45d39ed..6b66c9635 100644 --- a/src/server.h +++ b/src/server.h @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/sort.cpp b/src/sort.cpp index 6b517b25a..34b86d700 100644 --- a/src/sort.cpp +++ b/src/sort.cpp @@ -475,7 +475,7 @@ void sortCommand(client *c) { vector[j].u.score = strtod(szFromObj(byval),&eptr); if (eptr[0] != '\0' || errno == ERANGE || - isnan(vector[j].u.score)) + std::isnan(vector[j].u.score)) { int_conversion_error = 1; } diff --git a/src/t_hash.cpp b/src/t_hash.cpp index a7d35a926..f2758bdd2 100644 --- a/src/t_hash.cpp +++ b/src/t_hash.cpp @@ -615,7 +615,7 @@ void hincrbyfloatCommand(client *c) { } value += incr; - if (isnan(value) || isinf(value)) { + if (std::isnan(value) || std::isinf(value)) { addReplyError(c,"increment would produce NaN or Infinity"); return; } diff --git a/src/t_string.cpp b/src/t_string.cpp index aea74b48d..4cb30eac6 100644 --- a/src/t_string.cpp +++ b/src/t_string.cpp @@ -28,7 +28,7 @@ */ #include "server.h" -#include /* isnan(), isinf() */ +#include /* isnan(), isinf() */ /*----------------------------------------------------------------------------- * String Commands @@ -408,7 +408,7 @@ void incrbyfloatCommand(client *c) { return; value += incr; - if (isnan(value) || isinf(value)) { + if (std::isnan(value) || std::isinf(value)) { addReplyError(c,"increment would produce NaN or Infinity"); return; } diff --git a/src/t_zset.cpp b/src/t_zset.cpp index 1c965813d..456348e10 100644 --- a/src/t_zset.cpp +++ b/src/t_zset.cpp @@ -134,7 +134,7 @@ zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) { unsigned int rank[ZSKIPLIST_MAXLEVEL]; int i, level; - serverAssert(!isnan(score)); + serverAssert(!std::isnan(score)); x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { /* store rank that is crossed to reach the insert position */ @@ -530,11 +530,11 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) { } else { if (((char*)ptrFromObj(min))[0] == '(') { spec->min = strtod((char*)ptrFromObj(min)+1,&eptr); - if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR; + if (eptr[0] != '\0' || std::isnan(spec->min)) return C_ERR; spec->minex = 1; } else { spec->min = strtod((char*)ptrFromObj(min),&eptr); - if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR; + if (eptr[0] != '\0' || std::isnan(spec->min)) return C_ERR; } } if (max->encoding == OBJ_ENCODING_INT) { @@ -542,11 +542,11 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) { } else { if (((char*)ptrFromObj(max))[0] == '(') { spec->max = strtod((char*)ptrFromObj(max)+1,&eptr); - if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR; + if (eptr[0] != '\0' || std::isnan(spec->max)) return C_ERR; spec->maxex = 1; } else { spec->max = strtod((char*)ptrFromObj(max),&eptr); - if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR; + if (eptr[0] != '\0' || std::isnan(spec->max)) return C_ERR; } } @@ -1320,7 +1320,7 @@ int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore) { double curscore; /* NaN as input is an error regardless of all the other parameters. */ - if (isnan(score)) { + if (std::isnan(score)) { *flags = ZADD_NAN; return 0; } @@ -1339,7 +1339,7 @@ int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore) { /* Prepare the score for the increment if needed. */ if (incr) { score += curscore; - if (isnan(score)) { + if (std::isnan(score)) { *flags |= ZADD_NAN; return 0; } @@ -1385,7 +1385,7 @@ int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore) { /* Prepare the score for the increment if needed. */ if (incr) { score += curscore; - if (isnan(score)) { + if (std::isnan(score)) { *flags |= ZADD_NAN; return 0; } @@ -2150,7 +2150,7 @@ inline static void zunionInterAggregate(double *target, double val, int aggregat /* The result of adding two doubles is NaN when one variable * is +inf and the other is -inf. When these numbers are added, * we maintain the convention of the result being 0.0. */ - if (isnan(*target)) *target = 0.0; + if (std::isnan(*target)) *target = 0.0; } else if (aggregate == REDIS_AGGR_MIN) { *target = val < *target ? val : *target; } else if (aggregate == REDIS_AGGR_MAX) { @@ -2283,7 +2283,7 @@ void zunionInterGenericCommand(client *c, robj *dstkey, int op) { double score, value; score = src[0].weight * zval.score; - if (isnan(score)) score = 0; + if (std::isnan(score)) score = 0; for (j = 1; j < setnum; j++) { /* It is not safe to access the zset we are @@ -2330,7 +2330,7 @@ void zunionInterGenericCommand(client *c, robj *dstkey, int op) { while (zuiNext(&src[i],&zval)) { /* Initialize value */ score = src[i].weight * zval.score; - if (isnan(score)) score = 0; + if (std::isnan(score)) score = 0; /* Search for this element in the accumulating dictionary. */ de = dictAddRaw(accumulator,zuiSdsFromValue(&zval),&existing); From f12c3cf9ddccedbafd7cde05fcabc5f47e5c58a3 Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 11 Jul 2019 20:29:47 -0400 Subject: [PATCH 91/99] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 37e735faf..1f23f2c04 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ ![Current Release](https://img.shields.io/github/release/JohnSully/KeyDB.svg) [![Build Status](https://travis-ci.org/JohnSully/KeyDB.svg?branch=unstable)](https://travis-ci.org/JohnSully/KeyDB) [![Join the chat at https://gitter.im/KeyDB/community](https://badges.gitter.im/KeyDB/community.svg)](https://gitter.im/KeyDB/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![StackShare](http://img.shields.io/badge/tech-stack-0690fa.svg?style=flat)](https://stackshare.io/eq-alpha-technology-inc/eq-alpha-technology-inc) What is KeyDB? -------------- From 149c3cf3dd75e8058c7c560d6a7ffbbb95da3898 Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 12 Jul 2019 02:13:37 -0400 Subject: [PATCH 92/99] FIX: The dabase count configuration is not respected --- src/server.cpp | 28 ++++++++++++++-------------- tests/unit/other.tcl | 7 +++++++ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/server.cpp b/src/server.cpp index 4d2735467..fa03b6efc 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -2495,20 +2495,6 @@ void initServerConfig(void) { /* Multithreading */ cserver.cthreads = CONFIG_DEFAULT_THREADS; cserver.fThreadAffinity = CONFIG_DEFAULT_THREAD_AFFINITY; - - g_pserver->db = (redisDb*)zmalloc(sizeof(redisDb)*cserver.dbnum, MALLOC_LOCAL); - - /* Create the Redis databases, and initialize other internal state. */ - for (int j = 0; j < cserver.dbnum; j++) { - g_pserver->db[j].pdict = dictCreate(&dbDictType,NULL); - g_pserver->db[j].expires = dictCreate(&keyptrDictType,NULL); - g_pserver->db[j].blocking_keys = dictCreate(&keylistDictType,NULL); - g_pserver->db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL); - g_pserver->db[j].watched_keys = dictCreate(&keylistDictType,NULL); - g_pserver->db[j].id = j; - g_pserver->db[j].avg_ttl = 0; - g_pserver->db[j].defrag_later = listCreate(); - } } extern char **environ; @@ -2914,6 +2900,20 @@ void initServer(void) { fastlock_init(&g_pserver->flock); + g_pserver->db = (redisDb*)zmalloc(sizeof(redisDb)*cserver.dbnum, MALLOC_LOCAL); + + /* Create the Redis databases, and initialize other internal state. */ + for (int j = 0; j < cserver.dbnum; j++) { + g_pserver->db[j].pdict = dictCreate(&dbDictType,NULL); + g_pserver->db[j].expires = dictCreate(&keyptrDictType,NULL); + g_pserver->db[j].blocking_keys = dictCreate(&keylistDictType,NULL); + g_pserver->db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL); + g_pserver->db[j].watched_keys = dictCreate(&keylistDictType,NULL); + g_pserver->db[j].id = j; + g_pserver->db[j].avg_ttl = 0; + g_pserver->db[j].defrag_later = listCreate(); + } + if (g_pserver->syslog_enabled) { openlog(g_pserver->syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT, g_pserver->syslog_facility); diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl index 965902456..a0bbba0c5 100644 --- a/tests/unit/other.tcl +++ b/tests/unit/other.tcl @@ -1,3 +1,10 @@ +start_server {tags {"other"} overrides {databases 64}} { + test {CONF-DATABASES - ensure the databases config option is respected} { + r select 63 + r set testkey {foo} + } {OK} +} + start_server {tags {"other"}} { if {$::force_failure} { # This is used just for test suite development purposes. From 528d10091fda0d2c56674e825c4f70467587955f Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 12 Jul 2019 03:54:41 -0400 Subject: [PATCH 93/99] Add Active Replication tests --- src/networking.cpp | 4 +- src/replication.cpp | 20 ++++-- tests/integration/replication-active.tcl | 83 ++++++++++++++++++++++++ tests/test_helper.tcl | 1 + 4 files changed, 100 insertions(+), 8 deletions(-) create mode 100644 tests/integration/replication-active.tcl diff --git a/src/networking.cpp b/src/networking.cpp index 7342a0fe4..b9b684407 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -2577,7 +2577,9 @@ void helloCommand(client *c) { if (!g_pserver->sentinel_mode) { addReplyBulkCString(c,"role"); - addReplyBulkCString(c,listLength(g_pserver->masters) ? "replica" : "master"); + addReplyBulkCString(c,listLength(g_pserver->masters) ? + g_pserver->fActiveReplica ? "active-replica" : "replica" + : "master"); } addReplyBulkCString(c,"modules"); diff --git a/src/replication.cpp b/src/replication.cpp index b37d1b655..c782e7aa7 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -2342,12 +2342,15 @@ void replicationUnsetMaster(redisMaster *mi) { /* This function is called when the slave lose the connection with the * master into an unexpected way. */ void replicationHandleMasterDisconnection(redisMaster *mi) { - mi->master = NULL; - mi->repl_state = REPL_STATE_CONNECT; - mi->repl_down_since = g_pserver->unixtime; - /* We lost connection with our master, don't disconnect slaves yet, - * maybe we'll be able to PSYNC with our master later. We'll disconnect - * the slaves only if we'll have to do a full resync with our master. */ + if (mi != nullptr) + { + mi->master = NULL; + mi->repl_state = REPL_STATE_CONNECT; + mi->repl_down_since = g_pserver->unixtime; + /* We lost connection with our master, don't disconnect slaves yet, + * maybe we'll be able to PSYNC with our master later. We'll disconnect + * the slaves only if we'll have to do a full resync with our master. */ + } } void replicaofCommand(client *c) { @@ -2449,7 +2452,10 @@ void roleCommand(client *c) { redisMaster *mi = (redisMaster*)listNodeValue(ln); const char *slavestate = NULL; addReplyArrayLen(c,5); - addReplyBulkCBuffer(c,"slave",5); + if (g_pserver->fActiveReplica) + addReplyBulkCBuffer(c,"active-replica",14); + else + addReplyBulkCBuffer(c,"slave",5); addReplyBulkCString(c,mi->masterhost); addReplyLongLong(c,mi->masterport); if (slaveIsInHandshakeState(mi)) { diff --git a/tests/integration/replication-active.tcl b/tests/integration/replication-active.tcl new file mode 100644 index 000000000..02013c959 --- /dev/null +++ b/tests/integration/replication-active.tcl @@ -0,0 +1,83 @@ +start_server {tags {"active-repl"} overrides {active-replica yes}} { + set slave [srv 0 client] + set slave_host [srv 0 host] + set slave_port [srv 0 port] + set slave_log [srv 0 stdout] + set slave_pid [s process_id] + start_server {overrides {active-replica yes}} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + # Use a short replication timeout on the slave, so that if there + # are no bugs the timeout is triggered in a reasonable amount + # of time. + $slave config set repl-timeout 5 + + # Start the replication process... + $slave slaveof $master_host $master_port + $master slaveof $slave_host $slave_port + + test {Active replicas report the correct role} { + wait_for_condition 50 100 { + [string match *active-replica* [$slave role]] + } else { + fail "Replica0 does not report the correct role" + } + wait_for_condition 50 100 { + [string match *active-replica* [$master role]] + } else { + fail "Replica1 does not report the correct role" + } + } + + test {Active replicas propogate} { + $master set testkey foo + wait_for_condition 50 500 { + [string match *foo* [$slave get testkey]] + } else { + fail "replication failed to propogate" + } + + $slave set testkey bar + wait_for_condition 50 500 { + [string match bar [$master get testkey]] + } else { + fail "replication failed to propogate in the other direction" + } + } + + test {Active replicas WAIT} { + # Test that wait succeeds since replicas should be syncronized + $master set testkey foo + $slave set testkey2 test + assert_equal {1} [$master wait 1 10] + assert_equal {1} [$slave wait 1 10] + + # Now setup a situation where wait should fail + exec kill -SIGSTOP $slave_pid + $master set testkey fee + assert_equal {0} [$master wait 1 1] + } + # Resume the replica we paused in the prior test + exec kill -SIGCONT $slave_pid + + test {Active replica expire propogates} { + $master set testkey1 foo + wait_for_condition 50 1000 { + [string match *foo* [$slave get testkey1]] + } else { + fail "Replication failed to propogate" + } + $master pexpire testkey1 200 + after 1000 + assert_equal {0} [$master del testkey1] + assert_equal {0} [$slave del testkey1] + + $slave set testkey1 foo px 200 + after 1000 + assert_equal {0} [$master del testkey1] + assert_equal {0} [$slave del testkey1] + } + } +} diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 1442067f5..6abbddbbe 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -41,6 +41,7 @@ set ::all_tests { integration/replication-3 integration/replication-4 integration/replication-psync + integration/replication-active integration/aof integration/rdb integration/convert-zipmap-hash-on-load From 8630339e43c1de1cd723bdfdca8ab5924e2cb8b0 Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 12 Jul 2019 20:46:50 -0400 Subject: [PATCH 94/99] Fix deadlock with client and ae locks --- src/aelocker.h | 4 +++- src/fastlock.cpp | 14 ++++++++++++++ src/fastlock.h | 12 ++++++++++++ src/networking.cpp | 8 ++++---- src/scripting.cpp | 22 ++++++++-------------- src/server.h | 4 ++-- 6 files changed, 43 insertions(+), 21 deletions(-) diff --git a/src/aelocker.h b/src/aelocker.h index 562ac55ef..d5c8186bf 100644 --- a/src/aelocker.h +++ b/src/aelocker.h @@ -19,11 +19,12 @@ class AeLocker if (!aeTryAcquireLock(true /*fWeak*/)) // avoid locking the client if we can { bool fOwnClientLock = true; + int clientNesting = 1; for (;;) { if (fOwnClientLock) { - c->lock.unlock(); + clientNesting = c->lock.unlock_recursive(); fOwnClientLock = false; } aeAcquireLock(); @@ -36,6 +37,7 @@ class AeLocker break; } } + c->lock.lock_recursive(clientNesting); } m_fArmed = true; diff --git a/src/fastlock.cpp b/src/fastlock.cpp index fdf85044a..33de19866 100644 --- a/src/fastlock.cpp +++ b/src/fastlock.cpp @@ -220,3 +220,17 @@ bool fastlock::fOwnLock() { return gettid() == m_pidOwner; } + +int fastlock_unlock_recursive(struct fastlock *lock) +{ + int rval = lock->m_depth; + lock->m_depth = 1; + fastlock_unlock(lock); + return rval; +} + +void fastlock_lock_recursive(struct fastlock *lock, int nesting) +{ + fastlock_lock(lock); + lock->m_depth = nesting; +} \ No newline at end of file diff --git a/src/fastlock.h b/src/fastlock.h index 26a4b4d01..c7a40bdf3 100644 --- a/src/fastlock.h +++ b/src/fastlock.h @@ -12,6 +12,8 @@ void fastlock_lock(struct fastlock *lock); int fastlock_trylock(struct fastlock *lock, int fWeak); void fastlock_unlock(struct fastlock *lock); void fastlock_free(struct fastlock *lock); +int fastlock_unlock_recursive(struct fastlock *lock); +void fastlock_lock_recursive(struct fastlock *lock, int nesting); uint64_t fastlock_getlongwaitcount(); // this is a global value @@ -65,6 +67,16 @@ struct fastlock fastlock_unlock(this); } + int unlock_recursive() + { + return fastlock_unlock_recursive(this); + } + + void lock_recursive(int nesting) + { + fastlock_lock_recursive(this, nesting); + } + bool fOwnLock(); // true if this thread owns the lock, NOTE: not 100% reliable, use for debugging only #endif }; diff --git a/src/networking.cpp b/src/networking.cpp index b9b684407..36b2aa9de 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -1363,9 +1363,9 @@ void freeClientAsync(client *c) { * are in the context of the main thread while the other threads are * idle. */ if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_LUA) return; - AeLocker lock; - lock.arm(nullptr); std::lock_guardlock)> clientlock(c->lock); + AeLocker lock; + lock.arm(c); c->flags |= CLIENT_CLOSE_ASAP; listAddNodeTail(g_pserver->clients_to_close,c); } @@ -2059,10 +2059,10 @@ void processInputBufferAndReplicate(client *c) { if (applied) { if (!g_pserver->fActiveReplica) { - aeAcquireLock(); + AeLocker ae; + ae.arm(c); replicationFeedSlavesFromMasterStream(g_pserver->slaves, c->pending_querybuf, applied); - aeReleaseLock(); } sdsrange(c->pending_querybuf,applied,-1); } diff --git a/src/scripting.cpp b/src/scripting.cpp index 15f41745e..1548044e2 100644 --- a/src/scripting.cpp +++ b/src/scripting.cpp @@ -371,7 +371,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { int acl_retval = 0; int call_flags = CMD_CALL_SLOWLOG | CMD_CALL_STATS; struct redisCommand *cmd; - client *c = g_pserver->lua_client; + client *c = serverTL->lua_client; sds reply; // Ensure our client is on the right thread @@ -966,8 +966,11 @@ void scriptingInit(int setup) { lua_State *lua = lua_open(); if (setup) { - g_pserver->lua_client = NULL; - g_pserver->lua_caller = NULL; + for (int iel = 0; iel < cserver.cthreads; ++iel) + { + g_pserver->rgthreadvar[iel].lua_client = createClient(-1, iel); + g_pserver->rgthreadvar[iel].lua_client->flags |= CLIENT_LUA; + } g_pserver->lua_timedout = 0; ldbInit(); } @@ -1117,15 +1120,6 @@ void scriptingInit(int setup) { lua_pcall(lua,0,0,0); } - /* Create the (non connected) client that we use to execute Redis commands - * inside the Lua interpreter. - * Note: there is no need to create it again when this function is called - * by scriptingReset(). */ - if (g_pserver->lua_client == NULL) { - g_pserver->lua_client = createClient(-1, IDX_EVENT_LOOP_MAIN); - g_pserver->lua_client->flags |= CLIENT_LUA; - } - /* Lua beginners often don't use "local", this is likely to introduce * subtle bugs in their code. To prevent problems we protect accesses * to global variables. */ @@ -1272,7 +1266,7 @@ sds luaCreateFunction(client *c, lua_State *lua, robj *body) { * so that we can replicate / write in the AOF all the * EVALSHA commands as EVAL using the original script. */ int retval = dictAdd(g_pserver->lua_scripts,sha,body); - serverAssertWithInfo(c ? c : g_pserver->lua_client,NULL,retval == DICT_OK); + serverAssertWithInfo(c ? c : serverTL->lua_client,NULL,retval == DICT_OK); g_pserver->lua_scripts_mem += sdsZmallocSize(sha) + getStringObjectSdsUsedMemory(body); incrRefCount(body); return sha; @@ -1393,7 +1387,7 @@ void evalGenericCommand(client *c, int evalsha) { luaSetGlobalArray(lua,"ARGV",c->argv+3+numkeys,c->argc-3-numkeys); /* Select the right DB in the context of the Lua client */ - selectDb(g_pserver->lua_client,c->db->id); + selectDb(serverTL->lua_client,c->db->id); /* Set a hook in order to be able to stop the script execution if it * is running for too much time. diff --git a/src/server.h b/src/server.h index 6b66c9635..824eef04f 100644 --- a/src/server.h +++ b/src/server.h @@ -1153,6 +1153,7 @@ struct redisServerThreadVars { int module_blocked_pipe[2]; /* Pipe used to awake the event loop if a client blocked on a module command needs to be processed. */ + client *lua_client = nullptr; /* The "fake client" to query Redis from Lua */ struct fastlock lockPendingWrite; }; @@ -1498,8 +1499,7 @@ struct redisServer { REDISMODULE_CLUSTER_FLAG_*. */ /* Scripting */ lua_State *lua; /* The Lua interpreter. We use just one for all clients */ - client *lua_client; /* The "fake client" to query Redis from Lua */ - client *lua_caller; /* The client running EVAL right now, or NULL */ + client *lua_caller = nullptr; /* The client running EVAL right now, or NULL */ dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */ unsigned long long lua_scripts_mem; /* Cached scripts' memory + oh */ mstime_t lua_time_limit; /* Script timeout in milliseconds */ From 3081b6f98b5e7a9f3ef7cfe040236070398b081c Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 12 Jul 2019 23:51:45 -0400 Subject: [PATCH 95/99] Fix more locking deadlocks --- src/debug.cpp | 2 +- src/networking.cpp | 66 ++++++++++++++++++++++------------------------ src/server.h | 2 +- 3 files changed, 34 insertions(+), 36 deletions(-) diff --git a/src/debug.cpp b/src/debug.cpp index 4e588a254..f4791f593 100644 --- a/src/debug.cpp +++ b/src/debug.cpp @@ -706,7 +706,7 @@ void _serverAssertPrintClientInfo(const client *c) { bugReportStart(); serverLog(LL_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ==="); - serverLog(LL_WARNING,"client->flags = %d", c->flags); + serverLog(LL_WARNING,"client->flags = %d", static_cast(c->flags)); serverLog(LL_WARNING,"client->fd = %d", c->fd); serverLog(LL_WARNING,"client->argc = %d", c->argc); for (j=0; j < c->argc; j++) { diff --git a/src/networking.cpp b/src/networking.cpp index 36b2aa9de..7cf0ae55f 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -1476,15 +1476,7 @@ int writeToClient(int fd, client *c, int handler_installed) { serverLog(LL_VERBOSE, "Error writing to client: %s", strerror(errno)); lock.unlock(); - if (aeTryAcquireLock(true /*fWeak*/)) - { - freeClient(c); - aeReleaseLock(); - } - else - { - freeClientAsync(c); - } + freeClientAsync(c); return C_ERR; } @@ -1503,15 +1495,7 @@ int writeToClient(int fd, client *c, int handler_installed) { /* Close connection after entire reply has been sent. */ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) { lock.unlock(); - if (aeTryAcquireLock(true /*fWeak*/)) - { - freeClient(c); - aeReleaseLock(); - } - else - { - freeClientAsync(c); - } + freeClientAsync(c); return C_ERR; } } @@ -1524,7 +1508,14 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { client *c = (client*)privdata; serverAssert(ielFromEventLoop(el) == c->iel); - writeToClient(fd,c,1); + if (writeToClient(fd,c,1) == C_ERR) + { + AeLocker ae; + c->lock.lock(); + ae.arm(c); + if (c->flags & CLIENT_CLOSE_ASAP) + freeClient(c); + } } void ProcessPendingAsyncWrites() @@ -1591,39 +1582,46 @@ int handleClientsWithPendingWrites(int iel) { int processed = (int)vec.size(); serverAssert(iel == (serverTL - g_pserver->rgthreadvar)); + int ae_flags = AE_WRITABLE|AE_WRITE_THREADSAFE; + /* For the fsync=always policy, we want that a given FD is never + * served for reading and writing in the same event loop iteration, + * so that in the middle of receiving the query, and serving it + * to the client, we'll call beforeSleep() that will do the + * actual fsync of AOF to disk. AE_BARRIER ensures that. */ + if (g_pserver->aof_state == AOF_ON && + g_pserver->aof_fsync == AOF_FSYNC_ALWAYS) + { + ae_flags |= AE_BARRIER; + } + while(!vec.empty()) { client *c = vec.back(); - std::unique_locklock)> lock(c->lock); + AssertCorrectThread(c); c->flags &= ~CLIENT_PENDING_WRITE; vec.pop_back(); - AssertCorrectThread(c); /* If a client is protected, don't do anything, * that may trigger write error or recreate handler. */ if (c->flags & CLIENT_PROTECTED) continue; + std::unique_locklock)> lock(c->lock); + /* Try to write buffers to the client socket. */ if (writeToClient(c->fd,c,0) == C_ERR) { - lock.release(); // client is free'd + if (c->flags & CLIENT_CLOSE_ASAP) + { + c->lock.lock(); + AeLocker ae; + ae.arm(c); + freeClient(c); // writeToClient will only async close, but there's no need to wait + } continue; } /* If after the synchronous writes above we still have data to * output to the client, we need to install the writable handler. */ if (clientHasPendingReplies(c)) { - int ae_flags = AE_WRITABLE|AE_WRITE_THREADSAFE; - /* For the fsync=always policy, we want that a given FD is never - * served for reading and writing in the same event loop iteration, - * so that in the middle of receiving the query, and serving it - * to the client, we'll call beforeSleep() that will do the - * actual fsync of AOF to disk. AE_BARRIER ensures that. */ - if (g_pserver->aof_state == AOF_ON && - g_pserver->aof_fsync == AOF_FSYNC_ALWAYS) - { - ae_flags |= AE_BARRIER; - } - if (aeCreateFileEvent(g_pserver->rgthreadvar[c->iel].el, c->fd, ae_flags, sendReplyToClient, c) == AE_ERR) freeClientAsync(c); } diff --git a/src/server.h b/src/server.h index 824eef04f..8a762e239 100644 --- a/src/server.h +++ b/src/server.h @@ -921,7 +921,7 @@ typedef struct client { time_t ctime; /* Client creation time. */ time_t lastinteraction; /* Time of the last interaction, used for timeout */ time_t obuf_soft_limit_reached_time; - int flags; /* Client flags: CLIENT_* macros. */ + std::atomic flags; /* Client flags: CLIENT_* macros. */ int fPendingAsyncWrite; /* NOTE: Not a flag because it is written to outside of the client lock (locked by the global lock instead) */ int authenticated; /* Needed when the default user requires auth. */ int replstate; /* Replication state if this is a slave. */ From 620c2ec1412a3bcea5fecf21238caa065e73f3e9 Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 12 Jul 2019 23:52:07 -0400 Subject: [PATCH 96/99] Give the active-rep more time to sync for test reliability --- tests/integration/replication-active.tcl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/integration/replication-active.tcl b/tests/integration/replication-active.tcl index 02013c959..4583da321 100644 --- a/tests/integration/replication-active.tcl +++ b/tests/integration/replication-active.tcl @@ -51,13 +51,16 @@ start_server {tags {"active-repl"} overrides {active-replica yes}} { # Test that wait succeeds since replicas should be syncronized $master set testkey foo $slave set testkey2 test - assert_equal {1} [$master wait 1 10] - assert_equal {1} [$slave wait 1 10] + assert_equal {1} [$master wait 1 1000] { "value should propogate + within 1 second" } + assert_equal {1} [$slave wait 1 1000] { "value should propogate + within 1 second" } # Now setup a situation where wait should fail exec kill -SIGSTOP $slave_pid $master set testkey fee - assert_equal {0} [$master wait 1 1] + assert_equal {0} [$master wait 1 1000] { "slave shouldn't be + synchronized since its stopped" } } # Resume the replica we paused in the prior test exec kill -SIGCONT $slave_pid @@ -71,8 +74,8 @@ start_server {tags {"active-repl"} overrides {active-replica yes}} { } $master pexpire testkey1 200 after 1000 - assert_equal {0} [$master del testkey1] - assert_equal {0} [$slave del testkey1] + assert_equal {0} [$master del testkey1] {"master expired"} + assert_equal {0} [$slave del testkey1] {"slave expired"} $slave set testkey1 foo px 200 after 1000 From 7468c691ad04829c1fd3ae69f206946e8f38254a Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 13 Jul 2019 16:44:11 -0400 Subject: [PATCH 97/99] Blocked clients can stall when under low load --- src/blocked.cpp | 3 +++ src/server.cpp | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/blocked.cpp b/src/blocked.cpp index 1f807dac3..c09488b1f 100644 --- a/src/blocked.cpp +++ b/src/blocked.cpp @@ -64,6 +64,7 @@ */ #include "server.h" +#include int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int where); @@ -180,6 +181,7 @@ void queueClientForReprocessing(client *c) { * of operation the client is blocking for. */ void unblockClient(client *c) { serverAssert(GlobalLocksAcquired()); + serverAssert(c->lock.fOwnLock()); if (c->btype == BLOCKED_LIST || c->btype == BLOCKED_ZSET || c->btype == BLOCKED_STREAM) { @@ -301,6 +303,7 @@ void handleClientsBlockedOnKeys(void) { while(numclients--) { listNode *clientnode = listFirst(clients); client *receiver = (client*)clientnode->value; + std::unique_lock lock(receiver->lock); if (receiver->btype != BLOCKED_LIST) { /* Put at the tail, so that at the next call diff --git a/src/server.cpp b/src/server.cpp index fa03b6efc..8afe5641c 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -1785,6 +1785,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { UNUSED(id); UNUSED(clientData); + /* If another threads unblocked one of our clients, and this thread has been idle + then beforeSleep won't have a chance to process the unblocking. So we also + process them here in the cron job to ensure they don't starve. + */ + if (listLength(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].unblocked_clients)) + { + processUnblockedClients(IDX_EVENT_LOOP_MAIN); + } + ProcessPendingAsyncWrites(); // This is really a bug, but for now catch any laggards that didn't clean up /* Software watchdog: deliver the SIGALRM that will reach the signal @@ -2051,6 +2060,15 @@ int serverCronLite(struct aeEventLoop *eventLoop, long long id, void *clientData int iel = ielFromEventLoop(eventLoop); serverAssert(iel != IDX_EVENT_LOOP_MAIN); + + /* If another threads unblocked one of our clients, and this thread has been idle + then beforeSleep won't have a chance to process the unblocking. So we also + process them here in the cron job to ensure they don't starve. + */ + if (listLength(g_pserver->rgthreadvar[iel].unblocked_clients)) + { + processUnblockedClients(iel); + } ProcessPendingAsyncWrites(); // A bug but leave for now, events should clean up after themselves clientsCron(iel); From 971f69fe1ff9cce50492a47f306b312457e50b9f Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 15 Jul 2019 14:55:18 -0400 Subject: [PATCH 98/99] Implement test mode to make finding bugs easier --- src/config.cpp | 2 ++ src/networking.cpp | 28 ++++++++++++++++++++++++---- src/server.cpp | 2 ++ src/server.h | 2 ++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index badedcee4..fd284e5d8 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -864,6 +864,8 @@ void loadServerConfigFromString(char *config) { } else if (!strcasecmp(argv[0], "version-override") && argc == 2) { KEYDB_SET_VERSION = zstrdup(argv[1]); serverLog(LL_WARNING, "Warning version is overriden to: %s\n", KEYDB_SET_VERSION); + } else if (!strcasecmp(argv[0],"testmode") && argc == 2){ + g_fTestMode = yesnotoi(argv[1]); } else { err = "Bad directive or wrong number of arguments"; goto loaderr; } diff --git a/src/networking.cpp b/src/networking.cpp index 7cf0ae55f..1ffc1fea4 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -1097,10 +1097,30 @@ void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) { serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport); int ielCur = ielFromEventLoop(el); - // We always accept on the same thread - aeAcquireLock(); - acceptCommonHandler(cfd,0,cip, ielCur); - aeReleaseLock(); + if (!g_fTestMode) + { + // We always accept on the same thread + LLocalThread: + aeAcquireLock(); + acceptCommonHandler(cfd,0,cip, ielCur); + aeReleaseLock(); + } + else + { + // In test mode we want a good distribution among threads and avoid the main thread + // since the main thread is most likely to work + int iel = IDX_EVENT_LOOP_MAIN; + while (cserver.cthreads > 1 && iel == IDX_EVENT_LOOP_MAIN) + iel = rand() % cserver.cthreads; + if (iel == ielFromEventLoop(el)) + goto LLocalThread; + char *szT = (char*)zmalloc(NET_IP_STR_LEN, MALLOC_LOCAL); + memcpy(szT, cip, NET_IP_STR_LEN); + aePostFunction(g_pserver->rgthreadvar[iel].el, [cfd, iel, szT]{ + acceptCommonHandler(cfd,0,szT, iel); + zfree(szT); + }); + } } } diff --git a/src/server.cpp b/src/server.cpp index 8afe5641c..8ccbab059 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -61,6 +61,8 @@ #include #include "aelocker.h" +int g_fTestMode = false; + /* Our shared "common" objects */ struct sharedObjectsStruct shared; diff --git a/src/server.h b/src/server.h index 8a762e239..912aba251 100644 --- a/src/server.h +++ b/src/server.h @@ -88,6 +88,8 @@ typedef long long mstime_t; /* millisecond time type. */ #include "endianconv.h" #include "crc64.h" +extern int g_fTestMode; + struct redisObject; class robj_roptr { From a19d0f67b9fffefd9029b9a2ef3358b110302bbf Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 15 Jul 2019 14:55:41 -0400 Subject: [PATCH 99/99] Crash when aborting SYNC with a master on a thread other than main --- src/replication.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/replication.cpp b/src/replication.cpp index c782e7aa7..14ee002aa 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -2198,8 +2198,10 @@ int connectWithMaster(redisMaster *mi) { void undoConnectWithMaster(redisMaster *mi) { int fd = mi->repl_transfer_s; - aeDeleteFileEvent(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el,fd,AE_READABLE|AE_WRITABLE); - close(fd); + aePostFunction(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el, [fd]{ + aeDeleteFileEvent(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el,fd,AE_READABLE|AE_WRITABLE); + close(fd); + }); mi->repl_transfer_s = -1; }