Skip to content

Commit

Permalink
Postpone detection of master rdb-channel capabilities
Browse files Browse the repository at this point in the history
At the replica side wait until the master respond with -FULLSYNCNEEDED
before marking it as capable to rdb-channel sync.

Signed-off-by: naglera <[email protected]>
  • Loading branch information
naglera committed Jun 13, 2024
1 parent 5411d92 commit 007ad2f
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 52 deletions.
68 changes: 23 additions & 45 deletions src/replication.c
Original file line number Diff line number Diff line change
Expand Up @@ -1127,8 +1127,10 @@ void syncCommand(client *c) {
* resync on purpose when they are not able to partially
* resync. */
if (master_replid[0] != '?') server.stat_sync_partial_err++;
if (c->flags & CLIENT_REPL_MAIN_CHANNEL) {
serverLog(LL_NOTICE,"Replica %s is marked as main-conn, and psync isn't possible. Full sync will continue with dedicated RDB connection.", replicationGetSlaveName(c));
if (c->slave_capa & SLAVE_CAPA_RDB_CHANNEL) {
c->flags |= CLIENT_REPL_MAIN_CHANNEL;
serverLog(LL_NOTICE,"Replica %s is capable of rdb-channel synchronization, and partial sync isn't possible. "
"Full sync will continue with dedicated RDB connection.", replicationGetSlaveName(c));
if (connWrite(c->conn,"-FULLSYNCNEEDED\r\n",17) != 17) {
freeClientAsync(c);
}
Expand Down Expand Up @@ -1253,10 +1255,11 @@ void syncCommand(client *c) {
* the master can accurately lists replicas and their listening ports in the
* INFO output.
*
* - capa <eof|psync2>
* - capa <eof|psync2|dual-channel>
* What is the capabilities of this instance.
* eof: supports EOF-style RDB transfer for diskless replication.
* psync2: supports PSYNC v2, so understands +CONTINUE <new repl ID>.
* dual-channel: supports full sync using rdb channel.
*
* - ack <offset> [fack <aofofs>]
* Replica informs the master the amount of replication stream that it
Expand All @@ -1278,11 +1281,6 @@ void syncCommand(client *c) {
* - rdb-channel <1|0>
* Used to identify the client as a replica's rdb connection in an rdb connection
* sync session.
*
* - main-conn <1|0>
* Used to identify the replica main connection during
* rdb-connection sync. It also means that if psync is impossible, master
* should not auto trigger full sync.
* */
void replconfCommand(client *c) {
int j;
Expand Down Expand Up @@ -1319,6 +1317,12 @@ void replconfCommand(client *c) {
c->slave_capa |= SLAVE_CAPA_EOF;
else if (!strcasecmp(c->argv[j+1]->ptr,"psync2"))
c->slave_capa |= SLAVE_CAPA_PSYNC2;
else if (!strcasecmp(c->argv[j+1]->ptr,"dual-channel") &&
server.rdb_channel_enabled && server.repl_diskless_sync) {
/* If rdb-channel is disable on this master, treat this command as unrecognized
* replconf option. */
c->slave_capa |= SLAVE_CAPA_RDB_CHANNEL;
}
} else if (!strcasecmp(c->argv[j]->ptr,"ack")) {
/* REPLCONF ACK is used by slave to inform the master the amount
* of replication stream that it processed so far. It is an
Expand Down Expand Up @@ -1409,28 +1413,6 @@ void replconfCommand(client *c) {
c->flags &= ~CLIENT_REPL_RDB_CHANNEL;
c->slave_req &= ~SLAVE_REQ_RDB_CHANNEL;
}
} else if (!strcasecmp(c->argv[j]->ptr, "main-conn") && server.rdb_channel_enabled) {
/* If rdb-channel is disable on this master, treat this command as unrecognized
* replconf option. */
long main_conn = 0;
if (getRangeLongFromObjectOrReply(c, c->argv[j +1],
0, 1, &main_conn, NULL) != C_OK) {
return;
}
if (main_conn == 1) {
if (!server.repl_diskless_sync) {
/* When the primary uses disk for full sync, replicas can usually join during the time the
* primary saves the database to disk. RDB-channel-sync, however, does not allow replicas
* to join the primary since the COB does not keep the needed replication data. To avoid
* making the primary create a new snapshot for each replica, we forbid rdb-channel-sync
* along with primary disk-based sync */
addReplyErrorFormat(c,"Rdb channel sync is not allowed when repl-diskless-sync disabled on primary");
return;
}
c->flags |= CLIENT_REPL_MAIN_CHANNEL;
} else {
c->flags &= ~CLIENT_REPL_MAIN_CHANNEL;
}
} else if (!strcasecmp(c->argv[j]->ptr, "set-rdb-conn-id")) {
/* REPLCONF identify <client-id> is used to identify the current replica main connection with existing
* rdb-connection with the given id. */
Expand Down Expand Up @@ -3141,6 +3123,9 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {

if (!strncmp(reply,"+FULLRESYNC",11)) {
char *replid = NULL, *offset = NULL;
if (server.rdb_channel_enabled) {
server.master_supports_rdb_channel = 0;
}

/* FULL RESYNC, parse the reply in order to extract the replid
* and the replication offset. */
Expand Down Expand Up @@ -3241,8 +3226,10 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {
}

if (!strncmp(reply, "-FULLSYNCNEEDED", 15)) {
/* In case the main connection with master is at main-conn mode, the master
* will respond with -FULLSYNCNEEDED to imply that psync is not possible */
/* A response of -FULLSYNCNEEDED from the master implies that partial
* synchronization is not possible. Full sync will continue on dedicated
* RDB channel. */
server.master_supports_rdb_channel = 1;
serverLog(LL_NOTICE, "PSYNC is not possible, initialize RDB channel.");
sdsfree(reply);
return PSYNC_FULLRESYNC_RDB_CONN;
Expand Down Expand Up @@ -3489,11 +3476,10 @@ void syncWithMaster(connection *conn) {
if (err) goto write_error;
}

/* When using rdb-channel for sync, mark the main connection only for psync.
* The master's capabilities will also be verified here, since if the master
* does not support rdb-channel sync, it will not understand this command. */
/* When using rdb-channel for sync, announce that the replica is capable
* of rdb channel sync. */
if (server.rdb_channel_enabled) {
err = sendCommand(conn,"REPLCONF", "main-conn", "1", NULL);
err = sendCommand(conn,"REPLCONF", "capa" ,"dual-channel", NULL);
}

/* Inform the master of our (slave) capabilities.
Expand Down Expand Up @@ -3569,15 +3555,7 @@ void syncWithMaster(connection *conn) {
err = receiveSynchronousResponse(conn);
if (err == NULL) goto error;
else if (err[0] == '-') {
/* Activate rdb-channel fallback mechanism. The master did not understand
* REPLCONF main-conn. This means the master does not support RDB channel
* synchronization. The replica will continue the sync session with one
* channel (normally). */
serverLog(LL_WARNING, "Master does not understand REPLCONF main-conn aborting rdb-channel sync %s", err);
server.master_supports_rdb_channel = 0;
} else if (memcmp(err, "+OK", 3) == 0) {
/* Master support rdb-connection sync. Continue with rdb-channel approach. */
server.master_supports_rdb_channel = 1;
serverLog(LL_NOTICE,"(Non critical) Master is not capable of rdb-channel sync");
}
sdsfree(err);
server.repl_state = REPL_STATE_RECEIVE_CAPA_REPLY;
Expand Down
1 change: 1 addition & 0 deletions src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ typedef enum {
#define SLAVE_CAPA_NONE 0
#define SLAVE_CAPA_EOF (1<<0) /* Can parse the RDB EOF streaming format. */
#define SLAVE_CAPA_PSYNC2 (1<<1) /* Supports PSYNC2 protocol. */
#define SLAVE_CAPA_RDB_CHANNEL (1<<2) /* Supports RDB channel sync */

/* Slave requirements */
#define SLAVE_REQ_NONE 0
Expand Down
11 changes: 4 additions & 7 deletions tests/integration/repl-rdb-channel.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -322,18 +322,12 @@ start_server {tags {"repl rdb-channel external:skip"}} {
$master set key5 val5

test "Rdb-channel-sync fails when primary diskless disabled" {
set cur_psync [status $master sync_partial_ok]
$master config set repl-diskless-sync no

$replica1 config set repl-rdb-channel yes
$replica1 slaveof $master_host $master_port

# Wait for sync to fail
wait_for_condition 100 50 {
[log_file_matches $replica1_log "*Master does not understand REPLCONF main-conn*"]
} else {
fail "rdb-connection sync rollback should have been triggered."
}

# Wait for mitigation and resync
wait_for_value_to_propegate_to_replica $master $replica1 "key5"

Expand All @@ -342,6 +336,9 @@ start_server {tags {"repl rdb-channel external:skip"}} {
} else {
fail "Replica is not synced"
}

# Verify that we did not use rdb-channel sync
assert {[status $master sync_partial_ok] == $cur_psync}
}
}
}
Expand Down

0 comments on commit 007ad2f

Please sign in to comment.