Skip to content

Commit

Permalink
fix: checking for split brain (#87)
Browse files Browse the repository at this point in the history
In rare cases a MySQL replica can commit a transaction before the master does
  • Loading branch information
Fizic authored Mar 20, 2024
1 parent e4dc375 commit 5f26de5
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 2 deletions.
8 changes: 7 additions & 1 deletion internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,12 @@ func (app *App) calcActiveNodes(clusterState, clusterStateDcs map[string]*NodeSt
app.logger.Warnf("failed to get master status %v", err)
return nil, err
}
muuid, err := masterNode.UUID()
if err != nil {
app.logger.Warnf("failed to get master uuid %v", err)
return nil, err
}

for host, node := range clusterState {
if host == master {
activeNodes = append(activeNodes, master)
Expand Down Expand Up @@ -869,7 +875,7 @@ func (app *App) calcActiveNodes(clusterState, clusterStateDcs map[string]*NodeSt
continue
}
sgtids := gtids.ParseGtidSet(sstatus.ExecutedGtidSet)
if !(sstatus.ReplicationState == mysql.ReplicationRunning && isGTIDLessOrEqual(sgtids, mgtids)) {
if sstatus.ReplicationState != mysql.ReplicationRunning || isSplitBrained(sgtids, mgtids, muuid) {
app.logger.Errorf("calc active nodes: %s is not replicating or splitbrained, deleting from active...", host)
continue
}
Expand Down
25 changes: 25 additions & 0 deletions internal/app/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"fmt"
"time"

gomysql "github.com/go-mysql-org/go-mysql/mysql"
"github.com/google/uuid"
"github.com/yandex/mysync/internal/log"
"github.com/yandex/mysync/internal/mysql"
"github.com/yandex/mysync/internal/mysql/gtids"
Expand Down Expand Up @@ -233,6 +235,29 @@ func isGTIDLessOrEqual(slaveGtidSet, masterGtidSet gtids.GTIDSet) bool {
return masterGtidSet.Contain(slaveGtidSet) || masterGtidSet.Equal(slaveGtidSet)
}

// isSplitBrained reports whether the replica's executed GTID set contains
// transactions that cannot be explained by replication from the current master.
//
// Every per-server UUID set executed on the replica is checked:
//   - a set whose SID equals masterUUID is always accepted: a replica can show
//     a transaction of the current master before the master itself reports it,
//     so being ahead there is not a split brain. This also holds when the
//     master has no set for its own UUID yet (its very first transaction);
//   - a set that belongs to any other server must be fully contained in the
//     master's set for the same SID; a missing or smaller master set means the
//     replica applied transactions the master does not have.
//
// Both GTID sets must hold *gomysql.MysqlGTIDSet values; any other
// implementation makes the type assertions below panic.
func isSplitBrained(slaveGtidSet, masterGtidSet gtids.GTIDSet, masterUUID uuid.UUID) bool {
	mysqlSlaveGtidSet := slaveGtidSet.(*gomysql.MysqlGTIDSet)
	mysqlMasterGtidSet := masterGtidSet.(*gomysql.MysqlGTIDSet)
	for _, slaveSet := range mysqlSlaveGtidSet.Sets {
		// Transactions originated by the current master are never a sign of
		// split brain. Check this before the lookup so that a master which has
		// not yet recorded its own first transaction is handled the same way
		// as one that is merely a few transactions behind the replica.
		if slaveSet.SID == masterUUID {
			continue
		}

		masterSet, ok := mysqlMasterGtidSet.Sets[slaveSet.SID.String()]
		if !ok || !masterSet.Contain(slaveSet) {
			// Foreign transactions unknown to the master.
			return true
		}
	}

	return false
}

func validatePriority(priority *int64) error {
if priority == nil || *priority >= 0 {
return nil
Expand Down
49 changes: 49 additions & 0 deletions internal/app/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,3 +327,52 @@ func getLogger() *log.Logger {
}
return l
}

// TestIsSplitBrained runs isSplitBrained against one fixed master GTID set and
// a list of replica GTID sets, checking the verdict for each of them in order.
func TestIsSplitBrained(t *testing.T) {
	masterGTID := mustGTIDSet("6DBC0B04-4B09-43DC-86CC-9AF852DED919:1-100," +
		"09978591-5754-4710-BF67-062880ABE1B4:1-100," +
		"AA6890C8-69F8-4BC4-B3A5-5D3FEA8C28CF:1-100")
	// The first UUID of the set plays the role of the current master's server_uuid.
	masterUUID := masterGTID.(*gomysql.MysqlGTIDSet).Sets["6dbc0b04-4b09-43dc-86cc-9af852ded919"].SID

	scenarios := []struct {
		replicaGTID  string
		splitBrained bool
	}{
		{
			// equal gtids
			replicaGTID: "6DBC0B04-4B09-43DC-86CC-9AF852DED919:1-100," +
				"09978591-5754-4710-BF67-062880ABE1B4:1-100," +
				"AA6890C8-69F8-4BC4-B3A5-5D3FEA8C28CF:1-100",
			splitBrained: false,
		},
		{
			// the replica is lagging behind the master
			replicaGTID: "6DBC0B04-4B09-43DC-86CC-9AF852DED919:1-99," +
				"09978591-5754-4710-BF67-062880ABE1B4:1-100," +
				"AA6890C8-69F8-4BC4-B3A5-5D3FEA8C28CF:1-100",
			splitBrained: false,
		},
		{
			// the replica is lagging behind the new master
			replicaGTID: "6DBC0B04-4B09-43DC-86CC-9AF852DED919:1-100," +
				"09978591-5754-4710-BF67-062880ABE1B4:1-100",
			splitBrained: false,
		},
		{
			// the replica applied the transaction from the master before the master
			replicaGTID: "6DBC0B04-4B09-43DC-86CC-9AF852DED919:1-101," +
				"09978591-5754-4710-BF67-062880ABE1B4:1-100," +
				"AA6890C8-69F8-4BC4-B3A5-5D3FEA8C28CF:1-100",
			splitBrained: false,
		},
		{
			// the replica applied a transaction not from the master
			replicaGTID: "6DBC0B04-4B09-43DC-86CC-9AF852DED919:1-100," +
				"09978591-5754-4710-BF67-062880ABE1B4:1-100," +
				"AA6890C8-69F8-4BC4-B3A5-5D3FEA8C28CF:1-101",
			splitBrained: true,
		},
		{
			// the replica applied a new transaction not from the master
			replicaGTID: "6DBC0B04-4B09-43DC-86CC-9AF852DED919:1-101," +
				"09978591-5754-4710-BF67-062880ABE1B4:1-100," +
				"AA6890C8-69F8-4BC4-B3A5-5D3FEA8C28CF:1-100," +
				"BB6890C8-69F8-4BC4-B3A5-5D3FEA8C28CF:1-100",
			splitBrained: true,
		},
	}

	for _, sc := range scenarios {
		verdict := isSplitBrained(mustGTIDSet(sc.replicaGTID), masterGTID, masterUUID)
		if sc.splitBrained {
			require.True(t, verdict)
		} else {
			require.False(t, verdict)
		}
	}
}
4 changes: 4 additions & 0 deletions internal/mysql/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ type readOnlyResult struct {
SuperReadOnly int `db:"SuperReadOnly"`
}

// ServerUUIDResult is the scan target for a query that returns a single
// server_uuid column.
type ServerUUIDResult struct {
// ServerUUID is the raw textual value of the server_uuid column.
ServerUUID string `db:"server_uuid"`
}

// CascadeNodeConfiguration is a dcs node configuration for cascade mysql replica
type CascadeNodeConfiguration struct {
// StreamFrom - is a host to stream from. Can be changed from CLI.
Expand Down
21 changes: 20 additions & 1 deletion internal/mysql/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ import (
"time"

"github.com/go-sql-driver/mysql"
"github.com/google/uuid"
"github.com/jmoiron/sqlx"
"github.com/shirou/gopsutil/v3/process"

"github.com/yandex/mysync/internal/config"
"github.com/yandex/mysync/internal/log"
"github.com/yandex/mysync/internal/mysql/gtids"
Expand All @@ -34,6 +34,7 @@ type Node struct {
db *sqlx.DB
version *Version
host string
uuid uuid.UUID
}

var (
Expand Down Expand Up @@ -602,6 +603,24 @@ func (n *Node) GetBinlogs() ([]Binlog, error) {
return binlogs, err
}

// UUID returns the server_uuid of the MySQL instance behind this node.
//
// The value is fetched with the queryGetUUID query on first use and stored in
// n.uuid; later calls return the stored value without querying the server.
// An error is returned if the query fails or if the reported value cannot be
// parsed as a UUID; nothing is cached in that case.
func (n *Node) UUID() (uuid.UUID, error) {
	// uuid.Nil (all zero bytes) is the zero value of the field and therefore
	// means "not fetched yet". Comparing the whole value avoids UUID.ID(),
	// which only looks at the first four bytes.
	if n.uuid != uuid.Nil {
		return n.uuid, nil
	}

	var r ServerUUIDResult
	if err := n.queryRow(queryGetUUID, nil, &r); err != nil {
		return uuid.UUID{}, err
	}

	v, err := uuid.Parse(r.ServerUUID)
	if err != nil {
		return uuid.UUID{}, err
	}

	n.uuid = v
	return v, nil
}

// IsReadOnly returns (true, true) if MySQL Node in (read-only, super-read-only) mode
func (n *Node) IsReadOnly() (bool, bool, error) {
var ror readOnlyResult
Expand Down
2 changes: 2 additions & 0 deletions internal/mysql/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const (
queryReplicaStatus = "replica_status"
queryGetVersion = "get_version"
queryGTIDExecuted = "gtid_executed"
queryGetUUID = "get_uuid"
queryShowBinaryLogs = "binary_logs"
queryReplicationLag = "replication_lag"
querySlaveHosts = "slave_hosts"
Expand Down Expand Up @@ -53,6 +54,7 @@ var DefaultQueries = map[string]string{
queryReplicaStatus: `SHOW REPLICA STATUS FOR CHANNEL :channel`,
queryGetVersion: `SELECT sys.version_major() AS MajorVersion, sys.version_minor() AS MinorVersion, sys.version_patch() AS PatchVersion`,
queryGTIDExecuted: `SELECT @@GLOBAL.gtid_executed as Executed_Gtid_Set`,
queryGetUUID: `SELECT @@server_uuid as server_uuid`,
queryShowBinaryLogs: `SHOW BINARY LOGS`,
querySlaveHosts: `SHOW SLAVE HOSTS`,
queryReplicationLag: ``,
Expand Down

0 comments on commit 5f26de5

Please sign in to comment.