From c9deda3036b9a2b59f9412bb05e9b26f78b4fa7a Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 12 Aug 2024 17:25:53 +0200 Subject: [PATCH 1/2] HA: Reduce deadlocks via exclusive locking (`SELECT ... FOR UPDATE`) Within the HA transaction, we always retrieve rows from the `icingadb_instance` table to determine responsibility between the instances. Previously, this selection was performed using a shared lock (`SELECT ... LOCK IN SHARE MODE`). Although selectin rows is generally not an issue, both Icinga DB instances perform an upsert on the same table right after the select statement, resulting in deadlock errors most of the time. In order to reduce the deadlocks on both sides, an exclusive lock on the selected rows is necessary, which can be achieved using the `SELECT ... FOR UDPATE` command. However, deadlocks can still occur if the `icingadb_instance` table is empty, i.e. when rows are available to lock exclusively. --- pkg/icingadb/ha.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pkg/icingadb/ha.go b/pkg/icingadb/ha.go index 1bb04b17c..0f66e135f 100644 --- a/pkg/icingadb/ha.go +++ b/pkg/icingadb/ha.go @@ -294,9 +294,15 @@ func (h *HA) realize( if h.db.DriverName() == database.MySQL { // The RDBMS may actually be a Percona XtraDB Cluster which doesn't - // support serializable transactions, but only their following equivalent: + // support serializable transactions, but only their equivalent SELECT ... LOCK IN SHARE MODE. + // However, in order to reduce the deadlocks on both sides, it is necessary to obtain an exclusive lock + // on the selected rows. This can be achieved by utilising the SELECT ... FOR UPDATE command. + // Nevertheless, deadlocks may still occur, when the "icingadb_instance" table is empty, i.e. when + // there's no available row to be locked exclusively. + // + // See also: https://dev.mysql.com/doc/refman/5.7/en/innodb-locking-reads.html isoLvl = sql.LevelRepeatableRead - selectLock = " LOCK IN SHARE MODE" + selectLock = " FOR UPDATE" } tx, errBegin := h.db.BeginTxx(ctx, &sql.TxOptions{Isolation: isoLvl}) From 04dd8ff6a5df4378ac83ba4f7ec234768155bca0 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Fri, 25 Oct 2024 14:21:27 +0200 Subject: [PATCH 2/2] HA: Use `... FOR UPDATE` lock clause unconditionally --- pkg/icingadb/ha.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/icingadb/ha.go b/pkg/icingadb/ha.go index 0f66e135f..91e9a0376 100644 --- a/pkg/icingadb/ha.go +++ b/pkg/icingadb/ha.go @@ -290,19 +290,12 @@ func (h *HA) realize( takeover = "" otherResponsible = false isoLvl := sql.LevelSerializable - selectLock := "" if h.db.DriverName() == database.MySQL { - // The RDBMS may actually be a Percona XtraDB Cluster which doesn't - // support serializable transactions, but only their equivalent SELECT ... LOCK IN SHARE MODE. - // However, in order to reduce the deadlocks on both sides, it is necessary to obtain an exclusive lock - // on the selected rows. This can be achieved by utilising the SELECT ... FOR UPDATE command. - // Nevertheless, deadlocks may still occur, when the "icingadb_instance" table is empty, i.e. when - // there's no available row to be locked exclusively. - // - // See also: https://dev.mysql.com/doc/refman/5.7/en/innodb-locking-reads.html + // The RDBMS may actually be a Percona XtraDB Cluster which doesn't support serializable + // transactions, but only their equivalent SELECT ... LOCK IN SHARE MODE. + // See https://dev.mysql.com/doc/refman/8.4/en/innodb-transaction-isolation-levels.html#isolevel_serializable isoLvl = sql.LevelRepeatableRead - selectLock = " FOR UPDATE" } tx, errBegin := h.db.BeginTxx(ctx, &sql.TxOptions{Isolation: isoLvl}) @@ -310,8 +303,15 @@ func (h *HA) realize( return errors.Wrap(errBegin, "can't start transaction") } - query := h.db.Rebind("SELECT id, heartbeat FROM icingadb_instance "+ - "WHERE environment_id = ? AND responsible = ? AND id <> ?") + selectLock + // In order to reduce the deadlocks on both sides, it is necessary to obtain an exclusive lock + // on the selected rows. This can be achieved by utilising the SELECT ... FOR UPDATE command. + // Nevertheless, deadlocks may still occur, when the "icingadb_instance" table is empty, i.e. when + // there's no available row to be locked exclusively. + // + // Note that even without the ... FOR UPDATE lock clause, this shouldn't cause a deadlock on PostgreSQL. + // Instead, it triggers a read/write serialization failure when attempting to commit the transaction. + query := h.db.Rebind("SELECT id, heartbeat FROM icingadb_instance " + + "WHERE environment_id = ? AND responsible = ? AND id <> ? FOR UPDATE") instance := &v1.IcingadbInstance{} errQuery := tx.QueryRowxContext(ctx, query, envId, "y", h.instanceId).StructScan(instance)