diff --git a/cmd/icingadb/main.go b/cmd/icingadb/main.go index bd94686d7..4c0bb8f7e 100644 --- a/cmd/icingadb/main.go +++ b/cmd/icingadb/main.go @@ -101,6 +101,7 @@ func run() int { // the heartbeat is not read while HA gets stuck when updating the instance table. var heartbeat *icingaredis.Heartbeat var ha *icingadb.HA + var telemetrySyncStats *atomic.Pointer[telemetry.SuccessfulSync] { rc, err := cmd.Redis(logs.GetChildLogger("redis")) if err != nil { @@ -116,7 +117,7 @@ func run() int { ha = icingadb.NewHA(ctx, db, heartbeat, logs.GetChildLogger("high-availability")) telemetryLogger := logs.GetChildLogger("telemetry") - telemetry.StartHeartbeat(ctx, rc, telemetryLogger, ha, heartbeat) + telemetrySyncStats = telemetry.StartHeartbeat(ctx, rc, telemetryLogger, ha, heartbeat) telemetry.WriteStats(ctx, rc, telemetryLogger) } // Closing ha on exit ensures that this instance retracts its heartbeat @@ -250,7 +251,7 @@ func run() int { logger := logs.GetChildLogger("config-sync") if synctx.Err() == nil { - telemetry.LastSuccessfulSync.Store(telemetry.SuccessfulSync{ + telemetrySyncStats.Store(&telemetry.SuccessfulSync{ FinishMilli: syncEnd.UnixMilli(), DurationMilli: elapsed.Milliseconds(), }) diff --git a/pkg/icingadb/ha.go b/pkg/icingadb/ha.go index 6460ac32d..1bb04b17c 100644 --- a/pkg/icingadb/ha.go +++ b/pkg/icingadb/ha.go @@ -7,7 +7,6 @@ import ( "encoding/hex" "github.com/google/uuid" "github.com/icinga/icinga-go-library/backoff" - "github.com/icinga/icinga-go-library/com" "github.com/icinga/icinga-go-library/database" "github.com/icinga/icinga-go-library/logging" "github.com/icinga/icinga-go-library/retry" @@ -19,6 +18,7 @@ import ( "github.com/pkg/errors" "go.uber.org/zap" "sync" + "sync/atomic" "time" ) @@ -35,7 +35,7 @@ type haState struct { // HA provides high availability and indicates whether a Takeover or Handover must be made. type HA struct { - state com.Atomic[haState] + state atomic.Pointer[haState] ctx context.Context cancelCtx context.CancelFunc instanceId types.Binary @@ -71,6 +71,8 @@ func NewHA(ctx context.Context, db *database.DB, heartbeat *icingaredis.Heartbea done: make(chan struct{}), } + ha.state.Store(&haState{}) + go ha.controller() return ha @@ -121,7 +123,8 @@ func (h *HA) Takeover() chan string { // State returns the status quo. func (h *HA) State() (responsibleTsMilli int64, responsible, otherResponsible bool) { - state, _ := h.state.Load() + state := h.state.Load() + return state.responsibleTsMilli, state.responsible, state.otherResponsible } @@ -428,9 +431,12 @@ func (h *HA) realize( h.signalTakeover(takeover) } else if otherResponsible { - if state, _ := h.state.Load(); !state.otherResponsible { - state.otherResponsible = true - h.state.Store(state) + if state := h.state.Load(); !state.otherResponsible { + // Dereference pointer to create a copy of the value it points to. + // Ensures that any modifications do not directly affect the original data unless explicitly stored back. + newState := *state + newState.otherResponsible = true + h.state.Store(&newState) } } @@ -496,7 +502,7 @@ func (h *HA) removeOldInstances(s *icingaredisv1.IcingaStatus, envId types.Binar // signalHandover gives up HA.responsible and notifies the HA.Handover chan. func (h *HA) signalHandover(reason string) { if h.responsible { - h.state.Store(haState{ + h.state.Store(&haState{ responsibleTsMilli: time.Now().UnixMilli(), responsible: false, otherResponsible: false, @@ -514,7 +520,7 @@ func (h *HA) signalHandover(reason string) { // signalTakeover claims HA.responsible and notifies the HA.Takeover chan. func (h *HA) signalTakeover(reason string) { if !h.responsible { - h.state.Store(haState{ + h.state.Store(&haState{ responsibleTsMilli: time.Now().UnixMilli(), responsible: true, otherResponsible: false, diff --git a/pkg/icingaredis/telemetry/heartbeat.go b/pkg/icingaredis/telemetry/heartbeat.go index 041a696d0..c29b15335 100644 --- a/pkg/icingaredis/telemetry/heartbeat.go +++ b/pkg/icingaredis/telemetry/heartbeat.go @@ -3,7 +3,6 @@ package telemetry import ( "context" "fmt" - "github.com/icinga/icinga-go-library/com" "github.com/icinga/icinga-go-library/logging" "github.com/icinga/icinga-go-library/periodic" "github.com/icinga/icinga-go-library/redis" @@ -80,16 +79,17 @@ func GetCurrentDbConnErr() (string, int64) { // OngoingSyncStartMilli is to be updated by the main() function. var OngoingSyncStartMilli int64 -// LastSuccessfulSync is to be updated by the main() function. -var LastSuccessfulSync com.Atomic[SuccessfulSync] - var boolToStr = map[bool]string{false: "0", true: "1"} var startTime = time.Now().UnixMilli() // StartHeartbeat periodically writes heartbeats to Redis for being monitored by Icinga 2. +// It returns an atomic pointer to SuccessfulSync, +// which contains synchronisation statistics that the caller should update. func StartHeartbeat( ctx context.Context, client *redis.Client, logger *logging.Logger, ha ha, heartbeat *icingaredis.Heartbeat, -) { +) *atomic.Pointer[SuccessfulSync] { + var syncStats atomic.Pointer[SuccessfulSync] + syncStats.Store(&SuccessfulSync{}) goMetrics := NewGoMetrics() const interval = time.Second @@ -101,7 +101,7 @@ func StartHeartbeat( heartbeat := heartbeat.LastReceived() responsibleTsMilli, responsible, otherResponsible := ha.State() ongoingSyncStart := atomic.LoadInt64(&OngoingSyncStartMilli) - sync, _ := LastSuccessfulSync.Load() + lastSync := syncStats.Load() dbConnErr, dbConnErrSinceMilli := GetCurrentDbConnErr() now := time.Now() @@ -117,8 +117,8 @@ func StartHeartbeat( "ha-responsible-ts": strconv.FormatInt(responsibleTsMilli, 10), "ha-other-responsible": boolToStr[otherResponsible], "sync-ongoing-since": strconv.FormatInt(ongoingSyncStart, 10), - "sync-success-finish": strconv.FormatInt(sync.FinishMilli, 10), - "sync-success-duration": strconv.FormatInt(sync.DurationMilli, 10), + "sync-success-finish": strconv.FormatInt(lastSync.FinishMilli, 10), + "sync-success-duration": strconv.FormatInt(lastSync.DurationMilli, 10), } ctx, cancel := context.WithDeadline(ctx, tick.Time.Add(interval)) @@ -145,6 +145,8 @@ func StartHeartbeat( silenceUntil = time.Time{} } }) + + return &syncStats } type goMetrics struct {