From f8fa60cf5d7bb5621bf238edc61a4804e54c283d Mon Sep 17 00:00:00 2001 From: Eric Lippmann Date: Tue, 2 Apr 2024 12:35:02 +0200 Subject: [PATCH] `ha`: Give up retrying after 5 minutes Since we are now retrying every database error, we also need to set a retry timeout. --- pkg/icingadb/ha.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pkg/icingadb/ha.go b/pkg/icingadb/ha.go index de31f773f..3d8105ba0 100644 --- a/pkg/icingadb/ha.go +++ b/pkg/icingadb/ha.go @@ -327,6 +327,7 @@ func (h *HA) realize(ctx context.Context, s *icingaredisv1.IcingaStatus, t *type retry.Retryable, backoff.NewExponentialWithJitter(time.Millisecond*256, time.Second*3), retry.Settings{ + Timeout: time.Minute * 5, OnError: func(_ time.Duration, attempt uint64, err, lastErr error) { if lastErr == nil || err.Error() != lastErr.Error() { log := h.logger.Debugw @@ -337,6 +338,22 @@ func (h *HA) realize(ctx context.Context, s *icingaredisv1.IcingaStatus, t *type log("Can't update or insert instance. Retrying", zap.Error(err), zap.Uint64("retry count", attempt)) } }, + OnSuccess: func(elapsed time.Duration, attempt uint64, lastErr error) { + if attempt > 0 { + log := h.logger.Debugw + + if attempt > 3 { + // We log errors with severity info starting from the fourth attempt, (see above) + // so we need to log success with severity info from the fifth attempt. + log = h.logger.Infow + } + + log("Instance updated/inserted successfully after error", + zap.Duration("after", elapsed), + zap.Uint64("attempts", attempt+1), + zap.NamedError("recovered_error", lastErr)) + } + }, }, ) if err != nil {