From 9e342b5e50b985f25d91081a744fecae44ee6fdd Mon Sep 17 00:00:00 2001 From: pavelmaliy Date: Sun, 5 Dec 2021 09:42:41 +0200 Subject: [PATCH] fix om (#726) * fix om * fix om --- operations/config.go | 6 ++++-- operations/scheduler.go | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/operations/config.go b/operations/config.go index 745117a8a..2a199fbc8 100644 --- a/operations/config.go +++ b/operations/config.go @@ -38,8 +38,9 @@ type Settings struct { MaintainerRetryInterval time.Duration `mapstructure:"maintainer_retry_interval" description:"maintenance retry interval"` Lifespan time.Duration `mapstructure:"lifespan" description:"after that time is passed since its creation, the operation can be cleaned up by the maintainer"` - ReschedulingInterval time.Duration `mapstructure:"rescheduling_interval" description:"the interval between auto rescheduling of operation actions"` - PollingInterval time.Duration `mapstructure:"polling_interval" description:"the interval between polls for async requests"` + ReschedulingInterval time.Duration `mapstructure:"rescheduling_interval" description:"the interval between auto rescheduling of operation actions"` + ReschedulingLongInterval time.Duration `mapstructure:"rescheduling_long_interval" description:"the interval between auto rescheduling of operation actions after multiple retries"` + PollingInterval time.Duration `mapstructure:"polling_interval" description:"the interval between polls for async requests"` DefaultPoolSize int `mapstructure:"default_pool_size" description:"default worker pool size"` DefaultCascadePollingPoolSize int `mapstructure:"default_cascade_polling_pool_size" description:"default worker pool size"` @@ -58,6 +59,7 @@ func DefaultSettings() *Settings { MaintainerRetryInterval: 10 * time.Minute, Lifespan: 7 * 24 * time.Hour, ReschedulingInterval: 10 * time.Second, + ReschedulingLongInterval: 1 * time.Hour, PollingInterval: 4 * time.Second, PollCascadeInterval: 4 * time.Second, DefaultPoolSize: 20, diff --git a/operations/scheduler.go b/operations/scheduler.go index 06a3642c2..e651a262b 100644 --- a/operations/scheduler.go +++ b/operations/scheduler.go @@ -46,6 +46,7 @@ type Scheduler struct { reconciliationOperationTimeout time.Duration cascadeOrphanMitigationTimeout time.Duration reschedulingDelay time.Duration + reschedulingLongDelay time.Duration wg *sync.WaitGroup } @@ -59,6 +60,7 @@ func NewScheduler(smCtx context.Context, repository storage.TransactionalReposit reconciliationOperationTimeout: settings.ReconciliationOperationTimeout, cascadeOrphanMitigationTimeout: settings.CascadeOrphanMitigationTimeout, reschedulingDelay: settings.ReschedulingInterval, + reschedulingLongDelay: settings.ReschedulingLongInterval, wg: wg, } } @@ -551,6 +553,9 @@ func (s *Scheduler) handleActionResponseFailure(ctx context.Context, actionError // if deletion timestamp was set on the op, reschedule the same op with delete action and wait for reschedulingDelay time // so that we don't DOS the broker reschedulingDelayTimeout := time.After(s.reschedulingDelay) + if time.Now().UTC().After(opAfterJob.DeletionScheduled.Add(s.actionTimeout * 2)) { + reschedulingDelayTimeout = time.After(s.reschedulingLongDelay) + } select { case <-s.smCtx.Done(): return fmt.Errorf("sm context canceled: %s", s.smCtx.Err())