From 04d86b1c368af2045ea42e0c582a0167aac76735 Mon Sep 17 00:00:00 2001 From: Jennifer Chen <32009013+jennchenn@users.noreply.github.com> Date: Wed, 22 Nov 2023 15:37:55 -0500 Subject: [PATCH] Support `CanaryAutoPauseMaxSlowStartDuration` option (#997) * Add support for max slow start duration config param * Update max slow start arg name to include auto pause * fixup! Update max slow start arg name to include auto pause --- .../datadogagent/component/agent/new.go | 17 ++++-- controllers/setup.go | 32 +++++----- main.go | 60 ++++++++++--------- 3 files changed, 60 insertions(+), 49 deletions(-) diff --git a/controllers/datadogagent/component/agent/new.go b/controllers/datadogagent/component/agent/new.go index 4878e31c7a..896487d5d5 100644 --- a/controllers/datadogagent/component/agent/new.go +++ b/controllers/datadogagent/component/agent/new.go @@ -64,12 +64,13 @@ type ExtendedDaemonsetOptions struct { MaxPodUnavailable string MaxPodSchedulerFailure string - CanaryDuration time.Duration - CanaryReplicas string - CanaryAutoPauseEnabled bool - CanaryAutoPauseMaxRestarts int32 - CanaryAutoFailEnabled bool - CanaryAutoFailMaxRestarts int32 + CanaryDuration time.Duration + CanaryReplicas string + CanaryAutoPauseEnabled bool + CanaryAutoPauseMaxRestarts int32 + CanaryAutoFailEnabled bool + CanaryAutoFailMaxRestarts int32 + CanaryAutoPauseMaxSlowStartDuration time.Duration } func defaultEDSSpec(options *ExtendedDaemonsetOptions) edsv1alpha1.ExtendedDaemonSetSpec { @@ -101,6 +102,10 @@ func defaultEDSSpec(options *ExtendedDaemonsetOptions) edsv1alpha1.ExtendedDaemo spec.Strategy.Canary.AutoFail.MaxRestarts = edsv1alpha1.NewInt32(options.CanaryAutoFailMaxRestarts) } + if options.CanaryAutoPauseMaxSlowStartDuration != 0 { + spec.Strategy.Canary.AutoPause.MaxSlowStartDuration = &metav1.Duration{Duration: options.CanaryAutoPauseMaxSlowStartDuration} + } + spec.Strategy.Canary.AutoPause.Enabled = edsv1alpha1.NewBool(options.CanaryAutoPauseEnabled) if options.CanaryAutoPauseMaxRestarts > 0 { spec.Strategy.Canary.AutoPause.MaxRestarts = edsv1alpha1.NewInt32(options.CanaryAutoPauseMaxRestarts) diff --git a/controllers/setup.go b/controllers/setup.go index 8afcfcdf91..73fb7c0dea 100644 --- a/controllers/setup.go +++ b/controllers/setup.go @@ -49,12 +49,13 @@ type ExtendedDaemonsetOptions struct { MaxPodUnavailable string MaxPodSchedulerFailure string - CanaryDuration time.Duration - CanaryReplicas string - CanaryAutoPauseEnabled bool - CanaryAutoPauseMaxRestarts int - CanaryAutoFailEnabled bool - CanaryAutoFailMaxRestarts int + CanaryDuration time.Duration + CanaryReplicas string + CanaryAutoPauseEnabled bool + CanaryAutoPauseMaxRestarts int + CanaryAutoFailEnabled bool + CanaryAutoFailMaxRestarts int + CanaryAutoPauseMaxSlowStartDuration time.Duration } type starterFunc func(logr.Logger, manager.Manager, *version.Info, kubernetes.PlatformInfo, SetupOptions) error @@ -122,15 +123,16 @@ func startDatadogAgent(logger logr.Logger, mgr manager.Manager, vInfo *version.I Recorder: mgr.GetEventRecorderFor(agentControllerName), Options: datadogagent.ReconcilerOptions{ ExtendedDaemonsetOptions: componentagent.ExtendedDaemonsetOptions{ - Enabled: options.SupportExtendedDaemonset.Enabled, - MaxPodUnavailable: options.SupportExtendedDaemonset.MaxPodUnavailable, - MaxPodSchedulerFailure: options.SupportExtendedDaemonset.MaxPodSchedulerFailure, - CanaryDuration: options.SupportExtendedDaemonset.CanaryDuration, - CanaryReplicas: options.SupportExtendedDaemonset.CanaryReplicas, - CanaryAutoPauseEnabled: options.SupportExtendedDaemonset.CanaryAutoPauseEnabled, - CanaryAutoPauseMaxRestarts: int32(options.SupportExtendedDaemonset.CanaryAutoPauseMaxRestarts), - CanaryAutoFailEnabled: options.SupportExtendedDaemonset.CanaryAutoFailEnabled, - CanaryAutoFailMaxRestarts: int32(options.SupportExtendedDaemonset.CanaryAutoFailMaxRestarts), + Enabled: options.SupportExtendedDaemonset.Enabled, + MaxPodUnavailable: options.SupportExtendedDaemonset.MaxPodUnavailable, + MaxPodSchedulerFailure: options.SupportExtendedDaemonset.MaxPodSchedulerFailure, + CanaryDuration: options.SupportExtendedDaemonset.CanaryDuration, + CanaryReplicas: options.SupportExtendedDaemonset.CanaryReplicas, + CanaryAutoPauseEnabled: options.SupportExtendedDaemonset.CanaryAutoPauseEnabled, + CanaryAutoPauseMaxRestarts: int32(options.SupportExtendedDaemonset.CanaryAutoPauseMaxRestarts), + CanaryAutoPauseMaxSlowStartDuration: options.SupportExtendedDaemonset.CanaryAutoPauseMaxSlowStartDuration, + CanaryAutoFailEnabled: options.SupportExtendedDaemonset.CanaryAutoFailEnabled, + CanaryAutoFailMaxRestarts: int32(options.SupportExtendedDaemonset.CanaryAutoFailMaxRestarts), }, SupportCilium: options.SupportCilium, OperatorMetricsEnabled: options.OperatorMetricsEnabled, diff --git a/main.go b/main.go index 85754aa2d8..8a10eaf7be 100644 --- a/main.go +++ b/main.go @@ -82,8 +82,9 @@ const ( defaultCanaryAutoPauseEnabled = true defaultCanaryAutoFailEnabled = true // default to 0, to use default value from EDS. - defaultCanaryAutoPauseMaxRestarts = 0 - defaultCanaryAutoFailMaxRestarts = 0 + defaultCanaryAutoPauseMaxRestarts = 0 + defaultCanaryAutoFailMaxRestarts = 0 + defaultCanaryAutoPauseMaxSlowStartDuration = 0 ) type options struct { @@ -101,23 +102,24 @@ type options struct { leaderElectionLeaseDuration time.Duration // Controllers options - supportExtendedDaemonset bool - edsMaxPodUnavailable string - edsMaxPodSchedulerFailure string - edsCanaryDuration time.Duration - edsCanaryReplicas string - edsCanaryAutoPauseEnabled bool - edsCanaryAutoPauseMaxRestarts int - edsCanaryAutoFailEnabled bool - edsCanaryAutoFailMaxRestarts int - supportCilium bool - datadogAgentEnabled bool - datadogMonitorEnabled bool - datadogSLOEnabled bool - operatorMetricsEnabled bool - webhookEnabled bool - v2APIEnabled bool - maximumGoroutines int + supportExtendedDaemonset bool + edsMaxPodUnavailable string + edsMaxPodSchedulerFailure string + edsCanaryDuration time.Duration + edsCanaryReplicas string + edsCanaryAutoPauseEnabled bool + edsCanaryAutoPauseMaxRestarts int + edsCanaryAutoFailEnabled bool + edsCanaryAutoFailMaxRestarts int + edsCanaryAutoPauseMaxSlowStartDuration time.Duration + supportCilium bool + datadogAgentEnabled bool + datadogMonitorEnabled bool + datadogSLOEnabled bool + operatorMetricsEnabled bool + webhookEnabled bool + v2APIEnabled bool + maximumGoroutines int // Secret Backend options secretBackendCommand string @@ -161,6 +163,7 @@ func (opts *options) Parse() { flag.IntVar(&opts.edsCanaryAutoPauseMaxRestarts, "edsCanaryAutoPauseMaxRestarts", defaultCanaryAutoPauseMaxRestarts, "ExtendedDaemonset canary auto pause max restart count") flag.BoolVar(&opts.edsCanaryAutoFailEnabled, "edsCanaryAutoFailEnabled", defaultCanaryAutoFailEnabled, "ExtendedDaemonset canary auto fail enabled") flag.IntVar(&opts.edsCanaryAutoFailMaxRestarts, "edsCanaryAutoFailMaxRestarts", defaultCanaryAutoFailMaxRestarts, "ExtendedDaemonset canary auto fail max restart count") + flag.DurationVar(&opts.edsCanaryAutoPauseMaxSlowStartDuration, "edsCanaryAutoPauseMaxSlowStartDuration", defaultCanaryAutoPauseMaxSlowStartDuration*time.Minute, "ExtendedDaemonset canary max slow start duration") // Parsing flags flag.Parse() @@ -247,15 +250,16 @@ func run(opts *options) error { options := controllers.SetupOptions{ SupportExtendedDaemonset: controllers.ExtendedDaemonsetOptions{ - Enabled: opts.supportExtendedDaemonset, - MaxPodUnavailable: opts.edsMaxPodUnavailable, - CanaryDuration: opts.edsCanaryDuration, - CanaryReplicas: opts.edsCanaryReplicas, - CanaryAutoPauseEnabled: opts.edsCanaryAutoPauseEnabled, - CanaryAutoPauseMaxRestarts: opts.edsCanaryAutoPauseMaxRestarts, - CanaryAutoFailEnabled: opts.edsCanaryAutoFailEnabled, - CanaryAutoFailMaxRestarts: opts.edsCanaryAutoFailMaxRestarts, - MaxPodSchedulerFailure: opts.edsMaxPodSchedulerFailure, + Enabled: opts.supportExtendedDaemonset, + MaxPodUnavailable: opts.edsMaxPodUnavailable, + CanaryDuration: opts.edsCanaryDuration, + CanaryReplicas: opts.edsCanaryReplicas, + CanaryAutoPauseEnabled: opts.edsCanaryAutoPauseEnabled, + CanaryAutoPauseMaxRestarts: opts.edsCanaryAutoPauseMaxRestarts, + CanaryAutoFailEnabled: opts.edsCanaryAutoFailEnabled, + CanaryAutoFailMaxRestarts: opts.edsCanaryAutoFailMaxRestarts, + CanaryAutoPauseMaxSlowStartDuration: opts.edsCanaryAutoPauseMaxSlowStartDuration, + MaxPodSchedulerFailure: opts.edsMaxPodSchedulerFailure, }, SupportCilium: opts.supportCilium, Creds: creds,