diff --git a/config.go b/config.go index ff9d859..11e4057 100644 --- a/config.go +++ b/config.go @@ -66,6 +66,11 @@ type Config struct { // LegacyNamespaceQuota is the default quota for namespaces if no ZoneUsageProfile is selected. LegacyNamespaceQuota int + + // PodRunOnceActiveDeadlineSecondsOverrideAnnotation is the annotation used to override the activeDeadlineSeconds for RunOnce pods. + PodRunOnceActiveDeadlineSecondsOverrideAnnotation string + // PodRunOnceActiveDeadlineSecondsDefault is the default activeDeadlineSeconds for RunOnce pods. + PodRunOnceActiveDeadlineSecondsDefault int } func ConfigFromFile(path string) (c Config, warn []string, err error) { diff --git a/config.yaml b/config.yaml index 685c936..4166e73 100644 --- a/config.yaml +++ b/config.yaml @@ -47,3 +47,8 @@ AllowedAnnotations: [appuio.io/default-node-selector] # AllowedLabels is a list of labels that are allowed on namespaces. # Supports '*' and '?' wildcards. AllowedLabels: [appuio.io/organization] + +# PodRunOnceActiveDeadlineSecondsOverrideAnnotation is the annotation used to override the activeDeadlineSeconds for RunOnce pods. +PodRunOnceActiveDeadlineSecondsOverrideAnnotation: appuio.io/active-deadline-seconds-override +# PodRunOnceActiveDeadlineSecondsDefault is the default activeDeadlineSeconds for RunOnce pods. +PodRunOnceActiveDeadlineSecondsDefault: 1800 diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index a7ae5c1..c84b606 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -46,6 +46,27 @@ webhooks: resources: - namespaces sideEffects: None + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /mutate-pod-run-once-active-deadline + failurePolicy: Fail + matchPolicy: Equivalent + name: pod-run-once-active-deadline-mutator.appuio.io + reinvocationPolicy: IfNeeded + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + sideEffects: None - admissionReviewVersions: - v1 clientConfig: diff --git a/main.go b/main.go index d101335..0857e1c 100644 --- a/main.go +++ b/main.go @@ -89,6 +89,9 @@ func main() { var legacyNamespaceQuotaEnabled bool flag.BoolVar(&legacyNamespaceQuotaEnabled, "legacy-namespace-quota-enabled", false, "Enable the legacy namespace quota controller. This controller is deprecated and will be removed in the future.") + var podRunOnceActiveDeadlineSecondsMutatorEnabled bool + flag.BoolVar(&podRunOnceActiveDeadlineSecondsMutatorEnabled, "pod-run-once-active-deadline-seconds-mutator-enabled", false, "Enable the PodRunOnceActiveDeadlineSecondsMutator webhook. Adds .spec.activeDeadlineSeconds to pods with the restartPolicy set to 'OnFailure' or 'Never'.") + var qps, burst int flag.IntVar(&qps, "qps", 20, "QPS to use for the controller-runtime client") flag.IntVar(&burst, "burst", 100, "Burst to use for the controller-runtime client") @@ -296,6 +299,18 @@ func main() { }, }) + mgr.GetWebhookServer().Register("/mutate-pod-run-once-active-deadline", &webhook.Admission{ + Handler: &webhooks.PodRunOnceActiveDeadlineSecondsMutator{ + Decoder: admission.NewDecoder(mgr.GetScheme()), + Client: mgr.GetClient(), + + Skipper: skipper.StaticSkipper{ShouldSkip: !podRunOnceActiveDeadlineSecondsMutatorEnabled}, + + OverrideAnnotation: conf.PodRunOnceActiveDeadlineSecondsOverrideAnnotation, + DefaultActiveDeadlineSeconds: conf.PodRunOnceActiveDeadlineSecondsDefault, + }, + }) + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { setupLog.Error(err, "unable to setup health endpoint") os.Exit(1) diff --git a/webhooks/pod_node_selector_mutator_test.go b/webhooks/pod_node_selector_mutator_test.go index 6d28002..6dcc185 100644 --- a/webhooks/pod_node_selector_mutator_test.go +++ b/webhooks/pod_node_selector_mutator_test.go @@ -93,7 +93,7 @@ func Test_PodNodeSelectorMutator_Handle(t *testing.T) { DefaultNamespaceNodeSelectorAnnotation: nodeSelAnnotation, } - pod := newPod(tc.namespace, "test", tc.nodeSelector) + pod := newPodWithNodeSelector(tc.namespace, "test", tc.nodeSelector) resp := subject.Handle(context.Background(), admissionRequestForObject(t, pod, scheme)) t.Log("Response:", resp.Result.Reason, resp.Result.Message) require.ElementsMatch(t, tc.patch, resp.Patches) diff --git a/webhooks/pod_runonce_active_deadline_seconds_mutator.go b/webhooks/pod_runonce_active_deadline_seconds_mutator.go new file mode 100644 index 0000000..2d94e07 --- /dev/null +++ b/webhooks/pod_runonce_active_deadline_seconds_mutator.go @@ -0,0 +1,92 @@ +package webhooks + +import ( + "context" + "fmt" + "net/http" + "strconv" + + "gomodules.xyz/jsonpatch/v2" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + + "github.com/appuio/appuio-cloud-agent/skipper" +) + +// +kubebuilder:webhook:path=/mutate-pod-run-once-active-deadline,name=pod-run-once-active-deadline-mutator.appuio.io,admissionReviewVersions=v1,sideEffects=none,mutating=true,failurePolicy=Fail,groups="",resources=pods,verbs=create,versions=v1,matchPolicy=equivalent,reinvocationPolicy=IfNeeded +//+kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch + +// PodRunOnceActiveDeadlineSecondsMutator adds .spec.activeDeadlineSeconds to pods with the restartPolicy set to "OnFailure" or "Never". +type PodRunOnceActiveDeadlineSecondsMutator struct { + Decoder admission.Decoder + + // Client is used to fetch namespace metadata for the override annotation + Client client.Reader + + // DefaultNamespaceNodeSelectorAnnotation is the annotation to use for the default node selector + OverrideAnnotation string + + // DefaultActiveDeadlineSeconds is the default activeDeadlineSeconds to apply to pods + DefaultActiveDeadlineSeconds int + + Skipper skipper.Skipper +} + +// Handle handles the admission requests +func (m *PodRunOnceActiveDeadlineSecondsMutator) Handle(ctx context.Context, req admission.Request) admission.Response { + ctx = log.IntoContext(ctx, log.FromContext(ctx). + WithName("webhook.pod-run-once-active-deadline-mutator.appuio.io"). + WithValues("id", req.UID, "user", req.UserInfo.Username). + WithValues("operation", req.Operation). + WithValues("namespace", req.Namespace, "name", req.Name, + "group", req.Kind.Group, "version", req.Kind.Version, "kind", req.Kind.Kind)) + + return logAdmissionResponse(ctx, m.handle(ctx, req)) +} + +func (m *PodRunOnceActiveDeadlineSecondsMutator) handle(ctx context.Context, req admission.Request) admission.Response { + skip, err := m.Skipper.Skip(ctx, req) + if err != nil { + return admission.Errored(http.StatusInternalServerError, fmt.Errorf("error while checking skipper: %w", err)) + } + if skip { + return admission.Allowed("skipped") + } + + var pod corev1.Pod + if err := m.Decoder.Decode(req, &pod); err != nil { + return admission.Errored(http.StatusUnprocessableEntity, err) + } + + if pod.Spec.RestartPolicy != corev1.RestartPolicyOnFailure && pod.Spec.RestartPolicy != corev1.RestartPolicyNever { + return admission.Allowed(fmt.Sprintf("pod restart policy is %q, no activeDeadlineSeconds needed", pod.Spec.RestartPolicy)) + } + + if pod.Spec.ActiveDeadlineSeconds != nil { + return admission.Allowed("pod already has an activeDeadlineSeconds value") + } + + var ns corev1.Namespace + if err := m.Client.Get(ctx, client.ObjectKey{Name: req.Namespace}, &ns); err != nil { + return admission.Errored(http.StatusInternalServerError, fmt.Errorf("failed to fetch namespace for override annotation: %w", err)) + } + + ads := m.DefaultActiveDeadlineSeconds + msg := fmt.Sprintf("added default activeDeadlineSeconds %d", ads) + if oa := ns.Annotations[m.OverrideAnnotation]; oa != "" { + parsed, err := strconv.Atoi(oa) + if err != nil { + return admission.Errored(http.StatusInternalServerError, fmt.Errorf("failed to parse override annotation %q for namespace %q: %w", oa, req.Namespace, err)) + } + ads = parsed + msg = fmt.Sprintf("added activeDeadlineSeconds %d from override annotation %q", ads, m.OverrideAnnotation) + } + + return admission.Patched(msg, jsonpatch.Operation{ + Operation: "add", + Path: "/spec/restartPolicy", + Value: ads, + }) +} diff --git a/webhooks/pod_runonce_active_deadline_seconds_mutator_test.go b/webhooks/pod_runonce_active_deadline_seconds_mutator_test.go new file mode 100644 index 0000000..7ad9669 --- /dev/null +++ b/webhooks/pod_runonce_active_deadline_seconds_mutator_test.go @@ -0,0 +1,133 @@ +package webhooks + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/appuio/appuio-cloud-agent/skipper" +) + +func Test_PodRunOnceActiveDeadlineSecondsMutator_Handle(t *testing.T) { + const overrideAnnotation = "appuio.io/active-deadline-seconds-override" + const defaultActiveDeadlineSeconds = 60 + + testCases := []struct { + name string + + subject client.Object + additionalObjects []client.Object + + allowed bool + expectedActiveDeadlineSeconds int + }{ + { + name: "pod with restartPolicy=Always", + subject: newPodWithSpec("testns", "pod1", corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyAlways, + }), + additionalObjects: []client.Object{ + newNamespace("testns", nil, nil), + }, + allowed: true, + }, + { + name: "pod with restartPolicy=OnFailure", + subject: newPodWithSpec("testns", "pod1", corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyOnFailure, + }), + additionalObjects: []client.Object{ + newNamespace("testns", nil, nil), + }, + allowed: true, + expectedActiveDeadlineSeconds: defaultActiveDeadlineSeconds, + }, + { + name: "pod with restartPolicy=Never", + subject: newPodWithSpec("testns", "pod1", corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + }), + additionalObjects: []client.Object{ + newNamespace("testns", nil, nil), + }, + allowed: true, + expectedActiveDeadlineSeconds: defaultActiveDeadlineSeconds, + }, + { + name: "pod in namespace with override annotation", + subject: newPodWithSpec("testns", "pod1", corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + }), + additionalObjects: []client.Object{ + newNamespace("testns", nil, map[string]string{ + overrideAnnotation: "30", + }), + }, + allowed: true, + expectedActiveDeadlineSeconds: 30, + }, + { + name: "pod with existing activeDeadlineSeconds", + subject: newPodWithSpec("testns", "pod1", corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + ActiveDeadlineSeconds: ptr.To(int64(77)), + }), + additionalObjects: []client.Object{ + newNamespace("testns", nil, nil), + }, + allowed: true, + }, + { + name: "pod in namespace with invalid override annotation", + subject: newPodWithSpec("testns", "pod1", corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + }), + additionalObjects: []client.Object{ + newNamespace("testns", nil, map[string]string{ + overrideAnnotation: "invalid", + }), + }, + allowed: false, + }, + { + name: "non-existing namespace", + subject: newPodWithSpec("testns", "pod1", corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + }), + allowed: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + c, scheme, decoder := prepareClient(t, tc.additionalObjects...) + + subject := PodRunOnceActiveDeadlineSecondsMutator{ + Decoder: decoder, + Client: c, + Skipper: skipper.StaticSkipper{}, + + OverrideAnnotation: overrideAnnotation, + DefaultActiveDeadlineSeconds: defaultActiveDeadlineSeconds, + } + + resp := subject.Handle(context.Background(), admissionRequestForObject(t, tc.subject, scheme)) + t.Log("Response:", resp.Result.Reason, resp.Result.Message) + require.Equal(t, tc.allowed, resp.Allowed) + + if tc.expectedActiveDeadlineSeconds == 0 { + require.Len(t, resp.Patches, 0) + return + } + + require.Len(t, resp.Patches, 1) + require.Equal(t, tc.expectedActiveDeadlineSeconds, resp.Patches[0].Value) + }) + } +} diff --git a/webhooks/utils_test.go b/webhooks/utils_test.go index 979a844..9d77647 100644 --- a/webhooks/utils_test.go +++ b/webhooks/utils_test.go @@ -132,7 +132,13 @@ func prepareClient(t *testing.T, initObjs ...client.Object) (client.WithWatch, * return client, scheme, decoder } -func newPod(namespace, name string, nodeSelector map[string]string) *corev1.Pod { +func newPodWithNodeSelector(namespace, name string, nodeSelector map[string]string) *corev1.Pod { + return newPodWithSpec(namespace, name, corev1.PodSpec{ + NodeSelector: nodeSelector, + }) +} + +func newPodWithSpec(namespace, name string, spec corev1.PodSpec) *corev1.Pod { return &corev1.Pod{ TypeMeta: metav1.TypeMeta{ Kind: "Pod", @@ -142,8 +148,6 @@ func newPod(namespace, name string, nodeSelector map[string]string) *corev1.Pod Name: name, Namespace: namespace, }, - Spec: corev1.PodSpec{ - NodeSelector: nodeSelector, - }, + Spec: spec, } }