From 6edbcaf548328eba1b3a4e724c0be69e7b879225 Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Mon, 4 Nov 2024 20:48:23 +0100 Subject: [PATCH] feat: add operator controller to add ServiceMonitor Signed-off-by: Jan Fajerski --- .../observability-operator-cluster-role.yaml | 11 ++ go.mod | 5 +- go.sum | 2 + pkg/controllers/operator/components.go | 63 +++++++++ pkg/controllers/operator/controller.go | 126 ++++++++++++++++++ pkg/controllers/utils/finalizers.go | 28 ++++ pkg/operator/operator.go | 17 +++ 7 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 pkg/controllers/operator/components.go create mode 100644 pkg/controllers/operator/controller.go create mode 100644 pkg/controllers/utils/finalizers.go diff --git a/deploy/operator/observability-operator-cluster-role.yaml b/deploy/operator/observability-operator-cluster-role.yaml index 2933e19f1..2cf69f902 100644 --- a/deploy/operator/observability-operator-cluster-role.yaml +++ b/deploy/operator/observability-operator-cluster-role.yaml @@ -130,6 +130,17 @@ rules: - create - get - update +- apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - create + - delete + - list + - patch + - update + - watch - apiGroups: - monitoring.rhobs resources: diff --git a/go.mod b/go.mod index 6983e4995..52b332c5c 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,15 @@ module github.com/rhobs/observability-operator -go 1.22.7 +go 1.23 + +toolchain go1.23.1 require ( github.com/go-logr/logr v1.4.2 github.com/google/go-cmp v0.6.0 github.com/openshift/api v0.0.0-20240404200104-96ed2d49b255 github.com/pkg/errors v0.9.1 + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.77.2 github.com/prometheus/common v0.60.1 github.com/rhobs/obo-prometheus-operator v0.77.1-rhobs1 github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring v0.77.1-rhobs1 diff --git a/go.sum b/go.sum index 0f1d9bbeb..bdd264f03 100644 --- a/go.sum +++ b/go.sum @@ -202,6 +202,8 @@ 
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus-community/prom-label-proxy v0.11.0 h1:IO02WiiFMfcIqvjhwMbCYnDJiTNcSHBrkCGRQ/7KDd0= github.com/prometheus-community/prom-label-proxy v0.11.0/go.mod h1:lfvrG70XqsxWDrSh1843QXBG0fSg8EbIXmAo8xGsvw8= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.77.2 h1:F/MALZ518KfI1zEg+Kg8/uTzoXKDyqw+LNC/5irJlJE= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.77.2/go.mod h1:D0KY8md81DQKdaR/cXwnhoWB3MYYyc/UjvqE8GFkIvA= github.com/prometheus/alertmanager v0.27.0 h1:V6nTa2J5V4s8TG4C4HtrBP/WNSebCCTYGGv4qecA/+I= github.com/prometheus/alertmanager v0.27.0/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= diff --git a/pkg/controllers/operator/components.go b/pkg/controllers/operator/components.go new file mode 100644 index 000000000..ab60665e6 --- /dev/null +++ b/pkg/controllers/operator/components.go @@ -0,0 +1,63 @@ +package operator_controller + +import ( + "fmt" + + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/rhobs/observability-operator/pkg/reconciler" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" +) + +const ( + name = "observability-operator" +) + +func operatorComponentReconcilers(owner metav1.Object, namespace string) []reconciler.Reconciler { + return []reconciler.Reconciler{ + reconciler.NewUpdater(newServiceMonitor(namespace), owner), + } +} + +func newServiceMonitor(namespace string) *monv1.ServiceMonitor { + return &monv1.ServiceMonitor{ + TypeMeta: metav1.TypeMeta{ + APIVersion: monv1.SchemeGroupVersion.String(), + Kind: "ServiceMonitor", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: 
map[string]string{ + "app.kubernetes.io/component": "operator", + "app.kubernetes.io/name": name, + "app.kubernetes.io/part-of": name, + "openshift.io/user-monitoring": "true", + }, + }, + + Spec: monv1.ServiceMonitorSpec{ + Endpoints: []monv1.Endpoint{ + { + Port: "metrics", + Scheme: "https", + TLSConfig: &monv1.TLSConfig{ + CAFile: "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt", + CertFile: "/etc/prometheus/secrets/metrics-client-certs/tls.crt", + KeyFile: "/etc/prometheus/secrets/metrics-client-certs/tls.key", + SafeTLSConfig: monv1.SafeTLSConfig{ + ServerName: ptr.To(fmt.Sprintf("%s.%s.svc", name, namespace)), + InsecureSkipVerify: ptr.To(false), + }, + }, + }, + }, + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app.kubernetes.io/component": "operator", + "app.kubernetes.io/name": name, + }, + }, + }, + } +} diff --git a/pkg/controllers/operator/controller.go b/pkg/controllers/operator/controller.go new file mode 100644 index 000000000..34e4d7a83 --- /dev/null +++ b/pkg/controllers/operator/controller.go @@ -0,0 +1,126 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package operator_controller + +import ( + "context" + "time" + + "github.com/go-logr/logr" + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + appsv1 "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +type resourceManager struct { + k8sClient client.Client + scheme *runtime.Scheme + logger logr.Logger + controller controller.Controller + namespace string +} + +// RBAC for managing Prometheus Operator CRs +//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=list;watch;create;update;delete;patch + +// RegisterWithManager registers the controller with Manager +func RegisterWithManager(mgr ctrl.Manager, namespace string) error { + + rm := &resourceManager{ + k8sClient: mgr.GetClient(), + scheme: mgr.GetScheme(), + logger: ctrl.Log.WithName("observability-operator"), + namespace: namespace, + } + // We only want to trigger a reconciliation when the generation + // of a child changes. Until we need to update the status of our own objects, + // we can save CPU cycles by avoiding reconciliations triggered by + // child status changes. + generationChanged := builder.WithPredicates(predicate.GenerationChangedPredicate{}) + + ctrl, err := ctrl.NewControllerManagedBy(mgr). + Owns(&monv1.ServiceMonitor{}, generationChanged). + Watches( + &appsv1.Deployment{}, + handler.EnqueueRequestsFromMapFunc(rm.operatorDeployment), + builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}), + ). 
+ Build(rm) + + if err != nil { + return err + } + rm.controller = ctrl + return nil +} + +func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := rm.logger.WithValues("operator", req.NamespacedName) + logger.Info("Reconciling operator resources") + + op := &appsv1.Deployment{} + err := rm.k8sClient.Get(ctx, req.NamespacedName, op) + if errors.IsNotFound(err) { + return ctrl.Result{}, nil + } + if err != nil { + return ctrl.Result{}, err + } + + reconcilers := operatorComponentReconcilers(op, rm.namespace) + for _, reconciler := range reconcilers { + err := reconciler.Reconcile(ctx, rm.k8sClient, rm.scheme) + // handle create / update errors that can happen due to a stale cache by + // retrying after some time. + if errors.IsAlreadyExists(err) || errors.IsConflict(err) { + logger.V(3).Info("skipping reconcile error", "err", err) + return ctrl.Result{RequeueAfter: 2 * time.Second}, nil + } + if err != nil { + return ctrl.Result{}, err + } + } + + return ctrl.Result{}, nil +} +func (rm resourceManager) operatorDeployment(ctx context.Context, ms client.Object) []reconcile.Request { + var requests []reconcile.Request + op := &appsv1.Deployment{} + err := rm.k8sClient.Get(ctx, types.NamespacedName{Name: "observability-operator", Namespace: rm.namespace}, op) + if errors.IsNotFound(err) { + return requests + } + if err != nil { + return requests + } + requests = append(requests, reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: op.GetName(), + Namespace: op.GetNamespace(), + }, + }) + return requests +} diff --git a/pkg/controllers/utils/finalizers.go b/pkg/controllers/utils/finalizers.go new file mode 100644 index 000000000..3ad1ff4b3 --- /dev/null +++ b/pkg/controllers/utils/finalizers.go @@ -0,0 +1,28 @@ +package utils + +import ( + "slices" + + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// AddFinalizer appends finalizerName to obj's finalizers if not already present and returns obj. +func AddFinalizer(obj client.Object, finalizerName string) client.Object 
{ + finalizers := obj.GetFinalizers() + if !slices.Contains(finalizers, finalizerName) { + finalizers = append(finalizers, finalizerName) + obj.SetFinalizers(finalizers) + } + return obj +} + +func RemoveFinalizer(obj client.Object, finalizerName string) client.Object { + finalizers := obj.GetFinalizers() + if slices.Contains(finalizers, finalizerName) { + finalizers = slices.DeleteFunc(finalizers, func(currentFinalizerName string) bool { + return currentFinalizerName == finalizerName + }) + obj.SetFinalizers(finalizers) + } + return obj +} diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 964234503..2fef64e6b 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -20,6 +20,7 @@ import ( stackctrl "github.com/rhobs/observability-operator/pkg/controllers/monitoring/monitoring-stack" tqctrl "github.com/rhobs/observability-operator/pkg/controllers/monitoring/thanos-querier" + opctrl "github.com/rhobs/observability-operator/pkg/controllers/operator" uictrl "github.com/rhobs/observability-operator/pkg/controllers/uiplugin" ) @@ -59,6 +60,7 @@ type OperatorConfiguration struct { ThanosQuerier tqctrl.ThanosConfiguration UIPlugins uictrl.UIPluginsConfiguration FeatureGates FeatureGates + Namespace string } func WithPrometheusImage(image string) func(*OperatorConfiguration) { @@ -110,6 +112,12 @@ func WithFeatureGates(featureGates FeatureGates) func(*OperatorConfiguration) { } } +func WithNamespace(ns string) func(*OperatorConfiguration) { + return func(oc *OperatorConfiguration) { + oc.Namespace = ns + } +} + func NewOperatorConfiguration(opts ...func(*OperatorConfiguration)) *OperatorConfiguration { cfg := &OperatorConfiguration{} for _, o := range opts { @@ -228,6 +236,15 @@ func New(ctx context.Context, cfg *OperatorConfiguration) (*Operator, error) { setupLog.Info("OpenShift feature gate is disabled, UIPlugins are not enabled") } + if cfg.FeatureGates.OpenShift.Enabled { + if err := opctrl.RegisterWithManager(mgr, cfg.Namespace); 
err != nil { + return nil, fmt.Errorf("unable to register operator controller: %w", err) + } + } else { + setupLog := ctrl.Log.WithName("setup") + setupLog.Info("OpenShift feature gate is disabled, Operator controller is not enabled") + } + if err := mgr.AddHealthzCheck("health probe", healthz.Ping); err != nil { return nil, fmt.Errorf("unable to add health probe: %w", err) }