From fe8a6afed306a3fd3b3a0e8d63638fff310bd9bf Mon Sep 17 00:00:00 2001 From: Disaiah Bennett Date: Thu, 16 Nov 2023 14:34:03 -0500 Subject: [PATCH] added service and servicemonitor to collect MCE operator metrics Signed-off-by: Disaiah Bennett --- api/v1/multiclusterengine_methods.go | 20 +-- api/v1/multiclusterengine_methods_test.go | 24 ++-- controllers/backplaneconfig_controller.go | 155 +++++++++++++++++++++- controllers/uninstall.go | 4 +- pkg/utils/utils.go | 14 ++ 5 files changed, 190 insertions(+), 27 deletions(-) diff --git a/api/v1/multiclusterengine_methods.go b/api/v1/multiclusterengine_methods.go index 11ee33bb..6d618035 100644 --- a/api/v1/multiclusterengine_methods.go +++ b/api/v1/multiclusterengine_methods.go @@ -71,14 +71,14 @@ var MCEComponents = []string{ var LegacyPrometheusKind = []string{"PrometheusRule", "ServiceMonitor"} -// MCEPrometheusRules is a map that associates certain component names with their corresponding prometheus rules. -var MCEPrometheusRules = map[string]string{ +// MCELegacyPrometheusRules is a map that associates certain component names with their corresponding prometheus rules. +var MCELegacyPrometheusRules = map[string]string{ ConsoleMCE: "acm-console-prometheus-rules", // Add other components here when PrometheusRules is required. } -// MCEServiceMonitors is a map that associates certain component names with their corresponding service monitors. -var MCEServiceMonitors = map[string]string{ +// MCELegacyServiceMonitors is a map that associates certain component names with their corresponding service monitors. +var MCELegacyServiceMonitors = map[string]string{ ClusterLifecycle: "clusterlifecycle-state-metrics-v2", ConsoleMCE: "console-mce-monitor", // Add other components here when ServiceMonitors is required. @@ -217,18 +217,18 @@ func GetLegacyPrometheusKind() []string { return LegacyPrometheusKind } -// GetPrometheusRulesName returns the name of the PrometheusRules based on the provided component name. 
-func GetPrometheusRulesName(component string) (string, error) { - if val, ok := MCEPrometheusRules[component]; !ok { +// GetLegacyPrometheusRulesName returns the name of the PrometheusRules based on the provided component name. +func GetLegacyPrometheusRulesName(component string) (string, error) { + if val, ok := MCELegacyPrometheusRules[component]; !ok { return val, fmt.Errorf("failed to find PrometheusRules name for: %s component", component) } else { return val, nil } } -// GetServiceMonitorName returns the name of the ServiceMonitors based on the provided component name. -func GetServiceMonitorName(component string) (string, error) { - if val, ok := MCEServiceMonitors[component]; !ok { +// GetLegacyServiceMonitorName returns the name of the ServiceMonitors based on the provided component name. +func GetLegacyServiceMonitorName(component string) (string, error) { + if val, ok := MCELegacyServiceMonitors[component]; !ok { return val, fmt.Errorf("failed to find ServiceMonitors name for: %s component", component) } else { return val, nil diff --git a/api/v1/multiclusterengine_methods_test.go b/api/v1/multiclusterengine_methods_test.go index a06a6a55..42cfd98d 100644 --- a/api/v1/multiclusterengine_methods_test.go +++ b/api/v1/multiclusterengine_methods_test.go @@ -109,7 +109,7 @@ func TestGetLegacyPrometheusKind(t *testing.T) { } } -func TestGetPrometheusRulesName(t *testing.T) { +func TestGetLegacyPrometheusRulesName(t *testing.T) { tests := []struct { name string component string @@ -118,30 +118,30 @@ func TestGetPrometheusRulesName(t *testing.T) { { name: "console PrometheusRule", component: api.ConsoleMCE, - want: api.MCEPrometheusRules[api.ConsoleMCE], + want: api.MCELegacyPrometheusRules[api.ConsoleMCE], }, { name: "unknown PrometheusRule", component: "unknown", - want: api.MCEPrometheusRules["unknown"], + want: api.MCELegacyPrometheusRules["unknown"], }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := 
api.GetPrometheusRulesName(tt.component) + got, err := api.GetLegacyPrometheusRulesName(tt.component) if err != nil && tt.component != "unknown" { - t.Errorf("GetPrometheusRulesName(%v) = %v, want: %v", tt.component, err.Error(), tt.want) + t.Errorf("GetLegacyPrometheusRulesName(%v) = %v, want: %v", tt.component, err.Error(), tt.want) } if got != tt.want { - t.Errorf("GetPrometheusRulesName(%v) = %v, want: %v", tt.component, got, tt.want) + t.Errorf("GetLegacyPrometheusRulesName(%v) = %v, want: %v", tt.component, got, tt.want) } }) } } -func TestGetServiceMonitorName(t *testing.T) { +func TestGetLegacyServiceMonitorName(t *testing.T) { tests := []struct { name string component string @@ -150,24 +150,24 @@ func TestGetServiceMonitorName(t *testing.T) { { name: "console ServiceMonitor", component: api.ConsoleMCE, - want: api.MCEServiceMonitors[api.ConsoleMCE], + want: api.MCELegacyServiceMonitors[api.ConsoleMCE], }, { name: "unknown ServiceMonitor", component: "unknown", - want: api.MCEServiceMonitors["unknown"], + want: api.MCELegacyServiceMonitors["unknown"], }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := api.GetServiceMonitorName(tt.component) + got, err := api.GetLegacyServiceMonitorName(tt.component) if err != nil && tt.component != "unknown" { - t.Errorf("GetServiceMonitorName(%v) = %v, want: %v", tt.component, err.Error(), tt.want) + t.Errorf("GetLegacyServiceMonitorName(%v) = %v, want: %v", tt.component, err.Error(), tt.want) } if got != tt.want { - t.Errorf("GetServiceMonitorName(%v) = %v, want: %v", tt.component, got, tt.want) + t.Errorf("GetLegacyServiceMonitorName(%v) = %v, want: %v", tt.component, got, tt.want) } }) } diff --git a/controllers/backplaneconfig_controller.go b/controllers/backplaneconfig_controller.go index d18952fd..a5bbda59 100644 --- a/controllers/backplaneconfig_controller.go +++ b/controllers/backplaneconfig_controller.go @@ -47,6 +47,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" 
"k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -78,6 +79,8 @@ const ( trustBundleNameEnvVar = "TRUSTED_CA_BUNDLE" defaultTrustBundleName = "trusted-ca-bundle" + + controlPlane = "backplane-operator" ) //+kubebuilder:rbac:groups=multicluster.openshift.io,resources=multiclusterengines,verbs=get;list;watch;create;update;patch;delete @@ -234,7 +237,7 @@ func (r *MultiClusterEngineReconciler) Reconcile(ctx context.Context, req ctrl.R MultiClusterEngine to avoid conflicts with the openshift-* namespace when deploying PrometheusRules and ServiceMonitors in ACM and MCE. */ - result, err = r.ensureOpenShiftNamespaceLabel(ctx, backplaneConfig) + _, err = r.ensureOpenShiftNamespaceLabel(ctx, backplaneConfig) if err != nil { log.Error(err, "Failed to add to %s label to namespace: %s", utils.OpenShiftClusterMonitoringLabel, backplaneConfig.Spec.TargetNamespace) @@ -307,7 +310,7 @@ func (r *MultiClusterEngineReconciler) Reconcile(ctx context.Context, req ctrl.R } for _, kind := range backplanev1.GetLegacyPrometheusKind() { - err = r.removeLegacyPrometheusConfigurations(ctx, "openshift-monitoring", kind) + _ = r.removeLegacyPrometheusConfigurations(ctx, "openshift-monitoring", kind) } result, err = r.ensureToggleableComponents(ctx, backplaneConfig) @@ -320,6 +323,16 @@ func (r *MultiClusterEngineReconciler) Reconcile(ctx context.Context, req ctrl.R return result, err } + result, err = r.createMetricsService(ctx, backplaneConfig) + if err != nil { + return result, err + } + + result, err = r.createMetricsServiceMonitor(ctx, backplaneConfig) + if err != nil { + return result, err + } + result, err = r.ensureRemovalsGone(backplaneConfig) if err != nil { return result, err @@ -440,7 +453,8 @@ func (r *MultiClusterEngineReconciler) SetupWithManager(mgr ctrl.Manager) error // createTrustBundleConfigmap 
creates a configmap that will be injected with the // trusted CA bundle for use with the OCP cluster wide proxy -func (r *MultiClusterEngineReconciler) createTrustBundleConfigmap(ctx context.Context, mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) { +func (r *MultiClusterEngineReconciler) createTrustBundleConfigmap(ctx context.Context, + mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) { log := log.FromContext(ctx) // Get Trusted Bundle configmap name @@ -494,6 +508,141 @@ func (r *MultiClusterEngineReconciler) createTrustBundleConfigmap(ctx context.Co return ctrl.Result{}, nil } +func (r *MultiClusterEngineReconciler) createMetricsService(ctx context.Context, + mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) { + log := log.FromContext(ctx) + + const Port = 8080 + + sName := utils.MCEOperatorMetricsServiceName + sNamespace := mce.Spec.TargetNamespace + + namespacedName := types.NamespacedName{ + Name: sName, + Namespace: sNamespace, + } + + // Check if service exists + if err := r.Client.Get(ctx, namespacedName, &corev1.Service{}); err != nil { + if !apierrors.IsNotFound(err) { + // Unknown error. 
Requeue + log.Error(err, fmt.Sprintf("error while getting multicluster-engine metrics service: %s/%s", + sNamespace, sName)) + return ctrl.Result{RequeueAfter: requeuePeriod}, err + } + + // Create metrics service + s := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: sName, + Namespace: sNamespace, + Labels: map[string]string{ + "control-plane": controlPlane, + }, + }, + Spec: corev1.ServiceSpec{ + Ports: []corev1.ServicePort{ + { + Name: "metrics", + Port: int32(Port), + Protocol: "TCP", + TargetPort: intstr.FromInt(Port), + }, + }, + Selector: map[string]string{ + "control-plane": controlPlane, + }, + }, + } + + if err = ctrl.SetControllerReference(mce, s, r.Scheme); err != nil { + return ctrl.Result{}, pkgerrors.Wrapf( + err, "error setting controller reference on metrics service: %s", sName, + ) + } + + if err = r.Client.Create(ctx, s); err != nil { + // Error creating metrics service + log.Error(err, fmt.Sprintf("error creating multicluster-engine metrics service: %s", sName)) + return ctrl.Result{RequeueAfter: requeuePeriod}, err + } + + log.Info(fmt.Sprintf("Created multicluster-engine metrics service: %s", sName)) + } + + return ctrl.Result{}, nil +} + +func (r *MultiClusterEngineReconciler) createMetricsServiceMonitor(ctx context.Context, + mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) { + log := log.FromContext(ctx) + + smName := utils.MCEOperatorMetricsServiceMonitorName + smNamespace := mce.Spec.TargetNamespace + + namespacedName := types.NamespacedName{ + Name: smName, + Namespace: smNamespace, + } + + // Check if servicemonitor exists + if err := r.Client.Get(ctx, namespacedName, &monitorv1.ServiceMonitor{}); err != nil { + if !apierrors.IsNotFound(err) { + // Unknown error. 
Requeue + log.Error(err, fmt.Sprintf("error while getting multicluster-engine metrics service: %s/%s", + smNamespace, smName)) + return ctrl.Result{RequeueAfter: requeuePeriod}, err + } + + // Create metrics servicemonitor + sm := &monitorv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: smName, + Namespace: smNamespace, + Labels: map[string]string{ + "control-plane": controlPlane, + }, + }, + Spec: monitorv1.ServiceMonitorSpec{ + Endpoints: []monitorv1.Endpoint{ + { + BearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token", + BearerTokenSecret: corev1.SecretKeySelector{ + Key: "", + }, + Port: "metrics", + }, + }, + NamespaceSelector: monitorv1.NamespaceSelector{ + MatchNames: []string{ + mce.Spec.TargetNamespace, + }, + }, + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "control-plane": controlPlane, + }, + }, + }, + } + + if err = ctrl.SetControllerReference(mce, sm, r.Scheme); err != nil { + return ctrl.Result{}, pkgerrors.Wrapf( + err, "error setting controller reference on multicluster-engine metrics servicemonitor: %s", smName) + } + + if err = r.Client.Create(ctx, sm); err != nil { + // Error creating metrics servicemonitor + log.Error(err, fmt.Sprintf("error creating metrics servicemonitor: %s", smName)) + return ctrl.Result{RequeueAfter: requeuePeriod}, err + } + + log.Info(fmt.Sprintf("Created multicluster-engine metrics servicemonitor: %s", smName)) + } + + return ctrl.Result{}, nil +} + // DeployAlwaysSubcomponents ensures all subcomponents exist func (r *MultiClusterEngineReconciler) DeployAlwaysSubcomponents(ctx context.Context, backplaneConfig *backplanev1.MultiClusterEngine) (ctrl.Result, error) { log := log.FromContext(ctx) diff --git a/controllers/uninstall.go b/controllers/uninstall.go index 05181d71..d4854861 100644 --- a/controllers/uninstall.go +++ b/controllers/uninstall.go @@ -167,9 +167,9 @@ func (r *MultiClusterEngineReconciler) removeLegacyPrometheusConfigurations(ctx for _, c := range 
backplanev1.MCEComponents { res, err := func() (string, error) { if configType == "PrometheusRule" { - return backplanev1.GetPrometheusRulesName(c) + return backplanev1.GetLegacyPrometheusRulesName(c) } - return backplanev1.GetServiceMonitorName(c) + return backplanev1.GetLegacyServiceMonitorName(c) }() if err != nil { diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 716efe99..c13208fa 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -20,6 +20,20 @@ const ( OpenShiftClusterMonitoringLabel = "openshift.io/cluster-monitoring" ) +const ( + /* + MCEOperatorMetricsServiceName is the name of the service used to expose the metrics + endpoint for the multicluster-engine-operator. + */ + MCEOperatorMetricsServiceName = "multicluster-engine-operator-metrics" + + /* + MCEOperatorMetricsServiceMonitorName is the name of the service monitor used to expose + the metrics for the multicluster-engine-operator. + */ + MCEOperatorMetricsServiceMonitorName = "multicluster-engine-operator-metrics" +) + var onComponents = []string{ backplanev1.AssistedService, backplanev1.ClusterLifecycle,