Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ACM-8548] Added service and servicemonitor to collect MCE operator metrics #550

Merged
merged 1 commit into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions api/v1/multiclusterengine_methods.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ var MCEComponents = []string{

var LegacyPrometheusKind = []string{"PrometheusRule", "ServiceMonitor"}

// MCEPrometheusRules is a map that associates certain component names with their corresponding prometheus rules.
var MCEPrometheusRules = map[string]string{
// MCELegacyPrometheusRules is a map that associates certain component names with their corresponding prometheus rules.
var MCELegacyPrometheusRules = map[string]string{
ConsoleMCE: "acm-console-prometheus-rules",
// Add other components here when PrometheusRules is required.
}

// MCEServiceMonitors is a map that associates certain component names with their corresponding service monitors.
var MCEServiceMonitors = map[string]string{
// MCELegacyServiceMonitors is a map that associates certain component names with their corresponding service monitors.
var MCELegacyServiceMonitors = map[string]string{
ClusterLifecycle: "clusterlifecycle-state-metrics-v2",
ConsoleMCE: "console-mce-monitor",
// Add other components here when ServiceMonitors is required.
Expand Down Expand Up @@ -217,18 +217,18 @@ func GetLegacyPrometheusKind() []string {
return LegacyPrometheusKind
}

// GetPrometheusRulesName returns the name of the PrometheusRules based on the provided component name.
func GetPrometheusRulesName(component string) (string, error) {
if val, ok := MCEPrometheusRules[component]; !ok {
// GetLegacyPrometheusRulesName returns the name of the PrometheusRules based on the provided component name.
func GetLegacyPrometheusRulesName(component string) (string, error) {
if val, ok := MCELegacyPrometheusRules[component]; !ok {
return val, fmt.Errorf("failed to find PrometheusRules name for: %s component", component)
} else {
return val, nil
}
}

// GetServiceMonitorName returns the name of the ServiceMonitors based on the provided component name.
func GetServiceMonitorName(component string) (string, error) {
if val, ok := MCEServiceMonitors[component]; !ok {
// GetLegacyServiceMonitorName returns the name of the ServiceMonitors based on the provided component name.
func GetLegacyServiceMonitorName(component string) (string, error) {
if val, ok := MCELegacyServiceMonitors[component]; !ok {
return val, fmt.Errorf("failed to find ServiceMonitors name for: %s component", component)
} else {
return val, nil
Expand Down
24 changes: 12 additions & 12 deletions api/v1/multiclusterengine_methods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func TestGetLegacyPrometheusKind(t *testing.T) {
}
}

func TestGetPrometheusRulesName(t *testing.T) {
func TestGetLegacyPrometheusRulesName(t *testing.T) {
tests := []struct {
name string
component string
Expand All @@ -118,30 +118,30 @@ func TestGetPrometheusRulesName(t *testing.T) {
{
name: "console PrometheusRule",
component: api.ConsoleMCE,
want: api.MCEPrometheusRules[api.ConsoleMCE],
want: api.MCELegacyPrometheusRules[api.ConsoleMCE],
},
{
name: "unknown PrometheusRule",
component: "unknown",
want: api.MCEPrometheusRules["unknown"],
want: api.MCELegacyPrometheusRules["unknown"],
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := api.GetPrometheusRulesName(tt.component)
got, err := api.GetLegacyPrometheusRulesName(tt.component)
if err != nil && tt.component != "unknown" {
t.Errorf("GetPrometheusRulesName(%v) = %v, want: %v", tt.component, err.Error(), tt.want)
t.Errorf("GetLegacyPrometheusRulesName(%v) = %v, want: %v", tt.component, err.Error(), tt.want)
}

if got != tt.want {
t.Errorf("GetPrometheusRulesName(%v) = %v, want: %v", tt.component, got, tt.want)
t.Errorf("GetLegacyPrometheusRulesName(%v) = %v, want: %v", tt.component, got, tt.want)
}
})
}
}

func TestGetServiceMonitorName(t *testing.T) {
func TestGetLegacyServiceMonitorName(t *testing.T) {
tests := []struct {
name string
component string
Expand All @@ -150,24 +150,24 @@ func TestGetServiceMonitorName(t *testing.T) {
{
name: "console ServiceMonitor",
component: api.ConsoleMCE,
want: api.MCEServiceMonitors[api.ConsoleMCE],
want: api.MCELegacyServiceMonitors[api.ConsoleMCE],
},
{
name: "unknown ServiceMonitor",
component: "unknown",
want: api.MCEServiceMonitors["unknown"],
want: api.MCELegacyServiceMonitors["unknown"],
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := api.GetServiceMonitorName(tt.component)
got, err := api.GetLegacyServiceMonitorName(tt.component)
if err != nil && tt.component != "unknown" {
t.Errorf("GetServiceMonitorName(%v) = %v, want: %v", tt.component, err.Error(), tt.want)
t.Errorf("GetLegacyServiceMonitorName(%v) = %v, want: %v", tt.component, err.Error(), tt.want)
}

if got != tt.want {
t.Errorf("GetServiceMonitorName(%v) = %v, want: %v", tt.component, got, tt.want)
t.Errorf("GetLegacyServiceMonitorName(%v) = %v, want: %v", tt.component, got, tt.want)
}
})
}
Expand Down
155 changes: 152 additions & 3 deletions controllers/backplaneconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/util/workqueue"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
Expand Down Expand Up @@ -78,6 +79,8 @@ const (

trustBundleNameEnvVar = "TRUSTED_CA_BUNDLE"
defaultTrustBundleName = "trusted-ca-bundle"

controlPlane = "backplane-operator"
)

//+kubebuilder:rbac:groups=multicluster.openshift.io,resources=multiclusterengines,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -234,7 +237,7 @@ func (r *MultiClusterEngineReconciler) Reconcile(ctx context.Context, req ctrl.R
MultiClusterEngine to avoid conflicts with the openshift-* namespace when deploying PrometheusRules and
ServiceMonitors in ACM and MCE.
*/
result, err = r.ensureOpenShiftNamespaceLabel(ctx, backplaneConfig)
_, err = r.ensureOpenShiftNamespaceLabel(ctx, backplaneConfig)
if err != nil {
log.Error(err, "Failed to add to %s label to namespace: %s", utils.OpenShiftClusterMonitoringLabel,
backplaneConfig.Spec.TargetNamespace)
Expand Down Expand Up @@ -307,7 +310,7 @@ func (r *MultiClusterEngineReconciler) Reconcile(ctx context.Context, req ctrl.R
}

for _, kind := range backplanev1.GetLegacyPrometheusKind() {
err = r.removeLegacyPrometheusConfigurations(ctx, "openshift-monitoring", kind)
_ = r.removeLegacyPrometheusConfigurations(ctx, "openshift-monitoring", kind)
}

result, err = r.ensureToggleableComponents(ctx, backplaneConfig)
Expand All @@ -320,6 +323,16 @@ func (r *MultiClusterEngineReconciler) Reconcile(ctx context.Context, req ctrl.R
return result, err
}

result, err = r.createMetricsService(ctx, backplaneConfig)
if err != nil {
return result, err
}

result, err = r.createMetricsServiceMonitor(ctx, backplaneConfig)
if err != nil {
return result, err
}

result, err = r.ensureRemovalsGone(backplaneConfig)
if err != nil {
return result, err
Expand Down Expand Up @@ -440,7 +453,8 @@ func (r *MultiClusterEngineReconciler) SetupWithManager(mgr ctrl.Manager) error

// createTrustBundleConfigmap creates a configmap that will be injected with the
// trusted CA bundle for use with the OCP cluster wide proxy
func (r *MultiClusterEngineReconciler) createTrustBundleConfigmap(ctx context.Context, mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) {
func (r *MultiClusterEngineReconciler) createTrustBundleConfigmap(ctx context.Context,
mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) {
log := log.FromContext(ctx)

// Get Trusted Bundle configmap name
Expand Down Expand Up @@ -494,6 +508,141 @@ func (r *MultiClusterEngineReconciler) createTrustBundleConfigmap(ctx context.Co
return ctrl.Result{}, nil
}

// createMetricsService makes sure the Service named utils.MCEOperatorMetricsServiceName
// exists in the MultiClusterEngine's target namespace. When the Service is already
// present it is left untouched; when it is missing, a new one is created with the
// MultiClusterEngine set as its controller reference. Lookup and create failures are
// logged and returned together with a requeue after requeuePeriod.
func (r *MultiClusterEngineReconciler) createMetricsService(ctx context.Context,
	mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) {
	logger := log.FromContext(ctx)

	// Port on which the Service listens and which it targets on the selected pods.
	const metricsPort = 8080

	svcName := utils.MCEOperatorMetricsServiceName
	svcNamespace := mce.Spec.TargetNamespace
	key := types.NamespacedName{Name: svcName, Namespace: svcNamespace}

	lookupErr := r.Client.Get(ctx, key, &corev1.Service{})
	if lookupErr == nil {
		// The Service is already there; nothing to do.
		return ctrl.Result{}, nil
	}

	if !apierrors.IsNotFound(lookupErr) {
		// Anything other than "not found" is unexpected: log it and retry later.
		logger.Error(lookupErr, fmt.Sprintf("error while getting multicluster-engine metrics service: %s/%s",
			svcNamespace, svcName))
		return ctrl.Result{RequeueAfter: requeuePeriod}, lookupErr
	}

	// The Service does not exist yet, so build it.
	svc := &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      svcName,
			Namespace: svcNamespace,
			Labels:    map[string]string{"control-plane": controlPlane},
		},
		Spec: corev1.ServiceSpec{
			Ports: []corev1.ServicePort{{
				Name:       "metrics",
				Port:       metricsPort,
				Protocol:   corev1.ProtocolTCP,
				TargetPort: intstr.FromInt(metricsPort),
			}},
			Selector: map[string]string{"control-plane": controlPlane},
		},
	}

	if ownerErr := ctrl.SetControllerReference(mce, svc, r.Scheme); ownerErr != nil {
		return ctrl.Result{}, pkgerrors.Wrapf(
			ownerErr, "error setting controller reference on metrics service: %s", svcName,
		)
	}

	if createErr := r.Client.Create(ctx, svc); createErr != nil {
		logger.Error(createErr, fmt.Sprintf("error creating multicluster-engine metrics service: %s", svcName))
		return ctrl.Result{RequeueAfter: requeuePeriod}, createErr
	}

	logger.Info(fmt.Sprintf("Created multicluster-engine metrics service: %s", svcName))
	return ctrl.Result{}, nil
}

// createMetricsServiceMonitor ensures that the ServiceMonitor named
// utils.MCEOperatorMetricsServiceMonitorName exists in the MultiClusterEngine's
// target namespace.
//
// If the ServiceMonitor is already present it is left untouched. If it is missing,
// a new one is created that selects services labelled "control-plane": controlPlane
// in the target namespace and scrapes their "metrics" port; the MultiClusterEngine
// is set as its controller reference.
//
// Lookup failures other than "not found", and create failures, are logged and
// returned together with a requeue after requeuePeriod. A failure to set the
// controller reference is returned wrapped, without an explicit requeue delay.
func (r *MultiClusterEngineReconciler) createMetricsServiceMonitor(ctx context.Context,
	mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) {
	log := log.FromContext(ctx)

	smName := utils.MCEOperatorMetricsServiceMonitorName
	smNamespace := mce.Spec.TargetNamespace

	namespacedName := types.NamespacedName{
		Name:      smName,
		Namespace: smNamespace,
	}

	// Check if the servicemonitor exists
	if err := r.Client.Get(ctx, namespacedName, &monitorv1.ServiceMonitor{}); err != nil {
		if !apierrors.IsNotFound(err) {
			// Unknown error. Requeue
			log.Error(err, fmt.Sprintf("error while getting multicluster-engine metrics servicemonitor: %s/%s",
				smNamespace, smName))
			return ctrl.Result{RequeueAfter: requeuePeriod}, err
		}

		// Create metrics servicemonitor
		sm := &monitorv1.ServiceMonitor{
			ObjectMeta: metav1.ObjectMeta{
				Name:      smName,
				Namespace: smNamespace,
				Labels: map[string]string{
					"control-plane": controlPlane,
				},
			},
			Spec: monitorv1.ServiceMonitorSpec{
				Endpoints: []monitorv1.Endpoint{
					{
						BearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
						// NOTE(review): an empty selector looks equivalent to leaving the
						// field unset; kept as in the original, confirm whether it is needed.
						BearerTokenSecret: corev1.SecretKeySelector{
							Key: "",
						},
						// Refers to the endpoint port by name.
						Port: "metrics",
					},
				},
				// Only look for matching services in the target namespace.
				NamespaceSelector: monitorv1.NamespaceSelector{
					MatchNames: []string{
						smNamespace,
					},
				},
				Selector: metav1.LabelSelector{
					MatchLabels: map[string]string{
						"control-plane": controlPlane,
					},
				},
			},
		}

		if err = ctrl.SetControllerReference(mce, sm, r.Scheme); err != nil {
			return ctrl.Result{}, pkgerrors.Wrapf(
				err, "error setting controller reference on multicluster-engine metrics servicemonitor: %s", smName)
		}

		if err = r.Client.Create(ctx, sm); err != nil {
			// Error creating metrics servicemonitor
			log.Error(err, fmt.Sprintf("error creating metrics servicemonitor: %s", smName))
			return ctrl.Result{RequeueAfter: requeuePeriod}, err
		}

		log.Info(fmt.Sprintf("Created multicluster-engine metrics servicemonitor: %s", smName))
	}

	return ctrl.Result{}, nil
}

// DeployAlwaysSubcomponents ensures all subcomponents exist
func (r *MultiClusterEngineReconciler) DeployAlwaysSubcomponents(ctx context.Context, backplaneConfig *backplanev1.MultiClusterEngine) (ctrl.Result, error) {
log := log.FromContext(ctx)
Expand Down
4 changes: 2 additions & 2 deletions controllers/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,9 @@ func (r *MultiClusterEngineReconciler) removeLegacyPrometheusConfigurations(ctx
for _, c := range backplanev1.MCEComponents {
res, err := func() (string, error) {
if configType == "PrometheusRule" {
return backplanev1.GetPrometheusRulesName(c)
return backplanev1.GetLegacyPrometheusRulesName(c)
}
return backplanev1.GetServiceMonitorName(c)
return backplanev1.GetLegacyServiceMonitorName(c)
}()

if err != nil {
Expand Down
14 changes: 14 additions & 0 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,20 @@ const (
OpenShiftClusterMonitoringLabel = "openshift.io/cluster-monitoring"
)

// Names of the objects that expose the multicluster-engine-operator metrics.
const (
	// MCEOperatorMetricsServiceName is the name of the service used to expose the
	// metrics endpoint for the multicluster-engine-operator.
	MCEOperatorMetricsServiceName = "multicluster-engine-operator-metrics"

	// MCEOperatorMetricsServiceMonitorName is the name of the service monitor used
	// to expose the metrics for the multicluster-engine-operator.
	MCEOperatorMetricsServiceMonitorName = "multicluster-engine-operator-metrics"
)

var onComponents = []string{
backplanev1.AssistedService,
backplanev1.ClusterLifecycle,
Expand Down
Loading