From cf31cf944cf317c2effe43e2aa26749f2e53f345 Mon Sep 17 00:00:00 2001 From: Slava Lysunkin Date: Mon, 4 Nov 2024 12:16:11 -0600 Subject: [PATCH] Added checking for MachineDeployments availability to update ManagedCluster status --- .../controller/managedcluster_controller.go | 71 +++++++++++++++---- internal/controller/management_controller.go | 2 +- internal/utils/status/status.go | 20 +++--- .../hmc/templates/rbac/controller/roles.yaml | 5 ++ 4 files changed, 74 insertions(+), 24 deletions(-) diff --git a/internal/controller/managedcluster_controller.go b/internal/controller/managedcluster_controller.go index c8d5840a5..97784d045 100644 --- a/internal/controller/managedcluster_controller.go +++ b/internal/controller/managedcluster_controller.go @@ -19,6 +19,7 @@ import ( "encoding/json" "errors" "fmt" + "slices" "strings" "time" @@ -56,6 +57,7 @@ import ( const ( DefaultRequeueInterval = 10 * time.Second + MaxChildObjects = 100 ) // ManagedClusterReconciler reconciles a ManagedCluster object @@ -104,16 +106,13 @@ func (r *ManagedClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reque return r.Update(ctx, managedCluster) } -func (r *ManagedClusterReconciler) setStatusFromClusterStatus( - ctx context.Context, managedCluster *hmc.ManagedCluster, -) (bool, error) { +func (r *ManagedClusterReconciler) setStatusFromChildObjects( + ctx context.Context, managedCluster *hmc.ManagedCluster, gvr schema.GroupVersionResource, conditions []string, +) (requeue bool, _ error) { l := ctrl.LoggerFrom(ctx) - resourceConditions, err := status.GetResourceConditions(ctx, managedCluster.Namespace, r.DynamicClient, schema.GroupVersionResource{ - Group: "cluster.x-k8s.io", - Version: "v1beta1", - Resource: "clusters", - }, labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: managedCluster.Name}).String()) + resourceConditions, err := status.GetResourceConditions(ctx, managedCluster.Namespace, r.DynamicClient, gvr, + 
labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: managedCluster.Name}).String(), MaxChildObjects) if err != nil { notFoundErr := status.ResourceNotFoundError{} if errors.As(err, &notFoundErr) { @@ -125,14 +124,20 @@ func (r *ManagedClusterReconciler) setStatusFromClusterStatus( allConditionsComplete := true for _, metaCondition := range resourceConditions.Conditions { - if metaCondition.Status != "True" { - allConditionsComplete = false - } + if slices.Contains(conditions, metaCondition.Type) { + if metaCondition.Status != "True" { + if metaCondition.Message != "" { + metaCondition.Message = gvr.Resource + ": " + metaCondition.Message + } + allConditionsComplete = false + } - if metaCondition.Reason == "" && metaCondition.Status == "True" { - metaCondition.Reason = "Succeeded" + if metaCondition.Reason == "" && metaCondition.Status == "True" { + metaCondition.Message = gvr.Resource + " are Ready" + metaCondition.Reason = "Succeeded" + } + apimeta.SetStatusCondition(managedCluster.GetConditions(), metaCondition) } - apimeta.SetStatusCondition(managedCluster.GetConditions(), metaCondition) } return !allConditionsComplete, nil @@ -309,7 +314,7 @@ func (r *ManagedClusterReconciler) Update(ctx context.Context, managedCluster *h }) } - requeue, err := r.setStatusFromClusterStatus(ctx, managedCluster) + requeue, err := r.needToRequeue(ctx, managedCluster) if err != nil { if requeue { return ctrl.Result{RequeueAfter: DefaultRequeueInterval}, err @@ -337,6 +342,42 @@ func (r *ManagedClusterReconciler) Update(ctx context.Context, managedCluster *h return ctrl.Result{}, nil } +func (r *ManagedClusterReconciler) needToRequeue(ctx context.Context, managedCluster *hmc.ManagedCluster) (bool, error) { + type objectToCheck struct { + gvr schema.GroupVersionResource + conditions []string + } + + var needToRequeue bool + var errs error + for _, obj := range []objectToCheck{ + { + gvr: schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + 
Resource: "clusters", + }, + conditions: []string{"ControlPlaneInitialized", "ControlPlaneReady", "InfrastructureReady"}, + }, + { + gvr: schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "machinedeployments", + }, + conditions: []string{"Available"}, + }, + } { + requeue, err := r.setStatusFromChildObjects(ctx, managedCluster, obj.gvr, obj.conditions) + errs = errors.Join(errs, err) + if requeue { + needToRequeue = true + } + } + + return needToRequeue, errs +} + // updateServices reconciles services provided in ManagedCluster.Spec.Services. // TODO(https://github.com/Mirantis/hmc/issues/361): Set status to ManagedCluster object at appropriate places. func (r *ManagedClusterReconciler) updateServices(ctx context.Context, mc *hmc.ManagedCluster) (ctrl.Result, error) { diff --git a/internal/controller/management_controller.go b/internal/controller/management_controller.go index e2ade51c1..06a00be32 100644 --- a/internal/controller/management_controller.go +++ b/internal/controller/management_controller.go @@ -243,7 +243,7 @@ func (r *ManagementReconciler) checkProviderStatus(ctx context.Context, provider } resourceConditions, err := status.GetResourceConditions(ctx, r.SystemNamespace, r.DynamicClient, gvr, - labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: providerTemplateName}).String(), + labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: providerTemplateName}).String(), 1, ) if err != nil { notFoundErr := status.ResourceNotFoundError{} diff --git a/internal/utils/status/status.go b/internal/utils/status/status.go index 389005f28..77c48c9ff 100644 --- a/internal/utils/status/status.go +++ b/internal/utils/status/status.go @@ -86,10 +86,10 @@ type ResourceConditions struct { // checked by the caller to prevent reconciliation loops. 
func GetResourceConditions( ctx context.Context, namespace string, dynamicClient dynamic.Interface, - gvr schema.GroupVersionResource, labelSelector string, + gvr schema.GroupVersionResource, labelSelector string, limit int, ) (resourceConditions *ResourceConditions, err error) { list, err := dynamicClient.Resource(gvr).Namespace(namespace).List(ctx, metav1.ListOptions{ - LabelSelector: labelSelector, Limit: 2, + LabelSelector: labelSelector, }) if err != nil { if apierrors.IsNotFound(err) { @@ -103,15 +103,19 @@ func GetResourceConditions( return nil, ResourceNotFoundError{Resource: gvr.Resource} } - if len(list.Items) > 1 { - return nil, fmt.Errorf("expected to find only one of resource: %s with label: %q, found: %d", - gvr.Resource, labelSelector, len(list.Items)) + if len(list.Items) > limit { + return nil, fmt.Errorf("expected to find only %d of resource: %s with label: %q, found: %d", + limit, gvr.Resource, labelSelector, len(list.Items)) } + var conditions []metav1.Condition kind, name := ObjKindName(&list.Items[0]) - conditions, err := ConditionsFromUnstructured(&list.Items[0]) - if err != nil { - return nil, fmt.Errorf("failed to get conditions: %w", err) + for _, item := range list.Items { + c, err := ConditionsFromUnstructured(&item) + if err != nil { + return nil, fmt.Errorf("failed to get conditions: %w", err) + } + conditions = append(conditions, c...) } return &ResourceConditions{ diff --git a/templates/provider/hmc/templates/rbac/controller/roles.yaml b/templates/provider/hmc/templates/rbac/controller/roles.yaml index 9334495ee..b4cfea475 100644 --- a/templates/provider/hmc/templates/rbac/controller/roles.yaml +++ b/templates/provider/hmc/templates/rbac/controller/roles.yaml @@ -20,6 +20,11 @@ rules: resources: - clusters verbs: {{ include "rbac.viewerVerbs" . | nindent 4 }} +- apiGroups: + - cluster.x-k8s.io + resources: + - machinedeployments + verbs: {{ include "rbac.viewerVerbs" . | nindent 4 }} - apiGroups: - helm.toolkit.fluxcd.io resources: