diff --git a/internal/controller/managedcluster_controller.go b/internal/controller/managedcluster_controller.go index 55e15a217..b2ec7d80d 100644 --- a/internal/controller/managedcluster_controller.go +++ b/internal/controller/managedcluster_controller.go @@ -19,6 +19,7 @@ import ( "encoding/json" "errors" "fmt" + "slices" "strings" "time" @@ -56,6 +57,7 @@ import ( const ( DefaultRequeueInterval = 10 * time.Second + MaxChildObjects = 100 ) // ManagedClusterReconciler reconciles a ManagedCluster object @@ -104,35 +106,39 @@ func (r *ManagedClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reque return r.Update(ctx, managedCluster) } -func (r *ManagedClusterReconciler) setStatusFromClusterStatus( - ctx context.Context, managedCluster *hmc.ManagedCluster, -) (bool, error) { +func (r *ManagedClusterReconciler) setStatusFromChildObjects( + ctx context.Context, managedCluster *hmc.ManagedCluster, gvr schema.GroupVersionResource, conditions []string, +) (requeue bool, _ error) { l := ctrl.LoggerFrom(ctx) - resourceConditions, err := status.GetResourceConditions(ctx, managedCluster.Namespace, r.DynamicClient, schema.GroupVersionResource{ - Group: "cluster.x-k8s.io", - Version: "v1beta1", - Resource: "clusters", - }, labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: managedCluster.Name}).String()) + resourceConditions, err := status.GetResourceConditions(ctx, managedCluster.Namespace, r.DynamicClient, gvr, + labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: managedCluster.Name}).String(), MaxChildObjects) if err != nil { notFoundErr := status.ResourceNotFoundError{} if errors.As(err, &notFoundErr) { l.Info(err.Error()) - return true, nil + // don't error or retry if nothing is available + return false, nil } return false, fmt.Errorf("failed to get conditions: %w", err) } allConditionsComplete := true for _, metaCondition := range resourceConditions.Conditions { - if metaCondition.Status != "True" { - allConditionsComplete = false - 
} + if slices.Contains(conditions, metaCondition.Type) { + if metaCondition.Status != "True" { + if metaCondition.Message != "" { + metaCondition.Message = gvr.Resource + ": " + metaCondition.Message + } + allConditionsComplete = false + } - if metaCondition.Reason == "" && metaCondition.Status == "True" { - metaCondition.Reason = "Succeeded" + if metaCondition.Reason == "" && metaCondition.Status == "True" { + metaCondition.Message = gvr.Resource + " are Ready" + metaCondition.Reason = "Succeeded" + } + apimeta.SetStatusCondition(managedCluster.GetConditions(), metaCondition) } - apimeta.SetStatusCondition(managedCluster.GetConditions(), metaCondition) } return !allConditionsComplete, nil @@ -309,7 +315,7 @@ func (r *ManagedClusterReconciler) Update(ctx context.Context, managedCluster *h }) } - requeue, err := r.setStatusFromClusterStatus(ctx, managedCluster) + requeue, err := r.needToRequeue(ctx, managedCluster) if err != nil { if requeue { return ctrl.Result{RequeueAfter: DefaultRequeueInterval}, err @@ -337,6 +343,42 @@ func (r *ManagedClusterReconciler) Update(ctx context.Context, managedCluster *h return ctrl.Result{}, nil } +func (r *ManagedClusterReconciler) needToRequeue(ctx context.Context, managedCluster *hmc.ManagedCluster) (bool, error) { + type objectToCheck struct { + gvr schema.GroupVersionResource + conditions []string + } + + var needToRequeue bool + var errs error + for _, obj := range []objectToCheck{ + { + gvr: schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "clusters", + }, + conditions: []string{"ControlPlaneInitialized", "ControlPlaneReady", "InfrastructureReady"}, + }, + { + gvr: schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "machinedeployments", + }, + conditions: []string{"Available"}, + }, + } { + requeue, err := r.setStatusFromChildObjects(ctx, managedCluster, obj.gvr, obj.conditions) + errs = errors.Join(errs, err) + if requeue { + needToRequeue = 
true + } + } + + return needToRequeue, errs +} + // updateServices reconciles services provided in ManagedCluster.Spec.Services. func (r *ManagedClusterReconciler) updateServices(ctx context.Context, mc *hmc.ManagedCluster) (_ ctrl.Result, err error) { // servicesErr is handled separately from err because we do not want diff --git a/internal/controller/management_controller.go b/internal/controller/management_controller.go index 1bab92ac8..a9e957790 100644 --- a/internal/controller/management_controller.go +++ b/internal/controller/management_controller.go @@ -272,7 +272,7 @@ func (r *ManagementReconciler) checkProviderStatus(ctx context.Context, provider } resourceConditions, err := status.GetResourceConditions(ctx, r.SystemNamespace, r.DynamicClient, gvr, - labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: providerTemplateName}).String(), + labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: providerTemplateName}).String(), 1, ) if err != nil { notFoundErr := status.ResourceNotFoundError{} diff --git a/internal/utils/status/status.go b/internal/utils/status/status.go index 389005f28..77c48c9ff 100644 --- a/internal/utils/status/status.go +++ b/internal/utils/status/status.go @@ -86,10 +86,10 @@ type ResourceConditions struct { // checked by the caller to prevent reconciliation loops. 
func GetResourceConditions( ctx context.Context, namespace string, dynamicClient dynamic.Interface, - gvr schema.GroupVersionResource, labelSelector string, + gvr schema.GroupVersionResource, labelSelector string, limit int, ) (resourceConditions *ResourceConditions, err error) { list, err := dynamicClient.Resource(gvr).Namespace(namespace).List(ctx, metav1.ListOptions{ - LabelSelector: labelSelector, Limit: 2, + LabelSelector: labelSelector, }) if err != nil { if apierrors.IsNotFound(err) { @@ -103,15 +103,19 @@ func GetResourceConditions( return nil, ResourceNotFoundError{Resource: gvr.Resource} } - if len(list.Items) > 1 { - return nil, fmt.Errorf("expected to find only one of resource: %s with label: %q, found: %d", - gvr.Resource, labelSelector, len(list.Items)) + if len(list.Items) > limit { + return nil, fmt.Errorf("expected to find only %d of resource: %s with label: %q, found: %d", + limit, gvr.Resource, labelSelector, len(list.Items)) } + var conditions []metav1.Condition kind, name := ObjKindName(&list.Items[0]) - conditions, err := ConditionsFromUnstructured(&list.Items[0]) - if err != nil { - return nil, fmt.Errorf("failed to get conditions: %w", err) + for _, item := range list.Items { + c, err := ConditionsFromUnstructured(&item) + if err != nil { + return nil, fmt.Errorf("failed to get conditions: %w", err) + } + conditions = append(conditions, c...) } return &ResourceConditions{ diff --git a/templates/provider/hmc/templates/rbac/controller/roles.yaml b/templates/provider/hmc/templates/rbac/controller/roles.yaml index 69206ee6b..d37c4f8f5 100644 --- a/templates/provider/hmc/templates/rbac/controller/roles.yaml +++ b/templates/provider/hmc/templates/rbac/controller/roles.yaml @@ -20,6 +20,11 @@ rules: resources: - clusters verbs: {{ include "rbac.viewerVerbs" . | nindent 4 }} +- apiGroups: + - cluster.x-k8s.io + resources: + - machinedeployments + verbs: {{ include "rbac.viewerVerbs" . | nindent 4 }} - apiGroups: - helm.toolkit.fluxcd.io resources: