Skip to content

Commit

Permalink
feat: add check on 2 MultiKueue CRD if v1alpha1 version in the cluste…
Browse files Browse the repository at this point in the history
…r, error out

- DSC condition on kueueReady:
	reason: MultiKueueCRDV1Alpha1Exist
message: MultiKueue CRDs MultiKueueConfig v1alpha1 and MultiKueueCluster v1alpha1 exist, please remove them before proceeding
- reconcile is triggered when the user deletes either of these 2 CRDs

Signed-off-by: Wen Zhou <[email protected]>
  • Loading branch information
zdtsw committed Feb 5, 2025
1 parent 8504ce5 commit 27b1834
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 6 deletions.
4 changes: 2 additions & 2 deletions apis/components/v1alpha1/kserve_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ type KserveSpec struct {
// KserveCommonStatus groups the Kserve status fields visible here: the default
// deployment mode, the serverless management state, and the inlined component
// release status.
type KserveCommonStatus struct {
// DefaultDeploymentMode is the value of the defaultDeploymentMode field
// as read from the "deploy" JSON in the inferenceservice-config ConfigMap
DefaultDeploymentMode string `json:"defaultDeploymentMode,omitempty"`
ServerlessMode operatorv1.ManagementState `json:"serverlessMode,omitempty"`
DefaultDeploymentMode string `json:"defaultDeploymentMode,omitempty"`
ServerlessMode operatorv1.ManagementState `json:"serverlessMode,omitempty"`
common.ComponentReleaseStatus `json:",inline"`
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,16 @@ metadata:
categories: AI/Machine Learning, Big Data
certified: "False"
containerImage: quay.io/opendatahub/opendatahub-operator:v2.23.1
createdAt: "2025-02-04T09:17:15Z"
createdAt: "2025-02-05T09:18:14Z"
olm.skipRange: '>=1.0.0 <2.23.1'
operators.operatorframework.io/builder: operator-sdk-v1.31.0
operators.operatorframework.io/internal-objects: '["featuretrackers.features.opendatahub.io",
"codeflares.components.platform.opendatahub.io", "dashboards.components.platform.opendatahub.io",
"datasciencepipelines.components.platform.opendatahub.io", "kserves.components.platform.opendatahub.io",
"kueues.components.platform.opendatahub.io", "modelmeshservings.components.platform.opendatahub.io",
"modelregistries.components.platform.opendatahub.io", "rays.components.platform.opendatahub.io",
"trainingoperators.components.platform.opendatahub.io", "trustyais.components.platform.opendatahub.io", "workbenches.components.platform.opendatahub.io",
"monitorings.services.platform.opendatahub.io","modelcontrollers.components.platform.opendatahub.io",
"trainingoperators.components.platform.opendatahub.io", "trustyais.components.platform.opendatahub.io",
"workbenches.components.platform.opendatahub.io", "monitorings.services.platform.opendatahub.io","modelcontrollers.components.platform.opendatahub.io",
"feastoperators.components.platform.opendatahub.io"]'
operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
repository: https://github.com/opendatahub-io/opendatahub-operator
Expand Down Expand Up @@ -1317,7 +1317,7 @@ spec:
value: /opt/manifests
- name: ODH_PLATFORM_TYPE
value: OpenDataHub
image: quay.io/ugiordan/opendatahub-operator:v2.18.0-versions-mapping
image: REPLACE_IMAGE:latest
imagePullPolicy: Always
livenessProbe:
httpGet:
Expand Down
17 changes: 17 additions & 0 deletions controllers/components/kueue/kueue_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import (
rbacv1 "k8s.io/api/rbac/v1"
extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/predicate"

componentApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1alpha1"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster"
Expand Down Expand Up @@ -67,6 +69,20 @@ func (s *componentHandler) NewComponentReconciler(ctx context.Context, mgr ctrl.
Owns(&admissionregistrationv1.MutatingWebhookConfiguration{}).
Owns(&admissionregistrationv1.ValidatingWebhookConfiguration{}).
Owns(&appsv1.Deployment{}, reconciler.WithPredicates(resources.NewDeploymentPredicate())).
Watches(
&extv1.CustomResourceDefinition{},
reconciler.WithEventHandler(
handlers.ToNamed(componentApi.KueueInstanceName)),
reconciler.WithPredicates(predicate.Or(
component.ForLabel(labels.ODH.Component(LegacyComponentName), labels.True),
// Also reconcile when a user manually deletes the old v1alpha1 MultiKueueConfig/MultiKueueCluster CRDs.
predicate.Funcs{
DeleteFunc: func(e event.DeleteEvent) bool {
	// The watched objects are CustomResourceDefinitions, whose metadata.name
	// has the form "<plural>.<group>" and never equals the bare Kind. Match
	// on the full CRD names so deleting either MultiKueue CRD triggers a
	// reconcile.
	switch e.Object.GetName() {
	case "multikueueconfigs." + gvk.MultiKueueConfigV1Alpha1.Group,
		"multikueueclusters." + gvk.MultikueueClusterV1Alpha1.Group:
		return true
	default:
		return false
	}
},
},
)),
).
Watches(
&extv1.CustomResourceDefinition{},
reconciler.WithEventHandler(
Expand All @@ -75,6 +91,7 @@ func (s *componentHandler) NewComponentReconciler(ctx context.Context, mgr ctrl.
component.ForLabel(labels.ODH.Component(LegacyComponentName), labels.True)),
).
// Add Kueue-specific actions
WithAction(checkPreConditions). // check if CRD multikueueconfigs/multikueueclusters with v1alpha1 exist in cluster and not in termination
WithAction(initialize).
WithAction(devFlags).
WithAction(releases.NewAction()).
Expand Down
32 changes: 32 additions & 0 deletions controllers/components/kueue/kueue_controller_actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,46 @@ import (
"context"
"fmt"

"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

componentApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1alpha1"
"github.com/opendatahub-io/opendatahub-operator/v2/controllers/status"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster/gvk"
odherrors "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/errors"
odhtypes "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/types"
odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/annotations"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/resources"
)

// checkPreConditions stops Kueue reconciliation while the v1alpha1 versions of
// the MultiKueueConfig or MultiKueueCluster CRDs are still reported as present.
//
// It returns a plain error when rr.Instance is not a *componentApi.Kueue, and a
// stop error when either CRD lookup fails. When either lookup reports that the
// v1alpha1 version is still present, it marks the Kueue status as not ready,
// sets the Ready condition to False with the MultiKueue reason and message, and
// returns a stop error carrying that same message. It returns nil otherwise.
func checkPreConditions(ctx context.Context, rr *odhtypes.ReconciliationRequest) error {
	k, ok := rr.Instance.(*componentApi.Kueue)
	if !ok {
		return fmt.Errorf("resource instance %v is not a componentApi.Kueue", rr.Instance)
	}

	// Both lookups run before any error is inspected so that a single stop
	// error can report the outcome of both.
	configAbsent, configErr := cluster.CRDVersioNotExists(ctx, rr.Client, gvk.MultiKueueConfigV1Alpha1.GroupKind(), gvk.MultiKueueConfigV1Alpha1.Version)
	clusterAbsent, clusterErr := cluster.CRDVersioNotExists(ctx, rr.Client, gvk.MultikueueClusterV1Alpha1.GroupKind(), gvk.MultikueueClusterV1Alpha1.Version)
	if configErr != nil || clusterErr != nil {
		return odherrors.NewStopError("failed to check CRDs version: %v, %v", configErr, clusterErr)
	}

	if configAbsent && clusterAbsent {
		return nil
	}

	// At least one legacy CRD version is still present: surface it on the
	// component status and stop the remaining actions.
	s := k.GetStatus()
	s.Phase = status.PhaseNotReady
	meta.SetStatusCondition(&s.Conditions, metav1.Condition{
		Type:               status.ConditionTypeReady,
		Status:             metav1.ConditionFalse,
		Reason:             status.MultiKueueCRDReason,
		Message:            status.MultiKueueCRDMessage,
		ObservedGeneration: s.ObservedGeneration,
	})
	return odherrors.NewStopError(status.MultiKueueCRDMessage)
}

func initialize(_ context.Context, rr *odhtypes.ReconciliationRequest) error {
rr.Manifests = append(rr.Manifests, manifestsPath())
return nil
Expand Down
6 changes: 6 additions & 0 deletions controllers/status/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ const (
"remove existing Argo workflows or set `spec.components.datasciencepipelines.managementState` to Removed to proceed"
)

// Condition reason and message for Kueue when v1alpha1 MultiKueue CRDs are
// still present in the cluster.
const (
	MultiKueueCRDReason  = "MultiKueueCRDV1Alpha1Exist"
	MultiKueueCRDMessage = "MultiKueue CRDs MultiKueueConfig v1alpha1 and/or MultiKueueCluster v1alpha1 exist, please remove them before proceeding"
)

// SetProgressingCondition sets the ProgressingCondition to True and other conditions to false or
// Unknown. Used when we are just starting to reconcile, and there are no existing conditions.
func SetProgressingCondition(conditions *[]conditionsv1.Condition, reason string, message string) {
Expand Down
12 changes: 12 additions & 0 deletions pkg/cluster/gvk/gvk.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,4 +250,16 @@ var (
Version: "v1",
Kind: "ValidatingAdmissionPolicyBinding",
}

// MultiKueueConfigV1Alpha1 is the GroupVersionKind of the MultiKueueConfig
// kind at version v1alpha1 in the kueue.x-k8s.io API group.
MultiKueueConfigV1Alpha1 = schema.GroupVersionKind{
Group: "kueue.x-k8s.io",
Version: "v1alpha1",
Kind: "MultiKueueConfig",
}

// MultikueueClusterV1Alpha1 is the GroupVersionKind of the MultiKueueCluster
// kind at version v1alpha1 in the kueue.x-k8s.io API group.
MultikueueClusterV1Alpha1 = schema.GroupVersionKind{
Group: "kueue.x-k8s.io",
Version: "v1alpha1",
Kind: "MultiKueueCluster",
}
)
25 changes: 25 additions & 0 deletions pkg/cluster/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,28 @@ func CustomResourceDefinitionExists(ctx context.Context, cli client.Client, crdG

return err
}

// CRDVersioNotExists reports whether the given version of a CRD can be treated
// as absent from the cluster.
//
// The CRD is looked up by the name "<lowercased kind>s.<group>". The result is
// true when the CRD is not found, when version is not listed in the CRD's
// status.storedVersions, or when it is listed but the CRD carries a Terminating
// condition with status True. The result is false only when the version is
// stored and the CRD is not terminating.
//
// For any lookup failure other than NotFound it returns false together with the
// wrapped error; the boolean carries no meaning in that case.
func CRDVersioNotExists(ctx context.Context, cli client.Client, crdGK schema.GroupKind, version string) (bool, error) {
	name := strings.ToLower(fmt.Sprintf("%ss.%s", crdGK.Kind, crdGK.Group))

	crd := &apiextv1.CustomResourceDefinition{}
	if err := cli.Get(ctx, client.ObjectKey{Name: name}, crd); err != nil {
		if errors.IsNotFound(err) {
			return true, nil
		}
		return false, fmt.Errorf("getting CRD %q: %w", name, err)
	}

	stored := false
	for _, v := range crd.Status.StoredVersions {
		if v == version {
			stored = true
			break
		}
	}
	if !stored {
		return true, nil
	}

	// The version is stored, but a CRD that is already terminating is treated
	// as gone.
	for _, cond := range crd.Status.Conditions {
		if cond.Type == apiextv1.Terminating && cond.Status == apiextv1.ConditionTrue {
			return true, nil
		}
	}
	return false, nil
}

0 comments on commit 27b1834

Please sign in to comment.