Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add check on 2 MultiKueue CRD if v1alpha1 version in the cluster and not in termination, error out #1616

Merged
merged 5 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ metadata:
categories: AI/Machine Learning, Big Data
certified: "False"
containerImage: quay.io/opendatahub/opendatahub-operator:v2.23.1
createdAt: "2025-02-04T09:17:15Z"
createdAt: "2025-02-05T09:18:14Z"
olm.skipRange: '>=1.0.0 <2.23.1'
operators.operatorframework.io/builder: operator-sdk-v1.31.0
operators.operatorframework.io/internal-objects: '["featuretrackers.features.opendatahub.io",
Expand Down
1 change: 1 addition & 0 deletions controllers/components/kueue/kueue_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ func (s *componentHandler) NewComponentReconciler(ctx context.Context, mgr ctrl.
component.ForLabel(labels.ODH.Component(LegacyComponentName), labels.True)),
).
// Add Kueue-specific actions
WithAction(checkPreConditions). // check if CRD multikueueconfigs/multikueueclusters with v1alpha1 exist in cluster and not in termination
WithAction(initialize).
WithAction(devFlags).
WithAction(releases.NewAction()).
Expand Down
32 changes: 32 additions & 0 deletions controllers/components/kueue/kueue_controller_actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,46 @@ import (
"context"
"fmt"

"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

componentApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1alpha1"
"github.com/opendatahub-io/opendatahub-operator/v2/controllers/status"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster/gvk"
odherrors "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/errors"
odhtypes "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/types"
odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/annotations"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/resources"
)

func checkPreConditions(ctx context.Context, rr *odhtypes.ReconciliationRequest) error {
k, ok := rr.Instance.(*componentApi.Kueue)
if !ok {
return fmt.Errorf("resource instance %v is not a componentApi.Kueue)", rr.Instance)
}

rConfig, eConfig := cluster.HasCRDWithVersion(ctx, rr.Client, gvk.MultiKueueConfigV1Alpha1.GroupKind(), gvk.MultiKueueConfigV1Alpha1.Version)
rCluster, eCluster := cluster.HasCRDWithVersion(ctx, rr.Client, gvk.MultikueueClusterV1Alpha1.GroupKind(), gvk.MultikueueClusterV1Alpha1.Version)
if eConfig != nil || eCluster != nil {
return odherrors.NewStopError("failed to check CRDs version: %v, %v", eConfig, eCluster)
}
if rConfig || rCluster {
s := k.GetStatus()
s.Phase = status.PhaseNotReady
meta.SetStatusCondition(&s.Conditions, metav1.Condition{
Type: status.ConditionTypeReady,
Status: metav1.ConditionFalse,
Reason: status.MultiKueueCRDReason,
Message: status.MultiKueueCRDMessage,
ObservedGeneration: s.ObservedGeneration,
})
return odherrors.NewStopError(status.MultiKueueCRDMessage)
}
return nil
}

func initialize(_ context.Context, rr *odhtypes.ReconciliationRequest) error {
rr.Manifests = append(rr.Manifests, manifestsPath())
return nil
Expand Down
6 changes: 6 additions & 0 deletions controllers/status/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ const (
"remove existing Argo workflows or set `spec.components.datasciencepipelines.managementState` to Removed to proceed"
)

// For Kueue MultiKueue CRD.
const (
MultiKueueCRDReason = "MultiKueueCRDV1Alpha1Exist"
MultiKueueCRDMessage = "MultiKueue CRDs MultiKueueConfig v1alpha1 and MultiKueueCluster v1Alpha1 exist, please remove them to proceed"
)

// SetProgressingCondition sets the ProgressingCondition to True and other conditions to false or
// Unknown. Used when we are just starting to reconcile, and there are no existing conditions.
func SetProgressingCondition(conditions *[]conditionsv1.Condition, reason string, message string) {
Expand Down
12 changes: 12 additions & 0 deletions pkg/cluster/gvk/gvk.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,4 +250,16 @@ var (
Version: "v1",
Kind: "ValidatingAdmissionPolicyBinding",
}

MultiKueueConfigV1Alpha1 = schema.GroupVersionKind{
Group: "kueue.x-k8s.io",
Version: "v1alpha1",
Kind: "MultiKueueConfig",
}

MultikueueClusterV1Alpha1 = schema.GroupVersionKind{
Group: "kueue.x-k8s.io",
Version: "v1alpha1",
Kind: "MultiKueueCluster",
}
)
25 changes: 25 additions & 0 deletions pkg/cluster/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,28 @@ func CustomResourceDefinitionExists(ctx context.Context, cli client.Client, crdG

return err
}

// return true if found, return false if not found required CRD with version
// checks on both CRD API version also if it is under deletion.
func HasCRDWithVersion(ctx context.Context, cli client.Client, crdGK schema.GroupKind, version string) (bool, error) {
crd := &apiextv1.CustomResourceDefinition{}
name := strings.ToLower(fmt.Sprintf("%ss.%s", crdGK.Kind, crdGK.Group))
err := cli.Get(ctx, client.ObjectKey{Name: name}, crd)
if err != nil {
if errors.IsNotFound(err) {
return false, nil
}
return false, err
}
for _, v := range crd.Status.StoredVersions {
if v == version {
for _, condition := range crd.Status.Conditions {
if condition.Type == apiextv1.Terminating && condition.Status == apiextv1.ConditionTrue {
return false, nil
}
}
return true, nil
}
}
return false, nil
}
10 changes: 9 additions & 1 deletion tests/e2e/components_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ func (c *ComponentTestCtx) ValidateComponentDisabled(t *testing.T) {
))
}

func (c *ComponentTestCtx) ValidateCRDReinstated(t *testing.T, name string) {
func (c *ComponentTestCtx) ValidateCRDReinstated(t *testing.T, name string, version ...string) {
t.Helper()

g := c.NewWithT(t)
Expand Down Expand Up @@ -258,6 +258,14 @@ func (c *ComponentTestCtx) ValidateCRDReinstated(t *testing.T, name string) {
g.List(gvk.CustomResourceDefinition, crdSel).Eventually().Should(
HaveLen(1),
)
if len(version) != 0 {
g.Get(
gvk.CustomResourceDefinition,
types.NamespacedName{Name: name},
).Eventually(5*time.Second, 500*time.Millisecond).Should(
jq.Match(`.status.storedVersions[0] == "%s"`, version[0]),
lburgazzoli marked this conversation as resolved.
Show resolved Hide resolved
)
}
}

// Validate releases for any component in the DataScienceCluster.
Expand Down
14 changes: 8 additions & 6 deletions tests/e2e/kueue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,17 @@ func (tc *KueueTestCtx) validateKueueVAPReady(t *testing.T) {
}

func (tc *KueueTestCtx) validateCRDReinstated(t *testing.T) {
crds := []string{
"workloads.kueue.x-k8s.io",
"multikueueclusters.kueue.x-k8s.io",
"multikueueconfigs.kueue.x-k8s.io",
// validate multikuueclusters, multikueueconfigs has v1beta1 version

crds := map[string]string{
"workloads.kueue.x-k8s.io": "v1beta1",
"multikueueclusters.kueue.x-k8s.io": "v1beta1",
"multikueueconfigs.kueue.x-k8s.io": "v1beta1",
}

for _, crd := range crds {
for crd, version := range crds {
t.Run(crd, func(t *testing.T) {
tc.ValidateCRDReinstated(t, crd)
tc.ValidateCRDReinstated(t, crd, version)
})
}
}