Skip to content

Commit

Permalink
fix(cleanup): cleans up dependant resources (opendatahub-io#748)
Browse files Browse the repository at this point in the history
* feat: implements failing test for deletion using configmap

* fix(cleanup): cleans up dependant resources

The code responsible for cleaning up resources on cfg map presence was failing early due to operations on nil list instance of KfDef resources, leading to panic and restart of the pod making an impression that cleanup takes forever

* fix(reconcile): requeue only when actual error happens

Original code was always causing requeue as even if upgrade.OperatorUninstall(r.Client, r.RestConfig) resulted in nil error (success), it was wrapped in error with message error while operator uninstall: <nil>

* fix: reverts img placeholder in kustomize

* fix: removes commented out code
  • Loading branch information
bartoszmajsak authored Nov 21, 2023
1 parent bbb8a37 commit 3ca6f45
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,14 @@ func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.R

if len(instances.Items) == 0 {
// Request object not found, could have been deleted after reconcile request.
// Owned objects are automatically garbage collected. For additional cleanup logic use operatorUninstall function.
// Owned objects are automatically garbage collected.
// For additional cleanup logic use operatorUninstall function.
// Return and don't requeue
if upgrade.HasDeleteConfigMap(r.Client) {
return reconcile.Result{}, fmt.Errorf("error while operator uninstall: %v",
upgrade.OperatorUninstall(r.Client, r.RestConfig))
if uninstallErr := upgrade.OperatorUninstall(r.Client, r.RestConfig); uninstallErr != nil {
return ctrl.Result{}, fmt.Errorf("error while operator uninstall: %v",
uninstallErr)
}
}

return ctrl.Result{}, nil
Expand Down
78 changes: 32 additions & 46 deletions pkg/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strings"
"time"

"github.com/hashicorp/go-multierror"
operatorv1 "github.com/openshift/api/operator/v1"
ofapi "github.com/operator-framework/api/pkg/operators/v1alpha1"
olmclientset "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned/typed/operators/v1alpha1"
Expand Down Expand Up @@ -49,73 +50,63 @@ func OperatorUninstall(cli client.Client, cfg *rest.Config) error {
if err != nil {
return err
}
// Delete kfdefs if found
err = RemoveKfDefInstances(cli, platform)
if err != nil {

if err := RemoveKfDefInstances(cli, platform); err != nil {
return err
}

// Delete DSCInitialization instance
err = removeDSCInitialization(cli)
if err != nil {
if err := removeDSCInitialization(cli); err != nil {
return err
}

// Delete generated namespaces by the operator
generatedNamespaces := &corev1.NamespaceList{}
nsOptions := []client.ListOption{
client.MatchingLabels{cluster.ODHGeneratedNamespaceLabel: "true"},
}
if err := cli.List(context.TODO(), generatedNamespaces, nsOptions...); err != nil {
if !apierrs.IsNotFound(err) {
return fmt.Errorf("error getting generated namespaces : %v", err)
}
return fmt.Errorf("error getting generated namespaces : %v", err)
}

// Return if any one of the namespaces is Terminating due to resources that are in process of deletion. (e.g CRDs)
if len(generatedNamespaces.Items) != 0 {
for _, namespace := range generatedNamespaces.Items {
if namespace.Status.Phase == corev1.NamespaceTerminating {
return fmt.Errorf("waiting for namespace %v to be deleted", namespace.Name)
}
for _, namespace := range generatedNamespaces.Items {
if namespace.Status.Phase == corev1.NamespaceTerminating {
return fmt.Errorf("waiting for namespace %v to be deleted", namespace.Name)
}
}

// Delete all the active namespaces
for _, namespace := range generatedNamespaces.Items {
if namespace.Status.Phase == corev1.NamespaceActive {
if err := cli.Delete(context.TODO(), &namespace, []client.DeleteOption{}...); err != nil {
return fmt.Errorf("error deleting namespace %v: %v", namespace.Name, err)
}
fmt.Printf("Namespace %s deleted as a part of uninstall.", namespace.Name)
fmt.Printf("Namespace %s deleted as a part of uninstall.\n", namespace.Name)
}
}

// Wait for all resources to get cleaned up
time.Sleep(10 * time.Second)
fmt.Printf("All resources deleted as part of uninstall. Removing the operator csv")

fmt.Printf("All resources deleted as part of uninstall. Removing the operator csv\n")

return removeCsv(cli, cfg)
}

func removeDSCInitialization(cli client.Client) error {
// Last check if multiple instances of DSCInitialization exist
instanceList := &dsci.DSCInitializationList{}
var err error
err = cli.List(context.TODO(), instanceList)
if err != nil {

if err := cli.List(context.TODO(), instanceList); err != nil {
return err
}

if len(instanceList.Items) != 0 {
for _, dsciInstance := range instanceList.Items {
err = cli.Delete(context.TODO(), &dsciInstance)
if apierrs.IsNotFound(err) {
err = nil
}
var multiErr *multierror.Error
for _, dsciInstance := range instanceList.Items {
if err := cli.Delete(context.TODO(), &dsciInstance); !apierrs.IsNotFound(err) {
multiErr = multierror.Append(multiErr, err)
}
}

return err
return multiErr.ErrorOrNil()
}

// HasDeleteConfigMap returns true if delete configMap is added to the operator namespace by managed-tenants repo.
Expand Down Expand Up @@ -185,10 +176,10 @@ func CreateDefaultDSC(cli client.Client, platform deploy.Platform) error {
err := cli.Create(context.TODO(), releaseDataScienceCluster)
switch {
case err == nil:
fmt.Printf("created DataScienceCluster resource")
fmt.Printf("created DataScienceCluster resource\n")
case apierrs.IsAlreadyExists(err):
// Do not update the DSC if it already exists
fmt.Printf("DataScienceCluster resource already exists. It will not be updated with default DSC.")
fmt.Printf("DataScienceCluster resource already exists. It will not be updated with default DSC.\n")
return nil
default:
return fmt.Errorf("failed to create DataScienceCluster custom resource: %v", err)
Expand Down Expand Up @@ -235,7 +226,7 @@ func CreateDefaultDSCI(cli client.Client, platform deploy.Platform, appNamespace

switch {
case len(instances.Items) > 1:
fmt.Printf("only one instance of DSCInitialization object is allowed. Please delete other instances ")
fmt.Printf("only one instance of DSCInitialization object is allowed. Please delete other instances.\n")
return nil
case len(instances.Items) == 1:
if platform == deploy.ManagedRhods || platform == deploy.SelfManagedRhods {
Expand Down Expand Up @@ -346,7 +337,7 @@ func removeCsv(c client.Client, r *rest.Config) error {
}

if operatorCsv != nil {
fmt.Printf("Deleting csv %s", operatorCsv.Name)
fmt.Printf("Deleting csv %s\n", operatorCsv.Name)
err = c.Delete(context.TODO(), operatorCsv, []client.DeleteOption{}...)
if err != nil {
if apierrs.IsNotFound(err) {
Expand All @@ -355,9 +346,9 @@ func removeCsv(c client.Client, r *rest.Config) error {

return fmt.Errorf("error deleting clusterserviceversion: %v", err)
}
fmt.Printf("Clusterserviceversion %s deleted as a part of uninstall.", operatorCsv.Name)
fmt.Printf("Clusterserviceversion %s deleted as a part of uninstall.\n", operatorCsv.Name)
}
fmt.Printf("No clusterserviceversion for the operator found.")
fmt.Printf("No clusterserviceversion for the operator found.\n")

return nil
}
Expand Down Expand Up @@ -388,13 +379,14 @@ func getClusterServiceVersion(cfg *rest.Config, watchNameSpace string) (*ofapi.C
}

func getKfDefInstances(c client.Client) (*kfdefv1.KfDefList, error) {
// If KfDef CRD is not found, we see it as a cluster not pre-installed v1 operator // Check if kfdef are deployed
// If KfDef CRD is not found, we see it as a cluster not pre-installed v1 operator
// Check if kfdef are deployed
kfdefCrd := &apiextv1.CustomResourceDefinition{}
err := c.Get(context.TODO(), client.ObjectKey{Name: "kfdefs.kfdef.apps.kubeflow.org"}, kfdefCrd)
if err != nil {
if err := c.Get(context.TODO(), client.ObjectKey{Name: "kfdefs.kfdef.apps.kubeflow.org"}, kfdefCrd); err != nil {
if apierrs.IsNotFound(err) {
// If no Crd found, return, since its a new Installation
return nil, nil
// return empty list
return &kfdefv1.KfDefList{}, nil
} else {
return nil, fmt.Errorf("error retrieving kfdef CRD : %v", err)
}
Expand All @@ -403,14 +395,8 @@ func getKfDefInstances(c client.Client) (*kfdefv1.KfDefList, error) {
// If KfDef Instances found, and no DSC instances are found in Self-managed, that means this is an upgrade path from
// legacy version. Create a default DSC instance
kfDefList := &kfdefv1.KfDefList{}
err = c.List(context.TODO(), kfDefList)
if err != nil {
if apierrs.IsNotFound(err) {
// If no KfDefs, do nothing and return
return nil, nil
} else {
return nil, fmt.Errorf("error getting list of kfdefs: %v", err)
}
if err := c.List(context.TODO(), kfDefList); err != nil {
return &kfdefv1.KfDefList{}, fmt.Errorf("error getting list of kfdefs: %v", err)
}

return kfDefList, nil
Expand Down
3 changes: 3 additions & 0 deletions tests/e2e/controller_setup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ func TestOdhOperator(t *testing.T) {
// Run deletion if skipDeletion is not set
if !skipDeletion {
t.Run("delete components", deletionTestSuite)

// This test case recreates entire DSC again and deletes afterward
t.Run("remove components by using labeled configmap", cfgMapDeletionTestSuite)
}
}

Expand Down
144 changes: 144 additions & 0 deletions tests/e2e/dsc_cfmap_deletion_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package e2e

import (
"context"
"fmt"
"testing"

"github.com/pkg/errors"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"sigs.k8s.io/controller-runtime/pkg/client"

dsc "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1"
dsci "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/upgrade"
)

func cfgMapDeletionTestSuite(t *testing.T) {
testCtx, err := NewTestContext()
require.NoError(t, err)

defer removeDeletionConfigMap(testCtx)

t.Run(testCtx.testDsc.Name, func(t *testing.T) {
t.Run("create data science cluster", func(t *testing.T) {
err = testCtx.testDSCCreation()
require.NoError(t, err, "Error to create DSC instance")
})

t.Run("ensure all components created", func(t *testing.T) {
err = testCtx.testAllApplicationCreation(t)
require.NoError(t, err, "Error to create DSC instance")
})

t.Run("trigger deletion using labeled config map", func(t *testing.T) {
err = testCtx.testDSCDeletionUsingConfigMap()
require.NoError(t, err, "Error to delete DSC instance")
})

t.Run("owned namespaces should be deleted", func(t *testing.T) {
err = testCtx.testOwnedNamespacesDeletion()
require.NoError(t, err, "Error while deleting owned namespaces")
})

t.Run("dsci should be deleted", func(t *testing.T) {
err = testCtx.testDSCIDeletion()
require.NoError(t, err, "failed deleting DSCI")
})

t.Run("applications resources should be deleted", func(t *testing.T) {
err = testCtx.testAllApplicationDeletion()
require.NoError(t, err, "Error to delete component")
})
})
}

func (tc *testContext) testDSCIDeletion() error {
dsciInstances := &dsci.DSCInitializationList{}
if err := tc.customClient.List(context.TODO(), dsciInstances); err != nil {
return errors.Wrap(err, "failed while listing DSCIs")
}

if len(dsciInstances.Items) != 0 {
return fmt.Errorf("expected DSCI removal, but got %v", dsciInstances)
}

return nil
}

func (tc *testContext) testDSCDeletionUsingConfigMap() error {
dscLookupKey := types.NamespacedName{Name: tc.testDsc.Name}
expectedDSC := &dsc.DataScienceCluster{}

if err := createDeletionConfigMap(tc); err != nil {
return err
}

err := tc.customClient.Get(tc.ctx, dscLookupKey, expectedDSC)
if err == nil {
dscerr := tc.customClient.Delete(tc.ctx, expectedDSC, &client.DeleteOptions{})
if dscerr != nil {
return fmt.Errorf("error deleting DSC instance %s: %v", expectedDSC.Name, dscerr)
}
} else if !k8serrors.IsNotFound(err) {
if err != nil {
return fmt.Errorf("error getting DSC instance :%w", err)
}
}

return nil
}

func (tc *testContext) testOwnedNamespacesDeletion() error {
if err := wait.PollUntilContextTimeout(tc.ctx, tc.resourceRetryInterval, tc.resourceCreationTimeout, false, func(ctx context.Context) (done bool, err error) {
namespaces, err := tc.kubeClient.CoreV1().Namespaces().List(ctx, metav1.ListOptions{
LabelSelector: cluster.ODHGeneratedNamespaceLabel,
})

return len(namespaces.Items) == 0, err
}); err != nil {
return errors.Wrap(err, "failed waiting for all owned namespaces to be deleted")
}

return nil
}

func removeDeletionConfigMap(tc *testContext) {
_ = tc.kubeClient.CoreV1().ConfigMaps(tc.operatorNamespace).Delete(context.TODO(), "delete-self-managed", metav1.DeleteOptions{})
}

func createDeletionConfigMap(tc *testContext) error {
configMap := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "delete-self-managed",
Namespace: tc.operatorNamespace,
Labels: map[string]string{
upgrade.DeleteConfigMapLabel: "true",
},
},
}

configMaps := tc.kubeClient.CoreV1().ConfigMaps(configMap.Namespace)
if _, err := configMaps.Get(context.TODO(), configMap.Name, metav1.GetOptions{}); err != nil {
switch {
case k8serrors.IsNotFound(err):
if _, err = configMaps.Create(context.TODO(), configMap, metav1.CreateOptions{}); err != nil {
return err
}
case k8serrors.IsAlreadyExists(err):
if _, err = configMaps.Update(context.TODO(), configMap, metav1.UpdateOptions{}); err != nil {
return err
}
default:
return err
}
}

return nil
}
7 changes: 2 additions & 5 deletions tests/e2e/dsc_deletion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,8 @@ func (tc *testContext) testApplicationDeletion(component components.ComponentInt

return false, err
}
if len(appList.Items) != 0 {
return false, nil
} else {
return true, nil
}

return len(appList.Items) == 0, nil
}); err != nil {
return fmt.Errorf("error deleting component: %v", component.GetComponentName())
}
Expand Down

0 comments on commit 3ca6f45

Please sign in to comment.