Skip to content

Commit

Permalink
operator/tests: Fix long test pipeline: test shutdown data-safety
Browse files Browse the repository at this point in the history
Signed-off-by: Aaron Wilson <[email protected]>
  • Loading branch information
aaronnw committed Jul 11, 2024
1 parent d2c1b3c commit 2158e8e
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 143 deletions.
12 changes: 5 additions & 7 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,17 @@ operator:minikube:long:
- ais-k8s-multinode
timeout: 40m
rules:
- if: '$CI_PIPELINE_SOURCE == "schedule"'
- if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /.*skip-ci.*/'
- if: $CI_PIPELINE_SOURCE == "schedule"
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /.*skip-ci.*/
when: never
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
changes:
- operator/*
- if: '$CI_COMMIT_BRANCH == "main"'
- if: $CI_COMMIT_BRANCH == "main"
changes:
- operator/*
allow_failure: true
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
- if: $CI_PIPELINE_SOURCE == "merge_request_event" || $CI_COMMIT_BRANCH == "main"
when: manual
allow_failure: true
script:
- make -C operator test-long

Expand Down
92 changes: 0 additions & 92 deletions operator/tests/integration/ais_test.go

This file was deleted.

103 changes: 59 additions & 44 deletions operator/tests/integration/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ const (
clusterDestroyTimeout = 2 * time.Minute
)

var (
proxyURL string
)

// clientCluster - manages the cluster used by the AIS API tests
type clientCluster struct {
cluster *aisv1.AIStore
Expand Down Expand Up @@ -67,6 +71,26 @@ func (cc *clientCluster) create() {
Expect(tutils.StreamLogs(cc.ctx, testNSName)).To(BeNil())
}

// initAISCluster points the AIS test utilities (aistutils) at the deployed cluster.
//
// It resolves the proxy URL for cluster, stores it in the package-level
// proxyURL so that tests building API params from that variable address this
// cluster, waits for the node behind that URL via aistutils.WaitNodeReady, and
// finally asserts that aistutils.InitCluster succeeds.
func initAISCluster(ctx context.Context, cluster *aisv1.AIStore) {
	// Plain assignment on purpose: `:=` would declare a function-local variable
	// that shadows the package-level proxyURL and leave the latter unset.
	proxyURL = tutils.GetProxyURL(ctx, k8sClient, cluster)

	// Best-effort readiness wait: at most two rounds, pausing 5s after each
	// failed round. A persistent failure is not asserted here; the InitCluster
	// expectation below is the hard check.
	for retries := 2; retries > 0; retries-- {
		err := aistutils.WaitNodeReady(proxyURL, &aistutils.WaitRetryOpts{
			MaxRetries: 12,
			Interval:   10 * time.Second,
		})
		if err == nil {
			break
		}
		time.Sleep(5 * time.Second)
	}

	// Wait until the cluster has actually started (targets have registered).
	Expect(aistutils.InitCluster(proxyURL, aistutils.ClusterTypeK8s)).NotTo(HaveOccurred())
}

func (cc *clientCluster) cleanup(pvs []*corev1.PersistentVolume) {
cc.cancelLogsStream()
tutils.DestroyCluster(context.Background(), k8sClient, cc.cluster, cc.tout, tutils.ClusterCreateInterval)
Expand All @@ -75,47 +99,36 @@ func (cc *clientCluster) cleanup(pvs []*corev1.PersistentVolume) {
}
}

// restart cycles the client cluster through restartCluster and then re-runs
// initAISCluster against it, using the context held by cc for both steps.
func (cc *clientCluster) restart() {
	ctx, cluster := cc.ctx, cc.cluster

	restartCluster(ctx, cluster)
	initAISCluster(ctx, cluster)
}

var _ = Describe("Run Controller", func() {
Context("Deploy and Destroy cluster", func() {
Context("Deploy and Destroy cluster", Label("short"), func() {
Context("without externalLB", func() {
It("Should successfully create an AIS Cluster with required K8s objects", Label("short"), func() {
It("Should successfully create an AIS Cluster with required K8s objects", func() {
cluster, pvs := tutils.NewAISCluster(defaultCluArgs(), k8sClient)
createAndDestroyCluster(cluster, pvs, checkResExists, checkResShouldNotExist, false)
})

It("Should successfully create an AIS Cluster with AllowSharedOrNoDisks on > v3.23 image", Label("short"), func() {
It("Should successfully create an AIS Cluster with AllowSharedOrNoDisks on > v3.23 image", func() {
args := defaultCluArgs()
args.AllowSharedOrNoDisks = true
cluster, pvs := tutils.NewAISCluster(args, k8sClient)
createAndDestroyCluster(cluster, pvs, nil, nil, false)
})

It("Should successfully create an hetero-sized AIS Cluster", Label("short"), func() {
It("Should successfully create an hetero-sized AIS Cluster", func() {
cluArgs := defaultCluArgs()
cluArgs.TargetSize = 2
cluArgs.ProxySize = 1
cluster, pvs := tutils.NewAISCluster(cluArgs, k8sClient)
createAndDestroyCluster(cluster, pvs, nil, nil, false)
})

It("Should shutdown cluster when ShutdownCluster is true, scale up when false", Label("long"), func() {
ctx := context.Background()
cluster, pvs := tutils.NewAISCluster(defaultCluArgs(), k8sClient)
createCluster(ctx, cluster, tutils.GetClusterCreateTimeout(), tutils.ClusterCreateInterval)
// Shutdown, ensure statefulsets exist and are size 0
setClusterShutdown(ctx, cluster, true)
tutils.EventuallyProxyIsSize(ctx, k8sClient, cluster, 0, clusterDestroyTimeout)
tutils.EventuallyTargetIsSize(ctx, k8sClient, cluster, 0, clusterDestroyTimeout)
// Resume shutdown cluster, should become fully ready
setClusterShutdown(ctx, cluster, false)
tutils.WaitForClusterToBeReady(ctx, k8sClient, cluster,
clusterReadyTimeout, clusterReadyRetryInterval)
tutils.DestroyCluster(ctx, k8sClient, cluster, clusterDestroyTimeout)
tutils.DestroyPV(ctx, k8sClient, pvs)
})
})

Context("with externalLB", Label("short"), func() {
Context("with externalLB", func() {
It("Should successfully create an AIS Cluster with required K8s objects", func() {
tutils.CheckSkip(&tutils.SkipArgs{RequiresLB: true})
cluArgs := defaultCluArgs()
Expand Down Expand Up @@ -239,19 +252,17 @@ var _ = Describe("Run Controller", func() {
})

Describe("Data-safety tests", Label("long"), func() {
It("Re-deploying same cluster must retain data", func() {
It("Restarting cluster must retain data", func() {
cluArgs := defaultCluArgs()
cluArgs.EnableExternalLB = testAsExternalClient
cluArgs.CleanupData = false
cc, pvs := newClientCluster(cluArgs)
cc.create()
// put objects
var (
bck = aiscmn.Bck{Name: "TEST_BUCKET", Provider: aisapc.AIS}
bck = aiscmn.Bck{Name: "TEST_BCK_DATA_SAFETY", Provider: aisapc.AIS}
objPrefix = "test-opr/"
baseParam = aistutils.BaseAPIParams(proxyURL)
)
aisapi.DestroyBucket(baseParam, bck)
err := aisapi.CreateBucket(baseParam, bck, nil)
Expect(err).ShouldNot(HaveOccurred())
names, failCnt, err := aistutils.PutRandObjs(aistutils.PutObjectsArgs{
Expand All @@ -266,14 +277,10 @@ var _ = Describe("Run Controller", func() {
})
Expect(err).NotTo(HaveOccurred())
Expect(failCnt).To(Equal(0))
aistutils.EnsureObjectsExist(testCtx, baseParam, bck, names...)
// destroy cluster and pvs (data persists on mounts)
cc.cleanup(pvs)

// Re-deploy cluster and check if the data exists.
cc, pvs = newClientCluster(cluArgs)
cc.create()
aistutils.EnsureObjectsExist(testCtx, aistutils.BaseAPIParams(proxyURL), bck, names...)
tutils.ObjectsShouldExist(baseParam, bck, names...)
// Restart cluster
cc.restart()
tutils.ObjectsShouldExist(aistutils.BaseAPIParams(proxyURL), bck, names...)
cc.cleanup(pvs)
})

Expand All @@ -285,12 +292,11 @@ var _ = Describe("Run Controller", func() {
cc.create()
// put objects
var (
bck = aiscmn.Bck{Name: "TEST_BUCKET", Provider: aisapc.AIS}
objPrefix = "test-opr/"
baseParam = aistutils.BaseAPIParams(proxyURL)
bck = aiscmn.Bck{Name: "TEST_BCK_SCALE_DOWN", Provider: aisapc.AIS}
objPrefix = "test-opr/"
baseParams = aistutils.BaseAPIParams(proxyURL)
)
aisapi.DestroyBucket(baseParam, bck)
err := aisapi.CreateBucket(baseParam, bck, nil)
err := aisapi.CreateBucket(baseParams, bck, nil)
Expect(err).ShouldNot(HaveOccurred())
names, failCnt, err := aistutils.PutRandObjs(aistutils.PutObjectsArgs{
ProxyURL: proxyURL,
Expand All @@ -304,12 +310,12 @@ var _ = Describe("Run Controller", func() {
})
Expect(err).NotTo(HaveOccurred())
Expect(failCnt).To(Equal(0))
aistutils.EnsureObjectsExist(testCtx, baseParam, bck, names...)
tutils.ObjectsShouldExist(baseParams, bck, names...)

// Scale down cluster
scaleCluster(context.TODO(), cc.cluster, false, -1)

aistutils.EnsureObjectsExist(testCtx, aistutils.BaseAPIParams(proxyURL), bck, names...)
tutils.ObjectsShouldExist(baseParams, bck, names...)
cc.cleanup(pvs)
})

Expand All @@ -320,11 +326,9 @@ var _ = Describe("Run Controller", func() {
cc, pvs := newClientCluster(cluArgs)
cc.create()
// Create bucket
bck := aiscmn.Bck{Name: "TEST_BUCKET", Provider: aisapc.AIS}
bck := aiscmn.Bck{Name: "TEST_BCK_CLEANUP", Provider: aisapc.AIS}
baseParams := aistutils.BaseAPIParams(proxyURL)
err := aisapi.DestroyBucket(baseParams, bck)
Expect(err).ShouldNot(HaveOccurred())
err = aisapi.CreateBucket(baseParams, bck, nil)
err := aisapi.CreateBucket(baseParams, bck, nil)
Expect(err).ShouldNot(HaveOccurred())
_, failCnt, err := aistutils.PutRandObjs(aistutils.PutObjectsArgs{
ProxyURL: proxyURL,
Expand Down Expand Up @@ -495,6 +499,17 @@ func createClusters(ctx context.Context, clusters []*aisv1.AIStore, intervals ..
wg.Wait()
}

// restartCluster shuts the given cluster down and brings it back up.
//
// It first flips the cluster into shutdown and waits (up to
// clusterDestroyTimeout each) for the proxy and target sizes to reach zero,
// then clears the shutdown flag and waits for the cluster to become ready.
func restartCluster(ctx context.Context, cluster *aisv1.AIStore) {
	// Size expected for both proxies and targets while the cluster is shut down.
	const stoppedSize = 0

	// Phase 1: shut down and wait for everything to scale to zero.
	setClusterShutdown(ctx, cluster, true)
	tutils.EventuallyProxyIsSize(ctx, k8sClient, cluster, stoppedSize, clusterDestroyTimeout)
	tutils.EventuallyTargetIsSize(ctx, k8sClient, cluster, stoppedSize, clusterDestroyTimeout)

	// Phase 2: resume and wait for the cluster to report ready again.
	setClusterShutdown(ctx, cluster, false)
	tutils.WaitForClusterToBeReady(
		ctx, k8sClient, cluster,
		clusterReadyTimeout, clusterReadyRetryInterval,
	)
}

func setClusterShutdown(ctx context.Context, cluster *aisv1.AIStore, shutdown bool) {
cr, err := k8sClient.GetAIStoreCR(ctx, cluster.NamespacedName())
Expect(err).ShouldNot(HaveOccurred())
Expand Down
1 change: 1 addition & 0 deletions operator/tests/test_job_spec.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ metadata:
namespace: default
spec:
ttlSecondsAfterFinished: 0
backoffLimit: 0
template:
spec:
containers:
Expand Down
8 changes: 8 additions & 0 deletions operator/tests/tutils/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"time"

aisapi "github.com/NVIDIA/aistore/api"
aiscmn "github.com/NVIDIA/aistore/cmn"
aistutils "github.com/NVIDIA/aistore/tools"
aisv1 "github.com/ais-operator/api/v1beta1"
aisclient "github.com/ais-operator/pkg/client"
Expand Down Expand Up @@ -524,3 +525,10 @@ func EventuallyJobNotExists(ctx context.Context, client *aisclient.K8sClient,
return exists
}, intervals...).Should(BeFalse())
}

// ObjectsShouldExist asserts that every object named in objectsNames can be
// fetched from bck using params. Each name is requested in order via
// aisapi.GetObject, and any returned error fails the expectation.
func ObjectsShouldExist(params aisapi.BaseParams, bck aiscmn.Bck, objectsNames ...string) {
	for i := range objectsNames {
		name := objectsNames[i]
		// Only the error matters here; the returned attributes are ignored.
		_, getErr := aisapi.GetObject(params, bck, name, nil)
		Expect(getErr).NotTo(HaveOccurred())
	}
}

0 comments on commit 2158e8e

Please sign in to comment.