From 202575a6b1f7cd1f68183539449dec8bf0093967 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Tue, 13 Aug 2024 17:44:21 -0700 Subject: [PATCH 1/8] test(e2e): validate that pulling from an acr registry attached to aks via --attach-acr with karpenter nodes works --- .github/actions/e2e/create-acr/action.yaml | 4 ++ .github/workflows/e2e-matrix.yaml | 2 +- Makefile | 2 +- Makefile-az.mk | 12 ++++ pkg/apis/crds/karpenter.sh_nodepools.yaml | 4 +- test/suites/acr/suite_test.go | 64 ++++++++++++++++++++++ 6 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 test/suites/acr/suite_test.go diff --git a/.github/actions/e2e/create-acr/action.yaml b/.github/actions/e2e/create-acr/action.yaml index a485e80ff..aadfed21c 100644 --- a/.github/actions/e2e/create-acr/action.yaml +++ b/.github/actions/e2e/create-acr/action.yaml @@ -40,3 +40,7 @@ runs: - name: create ACR shell: bash run: AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} AZURE_LOCATION=${{ inputs.location }} make az-mkacr + - name: import needed images + shell: bash + run: | + AZURE_ACR_NAME=${{ inputs.acr_name }} make az-acrimport diff --git a/.github/workflows/e2e-matrix.yaml b/.github/workflows/e2e-matrix.yaml index 6dde9d0de..552daa0ce 100644 --- a/.github/workflows/e2e-matrix.yaml +++ b/.github/workflows/e2e-matrix.yaml @@ -43,7 +43,7 @@ jobs: strategy: fail-fast: false matrix: - suite: [Nonbehavioral, Utilization, GPU, Drift, Integration, NodeClaim, Chaos] + suite: [Nonbehavioral, Utilization, GPU, Drift, Integration, NodeClaim, Chaos, Acr] permissions: contents: read id-token: write diff --git a/Makefile b/Makefile index d8204f6c4..3a93ed537 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ e2etests: ## Run the e2e suite against your local cluster # -count 1: prevents caching # -timeout: If a test binary runs longer than TEST_TIMEOUT, panic # -v: verbose output - cd test && CLUSTER_NAME=${CLUSTER_NAME} go test \ + cd test && CLUSTER_NAME=${CLUSTER_NAME} AZURE_ACR_NAME=${AZURE_ACR_NAME} go test \ -p 1 \ -count 1 \ -timeout ${TEST_TIMEOUT} \ diff --git a/Makefile-az.mk b/Makefile-az.mk index 5db0a0864..c077f71ab 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -38,6 +38,18 @@ az-mkacr: az-mkrg ## Create test ACR --sku Basic --admin-enabled -o none az acr login --name $(AZURE_ACR_NAME) +az-acrimport: ## Imports an image to an acr registry + az acr import --name $(AZURE_ACR_NAME) --source "mcr.microsoft.com/oss/kubernetes/pause:3.6" --image "pause:3.6" + +az-cleanenv: + kubectl delete nodepools --all + for nodeclaim in $$(kubectl get nodeclaims --output=jsonpath={.items..metadata.name}); do \ + kubectl patch nodeclaim $$nodeclaim --type=json -p '[{"op": "remove", "path": "/metadata/finalizers"}]'; \ + done + kubectl delete nodeclaims --all + kubectl delete aksnodeclasses --all + kubectl delete pods -n default --all + az-mkaks: az-mkacr ## Create test AKS cluster (with --vm-set-type AvailabilitySet for compatibility with standalone VMs) az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) --location $(AZURE_LOCATION) \ --enable-managed-identity --node-count 3 --generate-ssh-keys --vm-set-type AvailabilitySet -o none diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index 6595d6dd4..b8c4473ee 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -180,7 +180,7 @@ spec: maxProperties: 100 x-kubernetes-validations: - message: label domain "kubernetes.io" is restricted - rule: self.all(x, x in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/region", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || x.find("^([^/]+)").endsWith("node.kubernetes.io") || x.find("^([^/]+)").endsWith("node-restriction.kubernetes.io") || !x.find("^([^/]+)").endsWith("kubernetes.io")) + rule: self.all(x, x in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/zone", "topology.kubernetes.io/region", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || x.find("^([^/]+)").endsWith("node.kubernetes.io") || x.find("^([^/]+)").endsWith("node-restriction.kubernetes.io") || !x.find("^([^/]+)").endsWith("kubernetes.io")) - message: label domain "k8s.io" is restricted rule: self.all(x, x.find("^([^/]+)").endsWith("kops.k8s.io") || !x.find("^([^/]+)").endsWith("k8s.io")) - message: label domain "karpenter.sh" is restricted @@ -338,7 +338,7 @@ spec: pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*(\/))?([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$ x-kubernetes-validations: - message: label domain "kubernetes.io" is restricted - rule: self in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/region", "node.kubernetes.io/instance-type", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || self.find("^([^/]+)").endsWith("node.kubernetes.io") || self.find("^([^/]+)").endsWith("node-restriction.kubernetes.io") || !self.find("^([^/]+)").endsWith("kubernetes.io") + rule: self in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/zone", "topology.kubernetes.io/region", "node.kubernetes.io/instance-type", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || self.find("^([^/]+)").endsWith("node.kubernetes.io") || self.find("^([^/]+)").endsWith("node-restriction.kubernetes.io") || !self.find("^([^/]+)").endsWith("kubernetes.io") - message: label domain "k8s.io" is restricted rule: self.find("^([^/]+)").endsWith("kops.k8s.io") || !self.find("^([^/]+)").endsWith("k8s.io") - message: label domain "karpenter.sh" is restricted diff --git a/test/suites/acr/suite_test.go b/test/suites/acr/suite_test.go new file mode 100644 index 000000000..686a75588 --- /dev/null +++ b/test/suites/acr/suite_test.go @@ -0,0 +1,64 @@ +package acr + +import ( + "fmt" + "os" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + + "github.com/Azure/karpenter-provider-azure/pkg/apis/v1alpha2" + "github.com/Azure/karpenter-provider-azure/test/pkg/environment/azure" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/labels" + corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" + "sigs.k8s.io/karpenter/pkg/test" +) + +var env *azure.Environment +var nodeClass *v1alpha2.AKSNodeClass +var nodePool *corev1beta1.NodePool +var pauseImage string + +func TestAcr(t *testing.T) { + RegisterFailHandler(Fail) + BeforeSuite(func() { + env = azure.NewEnvironment(t) + acrName := os.Getenv("AZURE_ACR_NAME") + Expect(acrName).NotTo(BeEmpty(), "AZURE_ACR_NAME must be set for the acr test suite") + pauseImage = fmt.Sprintf("%s.azurecr.io/pause:3.6", acrName) + }) + RunSpecs(t, "Acr") +} + +var _ = BeforeEach(func() { + env.BeforeEach() + nodeClass = env.DefaultAKSNodeClass() + nodePool = env.DefaultNodePool(nodeClass) +}) +var _ = AfterEach(func() { env.Cleanup() }) +var _ = AfterEach(func() { env.AfterEach() }) + +var _ = Describe("Acr", func() { + Describe("Image Pull", func() { + It("should allow karpenter user pool nodes to pull images from the clusters attached acr", func() { + deployment := test.Deployment(test.DeploymentOptions{ + Replicas: 10, + PodOptions: test.PodOptions{ + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1.1"), + }, + }, + Image: pauseImage, + }, + }) + + env.ExpectCreated(nodePool, nodeClass, deployment) + env.EventuallyExpectHealthyPodCountWithTimeout(time.Minute*15, labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + }) + }) +}) From cd9be95acbc8cc8ddcc4b9f903a1b153b951a7ae Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Tue, 13 Aug 2024 17:49:03 -0700 Subject: [PATCH 2/8] fix: ci --- test/suites/acr/suite_test.go | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/test/suites/acr/suite_test.go b/test/suites/acr/suite_test.go index 686a75588..c976d4507 100644 --- a/test/suites/acr/suite_test.go +++ b/test/suites/acr/suite_test.go @@ -1,9 +1,24 @@ -package acr +/* +Portions Copyright (c) Microsoft Corporation. -import ( - "fmt" - "os" - "testing" +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +/* +Portions Copyright (c) Microsoft Corporation. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ "time" . "github.com/onsi/ginkgo/v2" From 54a5d13126644cd0a57bd1cc2997b7e808adf444 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Wed, 14 Aug 2024 02:14:54 +0000 Subject: [PATCH 3/8] test: ci --- test/suites/acr/suite_test.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/suites/acr/suite_test.go b/test/suites/acr/suite_test.go index c976d4507..790884933 100644 --- a/test/suites/acr/suite_test.go +++ b/test/suites/acr/suite_test.go @@ -4,14 +4,8 @@ Portions Copyright (c) Microsoft Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -/* -Portions Copyright (c) Microsoft Corporation. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -19,6 +13,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +package acr + +import ( + "fmt" + "os" + "testing" "time" . "github.com/onsi/ginkgo/v2" From 2b232256c14eccbef1866abd6ab9424f2dd34e5f Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Tue, 13 Aug 2024 19:20:43 -0700 Subject: [PATCH 4/8] fix: crd validation breaks on local so accidentally committed the change with it disabled --- pkg/apis/crds/karpenter.sh_nodepools.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index b8c4473ee..6595d6dd4 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -180,7 +180,7 @@ spec: maxProperties: 100 x-kubernetes-validations: - message: label domain "kubernetes.io" is restricted - rule: self.all(x, x in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/zone", "topology.kubernetes.io/region", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || x.find("^([^/]+)").endsWith("node.kubernetes.io") || x.find("^([^/]+)").endsWith("node-restriction.kubernetes.io") || !x.find("^([^/]+)").endsWith("kubernetes.io")) + rule: self.all(x, x in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/region", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || x.find("^([^/]+)").endsWith("node.kubernetes.io") || x.find("^([^/]+)").endsWith("node-restriction.kubernetes.io") || !x.find("^([^/]+)").endsWith("kubernetes.io")) - message: label domain "k8s.io" is restricted rule: self.all(x, x.find("^([^/]+)").endsWith("kops.k8s.io") || !x.find("^([^/]+)").endsWith("k8s.io")) - message: label domain "karpenter.sh" is restricted @@ -338,7 +338,7 @@ spec: pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*(\/))?([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$ x-kubernetes-validations: - message: label domain "kubernetes.io" is restricted - rule: self in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/zone", "topology.kubernetes.io/region", "node.kubernetes.io/instance-type", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || self.find("^([^/]+)").endsWith("node.kubernetes.io") || self.find("^([^/]+)").endsWith("node-restriction.kubernetes.io") || !self.find("^([^/]+)").endsWith("kubernetes.io") + rule: self in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/region", "node.kubernetes.io/instance-type", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || self.find("^([^/]+)").endsWith("node.kubernetes.io") || self.find("^([^/]+)").endsWith("node-restriction.kubernetes.io") || !self.find("^([^/]+)").endsWith("kubernetes.io") - message: label domain "k8s.io" is restricted rule: self.find("^([^/]+)").endsWith("kops.k8s.io") || !self.find("^([^/]+)").endsWith("k8s.io") - message: label domain "karpenter.sh" is restricted From c7ee1066aee7f9c1e7f7b36f7a338ae8930dce36 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Wed, 21 Aug 2024 23:40:48 -0700 Subject: [PATCH 5/8] fix: passing in azure acr name from env rather than using makefile default --- .github/workflows/e2e.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 548e54cde..f9a0abecd 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -129,7 +129,7 @@ jobs: if: inputs.suite != 'Nonbehavioral' run: | AZURE_CLUSTER_NAME=${{ env.CLUSTER_NAME }} AZURE_RESOURCE_GROUP=${{ env.RG_NAME }} make az-creds - CLUSTER_NAME=${{ env.CLUSTER_NAME }} TEST_SUITE="${{ inputs.suite }}" GIT_REF="$(git rev-parse HEAD)" make e2etests + CLUSTER_NAME=${{ env.CLUSTER_NAME }} AZURE_ACR_NAME=${{ env.ACR_NAME}} TEST_SUITE="${{ inputs.suite }}" GIT_REF="$(git rev-parse HEAD)" make e2etests - name: dump logs on failure uses: ./.github/actions/e2e/dump-logs if: failure() || cancelled() From b19b38e7039c6a6e13432b1f1ca1c1682f383f20 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Thu, 22 Aug 2024 16:37:01 +0000 Subject: [PATCH 6/8] fix: ci again? --- test/suites/acr/suite_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/suites/acr/suite_test.go b/test/suites/acr/suite_test.go index 790884933..f0c25fb94 100644 --- a/test/suites/acr/suite_test.go +++ b/test/suites/acr/suite_test.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ + package acr import ( From fd454bf9d7bcf49a5af8ed6e03cc1d4c713f31e9 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Thu, 22 Aug 2024 14:55:18 -0700 Subject: [PATCH 7/8] fix: nit comments addressed --- .github/workflows/e2e-matrix.yaml | 2 +- Makefile-az.mk | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/e2e-matrix.yaml b/.github/workflows/e2e-matrix.yaml index 552daa0ce..43ce1a351 100644 --- a/.github/workflows/e2e-matrix.yaml +++ b/.github/workflows/e2e-matrix.yaml @@ -43,7 +43,7 @@ jobs: strategy: fail-fast: false matrix: - suite: [Nonbehavioral, Utilization, GPU, Drift, Integration, NodeClaim, Chaos, Acr] + suite: [Nonbehavioral, Utilization, GPU, Drift, Integration, NodeClaim, Chaos, ACR] permissions: contents: read id-token: write diff --git a/Makefile-az.mk b/Makefile-az.mk index c077f71ab..3019d9a9e 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -41,14 +41,11 @@ az-mkacr: az-mkrg ## Create test ACR az-acrimport: ## Imports an image to an acr registry az acr import --name $(AZURE_ACR_NAME) --source "mcr.microsoft.com/oss/kubernetes/pause:3.6" --image "pause:3.6" -az-cleanenv: +az-cleanenv: az-rmnodeclaims-fin ## Deletes a few common karpenter testing resources(pods, nodepools, nodeclaims, aksnodeclasses) + kubectl delete pods -n default --all + kubectl delete nodeclaims --all kubectl delete nodepools --all - for nodeclaim in $$(kubectl get nodeclaims --output=jsonpath={.items..metadata.name}); do \ - kubectl patch nodeclaim $$nodeclaim --type=json -p '[{"op": "remove", "path": "/metadata/finalizers"}]'; \ - done - kubectl delete nodeclaims --all kubectl delete aksnodeclasses --all - kubectl delete pods -n default --all az-mkaks: az-mkacr ## Create test AKS cluster (with --vm-set-type AvailabilitySet for compatibility with standalone VMs) az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) --location $(AZURE_LOCATION) \ From 0a73168093143edd5de9e9a22ecc593d86452b89 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Thu, 22 Aug 2024 14:56:11 -0700 Subject: [PATCH 8/8] test: only provisioning one pod --- test/suites/acr/suite_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/suites/acr/suite_test.go b/test/suites/acr/suite_test.go index f0c25fb94..076b03ca0 100644 --- a/test/suites/acr/suite_test.go +++ b/test/suites/acr/suite_test.go @@ -62,7 +62,7 @@ var _ = Describe("Acr", func() { Describe("Image Pull", func() { It("should allow karpenter user pool nodes to pull images from the clusters attached acr", func() { deployment := test.Deployment(test.DeploymentOptions{ - Replicas: 10, + Replicas: 1, PodOptions: test.PodOptions{ ResourceRequirements: v1.ResourceRequirements{ Requests: v1.ResourceList{