From f5c9427f0ebc7730f55092f4f82f40f457bb75b1 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Thu, 14 Nov 2024 18:03:24 +0200 Subject: [PATCH 1/8] Update k8s, split nodepools --- eksctl/2i2c-aws-us.jsonnet | 248 +++++++++++++++++++++++++++++++++++-- 1 file changed, 241 insertions(+), 7 deletions(-) diff --git a/eksctl/2i2c-aws-us.jsonnet b/eksctl/2i2c-aws-us.jsonnet index 2e636560fe..4f2ae1c24f 100644 --- a/eksctl/2i2c-aws-us.jsonnet +++ b/eksctl/2i2c-aws-us.jsonnet @@ -25,13 +25,209 @@ local nodeAz = "us-west-2a"; // A `node.kubernetes.io/instance-type label is added, so pods // can request a particular kind of node with a nodeSelector local notebookNodes = [ - { instanceType: "r5.xlarge" }, - { instanceType: "r5.4xlarge" }, - { instanceType: "r5.16xlarge" }, + local notebookNodes = [ + // staging + { + instanceType: "r5.xlarge", + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" } + }, + { + instanceType: "r5.4xlarge", + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" } + }, + { + instanceType: "r5.16xlarge", + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" } + }, + { + instanceType: "g4dn.xlarge", + namePrefix: "nb-staging", + minSize: 0, + labels+: { "2i2c/hub-name": "staging" }, + tags+: { + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", + "2i2c:hub-name": "staging", + }, + taints+: { + "nvidia.com/gpu": "present:NoSchedule" + }, + // Allow provisioning GPUs across all AZs, to prevent situation where all + // GPUs in a single AZ are in use and no new nodes can be spawned + availabilityZones: masterAzs, + }, + // dask-staging + { + instanceType: "r5.xlarge", + namePrefix: "nb-dask-staging", + labels+: { "2i2c/hub-name": "dask-staging" }, + tags+: { "2i2c:hub-name": "dask-staging" } + }, + { + instanceType: "r5.4xlarge", + namePrefix: "nb-dask-staging", + 
labels+: { "2i2c/hub-name": "dask-staging" }, + tags+: { "2i2c:hub-name": "dask-staging" } + }, + { + instanceType: "r5.16xlarge", + namePrefix: "nb-dask-staging", + labels+: { "2i2c/hub-name": "dask-staging" }, + tags+: { "2i2c:hub-name": "dask-staging" } + }, + { + instanceType: "g4dn.xlarge", + namePrefix: "nb-dask-staging", + minSize: 0, + labels+: { "2i2c/hub-name": "dask-staging" }, + tags+: { + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", + "2i2c:hub-name": "dask-staging", + }, + taints+: { + "nvidia.com/gpu": "present:NoSchedule" + }, + // Allow provisioning GPUs across all AZs, to prevent situation where all + // GPUs in a single AZ are in use and no new nodes can be spawned + availabilityZones: masterAzs, + }, + // showcase + { + instanceType: "r5.xlarge", + namePrefix: "nb-showcase", + labels+: { "2i2c/hub-name": "showcase" }, + tags+: { "2i2c:hub-name": "showcase" } + }, + { + instanceType: "r5.4xlarge", + namePrefix: "nb-showcase", + labels+: { "2i2c/hub-name": "showcase" }, + tags+: { "2i2c:hub-name": "showcase" } + }, + { + instanceType: "r5.16xlarge", + namePrefix: "nb-showcase", + labels+: { "2i2c/hub-name": "showcase" }, + tags+: { "2i2c:hub-name": "showcase" } + }, + { + instanceType: "g4dn.xlarge", + namePrefix: "nb-showcase", + minSize: 0, + labels+: { "2i2c/hub-name": "showcase" }, + tags+: { + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", + "2i2c:hub-name": "showcase", + }, + taints+: { + "nvidia.com/gpu": "present:NoSchedule" + }, + // Allow provisioning GPUs across all AZs, to prevent situation where all + // GPUs in a single AZ are in use and no new nodes can be spawned + availabilityZones: masterAzs, + }, + // ncar-cisl + { + instanceType: "r5.xlarge", + namePrefix: "nb-ncar-cisl", + labels+: { "2i2c/hub-name": "ncar-cisl" }, + tags+: { "2i2c:hub-name": "ncar-cisl" } + }, + { + instanceType: "r5.4xlarge", + namePrefix: "nb-ncar-cisl", + labels+: { "2i2c/hub-name": "ncar-cisl" }, + 
tags+: { "2i2c:hub-name": "ncar-cisl" } + }, + { + instanceType: "r5.16xlarge", + namePrefix: "nb-ncar-cisl", + labels+: { "2i2c/hub-name": "ncar-cisl" }, + tags+: { "2i2c:hub-name": "ncar-cisl" } + }, + { + instanceType: "g4dn.xlarge", + namePrefix: "nb-ncar-cisl", + minSize: 0, + labels+: { "2i2c/hub-name": "ncar-cisl" }, + tags+: { + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", + "2i2c:hub-name": "ncar-cisl", + }, + taints+: { + "nvidia.com/gpu": "present:NoSchedule" + }, + // Allow provisioning GPUs across all AZs, to prevent situation where all + // GPUs in a single AZ are in use and no new nodes can be spawned + availabilityZones: masterAzs, + }, + // itcoocean + { + instanceType: "r5.xlarge", + namePrefix: "itcoocean", + labels+: { "2i2c/hub-name": "itcoocean" }, + tags+: { "2i2c:hub-name": "itcoocean" } + }, + { + instanceType: "r5.4xlarge", + namePrefix: "itcoocean", + labels+: { "2i2c/hub-name": "itcoocean" }, + tags+: { "2i2c:hub-name": "itcoocean" } + }, + { + instanceType: "r5.16xlarge", + namePrefix: "itcoocean", + labels+: { "2i2c/hub-name": "itcoocean" }, + tags+: { "2i2c:hub-name": "itcoocean" } + }, + { + instanceType: "g4dn.xlarge", + namePrefix: "nb-itcoocean", + minSize: 0, + labels+: { "2i2c/hub-name": "itcoocean" }, + tags+: { + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", + "2i2c:hub-name": "itcoocean", + }, + taints+: { + "nvidia.com/gpu": "present:NoSchedule" + }, + // Allow provisioning GPUs across all AZs, to prevent situation where all + // GPUs in a single AZ are in use and no new nodes can be spawned + availabilityZones: masterAzs, + }, + // cosmicds + { + instanceType: "r5.xlarge", + namePrefix: "cosmicds", + labels+: { "2i2c/hub-name": "cosmicds" }, + tags+: { "2i2c:hub-name": "cosmicds" } + }, + { + instanceType: "r5.4xlarge", + namePrefix: "cosmicds", + labels+: { "2i2c/hub-name": "cosmicds" }, + tags+: { "2i2c:hub-name": "cosmicds" } + }, + { + instanceType: "r5.16xlarge", + 
namePrefix: "cosmicds", + labels+: { "2i2c/hub-name": "cosmicds" }, + tags+: { "2i2c:hub-name": "cosmicds" } + }, { instanceType: "g4dn.xlarge", + namePrefix: "nb-cosmicds", + minSize: 0, + labels+: { "2i2c/hub-name": "cosmicds" }, tags+: { - "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", + "2i2c:hub-name": "cosmicds", }, taints+: { "nvidia.com/gpu": "present:NoSchedule" @@ -54,7 +250,42 @@ local daskNodes = [ // A not yet fully established policy is being developed about using a single // node pool, see https://github.com/2i2c-org/infrastructure/issues/2687. // - { instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }}, + { + namePrefix: "dask-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" }, + instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } + }, + { + namePrefix: "dask-dask-staging", + labels+: { "2i2c/hub-name": "dask-staging" }, + tags+: { "2i2c:hub-name": "dask-staging" }, + instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } + }, + { + namePrefix: "dask-showcase", + labels+: { "2i2c/hub-name": "showcase" }, + tags+: { "2i2c:hub-name": "showcase" }, + instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } + }, + { + namePrefix: "dask-ncar-cisl", + labels+: { "2i2c/hub-name": "ncar-cisl" }, + tags+: { "2i2c:hub-name": "ncar-cisl" }, + instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } + }, + { + namePrefix: "dask-itcoocean", + labels+: { "2i2c/hub-name": "itcoocean" }, + tags+: { "2i2c:hub-name": "itcoocean" }, + instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } + }, + { + namePrefix: "dask-cosmicds", + labels+: { "2i2c/hub-name": "cosmicds" }, + tags+: { "2i2c:hub-name": "cosmicds" }, + instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } + }, ]; @@ -64,7 +295,7 @@ local daskNodes = [ metadata+: { name: "2i2c-aws-us", region: clusterRegion, - version: "1.29", + version: 
"1.30", }, availabilityZones: masterAzs, iam: { @@ -95,7 +326,7 @@ local daskNodes = [ [ ng + { namePrefix: 'core', - nameSuffix: 'a', + nameSuffix: 'b', nameIncludeInstanceType: false, availabilityZones: [nodeAz], ssh: { @@ -145,6 +376,9 @@ local daskNodes = [ "k8s.dask.org_dedicated" : "worker:NoSchedule", "k8s.dask.org/dedicated" : "worker:NoSchedule" }, + tags+: { + "2i2c:node-purpose": "worker" + }, instancesDistribution+: { onDemandBaseCapacity: 0, onDemandPercentageAboveBaseCapacity: 0, From 514a319cb86f38008fb6fc36ed32181cea84b8da Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Thu, 14 Nov 2024 18:14:10 +0200 Subject: [PATCH 2/8] Add missing tag --- eksctl/2i2c-aws-us.jsonnet | 3 +++ 1 file changed, 3 insertions(+) diff --git a/eksctl/2i2c-aws-us.jsonnet b/eksctl/2i2c-aws-us.jsonnet index 4f2ae1c24f..661efb6156 100644 --- a/eksctl/2i2c-aws-us.jsonnet +++ b/eksctl/2i2c-aws-us.jsonnet @@ -354,6 +354,9 @@ local daskNodes = [ "hub.jupyter.org/node-purpose": "user", "k8s.dask.org/node-purpose": "scheduler" }, + tags+: { + "2i2c:node-purpose": "user" + }, taints+: { "hub.jupyter.org_dedicated": "user:NoSchedule", "hub.jupyter.org/dedicated": "user:NoSchedule" From 27a2ec22aa144e8e18108ff6628843b69607e36c Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Thu, 14 Nov 2024 18:15:19 +0200 Subject: [PATCH 3/8] Put each cluster in its nodegroup --- config/clusters/2i2c-aws-us/cosmicds.values.yaml | 2 ++ config/clusters/2i2c-aws-us/dask-staging.values.yaml | 2 ++ config/clusters/2i2c-aws-us/itcoocean.values.yaml | 2 ++ config/clusters/2i2c-aws-us/ncar-cisl.values.yaml | 2 ++ config/clusters/2i2c-aws-us/showcase.values.yaml | 2 ++ config/clusters/2i2c-aws-us/staging.values.yaml | 2 ++ 6 files changed, 12 insertions(+) diff --git a/config/clusters/2i2c-aws-us/cosmicds.values.yaml b/config/clusters/2i2c-aws-us/cosmicds.values.yaml index b85d2bd7ff..38d384c046 100644 --- a/config/clusters/2i2c-aws-us/cosmicds.values.yaml +++ 
b/config/clusters/2i2c-aws-us/cosmicds.values.yaml @@ -38,6 +38,8 @@ jupyterhub: name: Cosmic DS, Harvard url: https://www.cosmicds.cfa.harvard.edu/ singleuser: + nodeSelector: + 2i2c/hub-name: cosmicds # No persistent storage should be kept to reduce any potential data # retention & privacy issues. # Ref https://github.com/2i2c-org/infrastructure/issues/2128#issuecomment-1635107926 diff --git a/config/clusters/2i2c-aws-us/dask-staging.values.yaml b/config/clusters/2i2c-aws-us/dask-staging.values.yaml index 9b82f68bf7..2396bc3fb6 100644 --- a/config/clusters/2i2c-aws-us/dask-staging.values.yaml +++ b/config/clusters/2i2c-aws-us/dask-staging.values.yaml @@ -28,6 +28,8 @@ basehub: name: 2i2c url: https://2i2c.org singleuser: + nodeSelector: + 2i2c/hub-name: dask-staging image: name: pangeo/pangeo-notebook tag: "latest" diff --git a/config/clusters/2i2c-aws-us/itcoocean.values.yaml b/config/clusters/2i2c-aws-us/itcoocean.values.yaml index 5fe8104a91..720328c6c5 100644 --- a/config/clusters/2i2c-aws-us/itcoocean.values.yaml +++ b/config/clusters/2i2c-aws-us/itcoocean.values.yaml @@ -45,6 +45,8 @@ jupyterhub: admin_users: - eeholmes # Eli Holmes, Community representative singleuser: + nodeSelector: + 2i2c/hub-name: itcoocean # Requested in https://2i2c.freshdesk.com/a/tickets/1320 defaultUrl: /lab # shared-public for collaboration diff --git a/config/clusters/2i2c-aws-us/ncar-cisl.values.yaml b/config/clusters/2i2c-aws-us/ncar-cisl.values.yaml index 7e9ee9d2f2..526998d486 100644 --- a/config/clusters/2i2c-aws-us/ncar-cisl.values.yaml +++ b/config/clusters/2i2c-aws-us/ncar-cisl.values.yaml @@ -45,6 +45,8 @@ basehub: - NicholasCote # Nicholas Cote, Initial administrator - nwehrheim # Nick Wehrheim, Community representative singleuser: + nodeSelector: + 2i2c/hub-name: ncar-cisl image: # image choice preliminary and is expected to be setup via # https://ncar-cisl.2i2c.cloud/services/configurator/ by the community diff --git a/config/clusters/2i2c-aws-us/showcase.values.yaml 
b/config/clusters/2i2c-aws-us/showcase.values.yaml index 4651311502..39aeae6315 100644 --- a/config/clusters/2i2c-aws-us/showcase.values.yaml +++ b/config/clusters/2i2c-aws-us/showcase.values.yaml @@ -49,6 +49,8 @@ basehub: Authenticator: enable_auth_state: true singleuser: + nodeSelector: + 2i2c/hub-name: showcase extraEnv: SCRATCH_BUCKET: s3://2i2c-aws-us-scratch-showcase/$(JUPYTERHUB_USER) PERSISTENT_BUCKET: s3://2i2c-aws-us-persistent-showcase/$(JUPYTERHUB_USER) diff --git a/config/clusters/2i2c-aws-us/staging.values.yaml b/config/clusters/2i2c-aws-us/staging.values.yaml index 2020c91af1..2f9c93d5d6 100644 --- a/config/clusters/2i2c-aws-us/staging.values.yaml +++ b/config/clusters/2i2c-aws-us/staging.values.yaml @@ -33,5 +33,7 @@ jupyterhub: GitHubOAuthenticator: oauth_callback_url: "https://staging.aws.2i2c.cloud/hub/oauth_callback" singleuser: + nodeSelector: + 2i2c/hub-name: staging extraEnv: SCRATCH_BUCKET: s3://2i2c-aws-us-scratch-staging/$(JUPYTERHUB_USER) From 45f2cebc7edd37dfe6e74e8fd44ed6bb589df0f7 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Thu, 14 Nov 2024 18:21:23 +0200 Subject: [PATCH 4/8] Not all hubs are daskhubs --- eksctl/2i2c-aws-us.jsonnet | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/eksctl/2i2c-aws-us.jsonnet b/eksctl/2i2c-aws-us.jsonnet index 661efb6156..5834b991bc 100644 --- a/eksctl/2i2c-aws-us.jsonnet +++ b/eksctl/2i2c-aws-us.jsonnet @@ -252,12 +252,6 @@ local daskNodes = [ // { namePrefix: "dask-staging", - labels+: { "2i2c/hub-name": "staging" }, - tags+: { "2i2c:hub-name": "staging" }, - instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } - }, - { - namePrefix: "dask-dask-staging", labels+: { "2i2c/hub-name": "dask-staging" }, tags+: { "2i2c:hub-name": "dask-staging" }, instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } @@ -274,18 +268,6 @@ local daskNodes = [ tags+: { "2i2c:hub-name": "ncar-cisl" }, instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } }, - { - namePrefix: 
"dask-itcoocean", - labels+: { "2i2c/hub-name": "itcoocean" }, - tags+: { "2i2c:hub-name": "itcoocean" }, - instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } - }, - { - namePrefix: "dask-cosmicds", - labels+: { "2i2c/hub-name": "cosmicds" }, - tags+: { "2i2c:hub-name": "cosmicds" }, - instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } - }, ]; From 22760764acbafc3b39fee1ba2d85734b0d894788 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Thu, 14 Nov 2024 18:21:56 +0200 Subject: [PATCH 5/8] Put dask workers in their own hub specific nodepool --- config/clusters/2i2c-aws-us/dask-staging.values.yaml | 11 +++++++++++ config/clusters/2i2c-aws-us/ncar-cisl.values.yaml | 11 +++++++++++ config/clusters/2i2c-aws-us/showcase.values.yaml | 11 +++++++++++ 3 files changed, 33 insertions(+) diff --git a/config/clusters/2i2c-aws-us/dask-staging.values.yaml b/config/clusters/2i2c-aws-us/dask-staging.values.yaml index 2396bc3fb6..710ad4111a 100644 --- a/config/clusters/2i2c-aws-us/dask-staging.values.yaml +++ b/config/clusters/2i2c-aws-us/dask-staging.values.yaml @@ -41,3 +41,14 @@ basehub: authenticator_class: "github" GitHubOAuthenticator: oauth_callback_url: "https://dask-staging.aws.2i2c.cloud/hub/oauth_callback" + +dask-gateway: + gateway: + scheduler: + extraPodConfig: + nodeSelector: + 2i2c/hub-name: dask-staging + worker: + extraPodConfig: + nodeSelector: + 2i2c/hub-name: dask-staging diff --git a/config/clusters/2i2c-aws-us/ncar-cisl.values.yaml b/config/clusters/2i2c-aws-us/ncar-cisl.values.yaml index 526998d486..da993783d4 100644 --- a/config/clusters/2i2c-aws-us/ncar-cisl.values.yaml +++ b/config/clusters/2i2c-aws-us/ncar-cisl.values.yaml @@ -252,3 +252,14 @@ basehub: node.kubernetes.io/instance-type: g4dn.xlarge extra_resource_limits: nvidia.com/gpu: "1" + +dask-gateway: + gateway: + scheduler: + extraPodConfig: + nodeSelector: + 2i2c/hub-name: ncar-cisl + worker: + extraPodConfig: + nodeSelector: + 2i2c/hub-name: ncar-cisl diff --git 
a/config/clusters/2i2c-aws-us/showcase.values.yaml b/config/clusters/2i2c-aws-us/showcase.values.yaml index 39aeae6315..19c6b9d1b9 100644 --- a/config/clusters/2i2c-aws-us/showcase.values.yaml +++ b/config/clusters/2i2c-aws-us/showcase.values.yaml @@ -248,3 +248,14 @@ basehub: node.kubernetes.io/instance-type: g4dn.xlarge extra_resource_limits: nvidia.com/gpu: "1" + +dask-gateway: + gateway: + scheduler: + extraPodConfig: + nodeSelector: + 2i2c/hub-name: showcase + worker: + extraPodConfig: + nodeSelector: + 2i2c/hub-name: showcase From 2eaa72839883881c94e4c435f9114cbd65cf440b Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Fri, 15 Nov 2024 11:14:26 +0200 Subject: [PATCH 6/8] Only have gpus for ncar and showcase because only they requested it --- eksctl/2i2c-aws-us.jsonnet | 64 -------------------------------------- 1 file changed, 64 deletions(-) diff --git a/eksctl/2i2c-aws-us.jsonnet b/eksctl/2i2c-aws-us.jsonnet index 5834b991bc..2b39e9d3e4 100644 --- a/eksctl/2i2c-aws-us.jsonnet +++ b/eksctl/2i2c-aws-us.jsonnet @@ -45,22 +45,6 @@ local notebookNodes = [ labels+: { "2i2c/hub-name": "staging" }, tags+: { "2i2c:hub-name": "staging" } }, - { - instanceType: "g4dn.xlarge", - namePrefix: "nb-staging", - minSize: 0, - labels+: { "2i2c/hub-name": "staging" }, - tags+: { - "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", - "2i2c:hub-name": "staging", - }, - taints+: { - "nvidia.com/gpu": "present:NoSchedule" - }, - // Allow provisioning GPUs across all AZs, to prevent situation where all - // GPUs in a single AZ are in use and no new nodes can be spawned - availabilityZones: masterAzs, - }, // dask-staging { instanceType: "r5.xlarge", @@ -80,22 +64,6 @@ local notebookNodes = [ labels+: { "2i2c/hub-name": "dask-staging" }, tags+: { "2i2c:hub-name": "dask-staging" } }, - { - instanceType: "g4dn.xlarge", - namePrefix: "nb-dask-staging", - minSize: 0, - labels+: { "2i2c/hub-name": "dask-staging" }, - tags+: { - 
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", - "2i2c:hub-name": "dask-staging", - }, - taints+: { - "nvidia.com/gpu": "present:NoSchedule" - }, - // Allow provisioning GPUs across all AZs, to prevent situation where all - // GPUs in a single AZ are in use and no new nodes can be spawned - availabilityZones: masterAzs, - }, // showcase { instanceType: "r5.xlarge", @@ -185,22 +153,6 @@ local notebookNodes = [ labels+: { "2i2c/hub-name": "itcoocean" }, tags+: { "2i2c:hub-name": "itcoocean" } }, - { - instanceType: "g4dn.xlarge", - namePrefix: "nb-itcoocean", - minSize: 0, - labels+: { "2i2c/hub-name": "itcoocean" }, - tags+: { - "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", - "2i2c:hub-name": "itcoocean", - }, - taints+: { - "nvidia.com/gpu": "present:NoSchedule" - }, - // Allow provisioning GPUs across all AZs, to prevent situation where all - // GPUs in a single AZ are in use and no new nodes can be spawned - availabilityZones: masterAzs, - }, // cosmicds { instanceType: "r5.xlarge", @@ -220,22 +172,6 @@ local notebookNodes = [ labels+: { "2i2c/hub-name": "cosmicds" }, tags+: { "2i2c:hub-name": "cosmicds" } }, - { - instanceType: "g4dn.xlarge", - namePrefix: "nb-cosmicds", - minSize: 0, - labels+: { "2i2c/hub-name": "cosmicds" }, - tags+: { - "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", - "2i2c:hub-name": "cosmicds", - }, - taints+: { - "nvidia.com/gpu": "present:NoSchedule" - }, - // Allow provisioning GPUs across all AZs, to prevent situation where all - // GPUs in a single AZ are in use and no new nodes can be spawned - availabilityZones: masterAzs, - }, ]; From 03a1700c5ba837d300e4b0fde4517aab7614d850 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Fri, 15 Nov 2024 11:22:58 +0200 Subject: [PATCH 7/8] Rm duplicated list --- eksctl/2i2c-aws-us.jsonnet | 1 - 1 file changed, 1 deletion(-) diff --git a/eksctl/2i2c-aws-us.jsonnet b/eksctl/2i2c-aws-us.jsonnet index 
2b39e9d3e4..a75b8dbb17 100644 --- a/eksctl/2i2c-aws-us.jsonnet +++ b/eksctl/2i2c-aws-us.jsonnet @@ -25,7 +25,6 @@ local nodeAz = "us-west-2a"; // A `node.kubernetes.io/instance-type label is added, so pods // can request a particular kind of node with a nodeSelector local notebookNodes = [ - local notebookNodes = [ // staging { instanceType: "r5.xlarge", From cd8c60e7dc85822b1f7f0fc836f5036001c329c8 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Fri, 15 Nov 2024 12:59:51 +0200 Subject: [PATCH 8/8] Add a node-purpose tag on core node as well --- eksctl/2i2c-aws-us.jsonnet | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/eksctl/2i2c-aws-us.jsonnet b/eksctl/2i2c-aws-us.jsonnet index a75b8dbb17..8326b152ef 100644 --- a/eksctl/2i2c-aws-us.jsonnet +++ b/eksctl/2i2c-aws-us.jsonnet @@ -243,7 +243,7 @@ local daskNodes = [ [ ng + { namePrefix: 'core', - nameSuffix: 'b', + nameSuffix: 'a', nameIncludeInstanceType: false, availabilityZones: [nodeAz], ssh: { @@ -256,6 +256,9 @@ local daskNodes = [ "hub.jupyter.org/node-purpose": "core", "k8s.dask.org/node-purpose": "core" }, + tags+: { + "2i2c:node-purpose": "core" + }, }, ] + [ ng + {