Merge pull request #5127 from sgibson91/nasa-cryo/dedicated-nodegroups
nasa-cryo: create hub-specific nodegroups
sgibson91 authored Nov 18, 2024
2 parents 4c521cd + c33f956 commit 63abd57
Showing 3 changed files with 62 additions and 5 deletions.
2 changes: 2 additions & 0 deletions config/clusters/nasa-cryo/prod.values.yaml
@@ -13,6 +13,8 @@ basehub:
GitHubOAuthenticator:
oauth_callback_url: https://hub.cryointhecloud.com/hub/oauth_callback
singleuser:
nodeSelector:
2i2c/hub-name: prod
extraEnv:
SCRATCH_BUCKET: s3://nasa-cryo-scratch/$(JUPYTERHUB_USER)
PANGEO_SCRATCH: s3://nasa-cryo-scratch/$(JUPYTERHUB_USER)
2 changes: 2 additions & 0 deletions config/clusters/nasa-cryo/staging.values.yaml
@@ -13,6 +13,8 @@ basehub:
GitHubOAuthenticator:
oauth_callback_url: https://staging.hub.cryointhecloud.com/hub/oauth_callback
singleuser:
nodeSelector:
2i2c/hub-name: staging
extraEnv:
SCRATCH_BUCKET: s3://nasa-cryo-scratch-staging/$(JUPYTERHUB_USER)
PANGEO_SCRATCH: s3://nasa-cryo-scratch-staging/$(JUPYTERHUB_USER)
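
For context (a sketch, not part of the commit): the nodeSelector added to each hub's singleuser config is what pins user servers to the matching hub-specific nodegroups defined in eksctl/nasa-cryo.jsonnet below. Assuming the Zero to JupyterHub chart passes singleuser.nodeSelector through to the spawned pod unchanged, a prod user server would end up with a pod spec along these lines (pod name and image are placeholders):

# Hypothetical pod spec fragment for a prod-hub user server. The nodeSelector
# means the pod can only be scheduled onto nodes carrying the matching
# 2i2c/hub-name label, i.e. the nb-prod* nodegroups created in this commit.
apiVersion: v1
kind: Pod
metadata:
  name: jupyter-example-user        # placeholder name
spec:
  nodeSelector:
    2i2c/hub-name: prod
  containers:
    - name: notebook
      image: example-user-image:latest   # placeholder image

Because only the prod nodegroups carry the 2i2c/hub-name: prod label (and only the staging nodegroups carry 2i2c/hub-name: staging), staging and prod user pods no longer share notebook nodes.
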
63 changes: 58 additions & 5 deletions eksctl/nasa-cryo.jsonnet
@@ -25,13 +25,64 @@ local nodeAz = "us-west-2a";
// A `node.kubernetes.io/instance-type` label is added, so pods
// can request a particular kind of node with a nodeSelector
local notebookNodes = [
{ instanceType: "r5.xlarge", nameSuffix: "b" },
{ instanceType: "r5.4xlarge" }, // FIXME: tainted, to be deleted when empty, replaced by equivalent during k8s upgrade
{ instanceType: "r5.4xlarge", nameSuffix: "b" },
{ instanceType: "r5.16xlarge" },
{ instanceType: "r5.xlarge", nameSuffix: "b" }, // FIXME: tainted
{
instanceType: "r5.xlarge",
namePrefix: "nb-staging",
labels+: { "2i2c/hub-name": "staging" },
tags+: { "2i2c:hub-name": "staging" },
},
{
instanceType: "r5.xlarge",
namePrefix: "nb-prod",
labels+: { "2i2c/hub-name": "prod" },
tags+: { "2i2c:hub-name": "prod" },
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-staging",
labels+: { "2i2c/hub-name": "staging" },
tags+: { "2i2c:hub-name": "staging" },
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-prod",
labels+: { "2i2c/hub-name": "prod" },
tags+: { "2i2c:hub-name": "prod" },
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-staging",
labels+: { "2i2c/hub-name": "staging" },
tags+: { "2i2c:hub-name": "staging" },
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-prod",
labels+: { "2i2c/hub-name": "prod" },
tags+: { "2i2c:hub-name": "prod" },
},
{
instanceType: "g4dn.xlarge",
namePrefix: "staging",
labels+: { "2i2c/hub-name": "staging" },
tags+: {
"2i2c:hub-name": "staging",
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1"
},
taints+: {
"nvidia.com/gpu": "present:NoSchedule"
},
// Allow provisioning GPUs across all AZs, to prevent situation where all
// GPUs in a single AZ are in use and no new nodes can be spawned
availabilityZones: masterAzs,
},
{
instanceType: "g4dn.xlarge",
namePrefix: "prod",
labels+: { "2i2c/hub-name": "prod" },
tags+: {
"2i2c:hub-name": "prod",
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1"
},
taints+: {
@@ -95,7 +146,7 @@ local daskNodes = [
[
ng + {
namePrefix: 'core',
nameSuffix: 'b',
nameSuffix: 'a',
nameIncludeInstanceType: false,
availabilityZones: [nodeAz],
ssh: {
@@ -108,6 +159,7 @@
"hub.jupyter.org/node-purpose": "core",
"k8s.dask.org/node-purpose": "core"
},
tags+: { "2i2c:node-purpose": "core" },
},
] + [
ng + {
@@ -123,6 +175,7 @@
"hub.jupyter.org/node-purpose": "user",
"k8s.dask.org/node-purpose": "scheduler"
},
tags+: { "2i2c:node-purpose": "user" },
taints+: {
"hub.jupyter.org_dedicated": "user:NoSchedule",
"hub.jupyter.org/dedicated": "user:NoSchedule"
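
A closing note on the jsonnet above (a sketch, not part of the commit): the six non-GPU notebook nodegroups differ only in instance type and hub name, so an equivalent list could be generated with a comprehension rather than spelled out entry by entry. The local names hubs, instanceTypes and perHubNotebookNodes below are hypothetical:

// Sketch only: an equivalent way to generate the per-hub notebook nodegroups
// added in this commit. Each entry keeps the same namePrefix, labels and tags
// as the hand-written versions above.
local hubs = ["staging", "prod"];
local instanceTypes = ["r5.xlarge", "r5.4xlarge", "r5.16xlarge"];
local perHubNotebookNodes = [
  {
    instanceType: t,
    namePrefix: "nb-" + hub,
    labels+: { "2i2c/hub-name": hub },
    tags+: { "2i2c:hub-name": hub },
  }
  for t in instanceTypes
  for hub in hubs
];

The GPU (g4dn.xlarge) nodegroups are best kept outside such a comprehension, since they additionally set the cluster-autoscaler GPU resource tag, the nvidia.com/gpu taint, and availabilityZones.
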
