Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2i2c-aws-us: Nodegroup split and k8s update #5106

Merged
merged 8 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions config/clusters/2i2c-aws-us/cosmicds.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ jupyterhub:
name: Cosmic DS, Harvard
url: https://www.cosmicds.cfa.harvard.edu/
singleuser:
nodeSelector:
2i2c/hub-name: cosmicds
# No persistent storage should be kept to reduce any potential data
# retention & privacy issues.
# Ref https://github.com/2i2c-org/infrastructure/issues/2128#issuecomment-1635107926
Expand Down
13 changes: 13 additions & 0 deletions config/clusters/2i2c-aws-us/dask-staging.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ basehub:
name: 2i2c
url: https://2i2c.org
singleuser:
nodeSelector:
2i2c/hub-name: dask-staging
image:
name: pangeo/pangeo-notebook
tag: "latest"
Expand All @@ -39,3 +41,14 @@ basehub:
authenticator_class: "github"
GitHubOAuthenticator:
oauth_callback_url: "https://dask-staging.aws.2i2c.cloud/hub/oauth_callback"

dask-gateway:
gateway:
scheduler:
extraPodConfig:
nodeSelector:
2i2c/hub-name: dask-staging
worker:
extraPodConfig:
nodeSelector:
2i2c/hub-name: dask-staging
2 changes: 2 additions & 0 deletions config/clusters/2i2c-aws-us/itcoocean.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ jupyterhub:
admin_users:
- eeholmes # Eli Holmes, Community representative
singleuser:
nodeSelector:
2i2c/hub-name: itcoocean
# Requested in https://2i2c.freshdesk.com/a/tickets/1320
defaultUrl: /lab
# shared-public for collaboration
Expand Down
13 changes: 13 additions & 0 deletions config/clusters/2i2c-aws-us/ncar-cisl.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ basehub:
- NicholasCote # Nicholas Cote, Initial administrator
- nwehrheim # Nick Wehrheim, Community representative
singleuser:
nodeSelector:
2i2c/hub-name: ncar-cisl
image:
# image choice is preliminary and is expected to be set up via
# https://ncar-cisl.2i2c.cloud/services/configurator/ by the community
Expand Down Expand Up @@ -250,3 +252,14 @@ basehub:
node.kubernetes.io/instance-type: g4dn.xlarge
extra_resource_limits:
nvidia.com/gpu: "1"

dask-gateway:
gateway:
scheduler:
extraPodConfig:
nodeSelector:
2i2c/hub-name: ncar-cisl
worker:
extraPodConfig:
nodeSelector:
2i2c/hub-name: ncar-cisl
13 changes: 13 additions & 0 deletions config/clusters/2i2c-aws-us/showcase.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ basehub:
Authenticator:
enable_auth_state: true
singleuser:
nodeSelector:
2i2c/hub-name: showcase
extraEnv:
SCRATCH_BUCKET: s3://2i2c-aws-us-scratch-showcase/$(JUPYTERHUB_USER)
PERSISTENT_BUCKET: s3://2i2c-aws-us-persistent-showcase/$(JUPYTERHUB_USER)
Expand Down Expand Up @@ -246,3 +248,14 @@ basehub:
node.kubernetes.io/instance-type: g4dn.xlarge
extra_resource_limits:
nvidia.com/gpu: "1"

dask-gateway:
gateway:
scheduler:
extraPodConfig:
nodeSelector:
2i2c/hub-name: showcase
worker:
extraPodConfig:
nodeSelector:
2i2c/hub-name: showcase
2 changes: 2 additions & 0 deletions config/clusters/2i2c-aws-us/staging.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,7 @@ jupyterhub:
GitHubOAuthenticator:
oauth_callback_url: "https://staging.aws.2i2c.cloud/hub/oauth_callback"
singleuser:
nodeSelector:
2i2c/hub-name: staging
extraEnv:
SCRATCH_BUCKET: s3://2i2c-aws-us-scratch-staging/$(JUPYTERHUB_USER)
169 changes: 163 additions & 6 deletions eksctl/2i2c-aws-us.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,106 @@ local nodeAz = "us-west-2a";
// A `node.kubernetes.io/instance-type` label is added, so pods
// can request a particular kind of node with a nodeSelector
local notebookNodes = [
{ instanceType: "r5.xlarge" },
{ instanceType: "r5.4xlarge" },
{ instanceType: "r5.16xlarge" },
// staging
{
instanceType: "r5.xlarge",
namePrefix: "nb-staging",
labels+: { "2i2c/hub-name": "staging" },
tags+: { "2i2c:hub-name": "staging" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-staging",
labels+: { "2i2c/hub-name": "staging" },
tags+: { "2i2c:hub-name": "staging" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-staging",
labels+: { "2i2c/hub-name": "staging" },
tags+: { "2i2c:hub-name": "staging" }
},
// dask-staging
{
instanceType: "r5.xlarge",
namePrefix: "nb-dask-staging",
labels+: { "2i2c/hub-name": "dask-staging" },
tags+: { "2i2c:hub-name": "dask-staging" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-dask-staging",
labels+: { "2i2c/hub-name": "dask-staging" },
tags+: { "2i2c:hub-name": "dask-staging" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-dask-staging",
labels+: { "2i2c/hub-name": "dask-staging" },
tags+: { "2i2c:hub-name": "dask-staging" }
},
// showcase
{
instanceType: "r5.xlarge",
namePrefix: "nb-showcase",
labels+: { "2i2c/hub-name": "showcase" },
tags+: { "2i2c:hub-name": "showcase" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-showcase",
labels+: { "2i2c/hub-name": "showcase" },
tags+: { "2i2c:hub-name": "showcase" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-showcase",
labels+: { "2i2c/hub-name": "showcase" },
tags+: { "2i2c:hub-name": "showcase" }
},
{
instanceType: "g4dn.xlarge",
namePrefix: "nb-showcase",
minSize: 0,
labels+: { "2i2c/hub-name": "showcase" },
tags+: {
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1",
"2i2c:hub-name": "showcase",
},
taints+: {
"nvidia.com/gpu": "present:NoSchedule"
},
// Allow provisioning GPUs across all AZs, to prevent a situation where all
// GPUs in a single AZ are in use and no new nodes can be spawned
availabilityZones: masterAzs,
},
// ncar-cisl
{
instanceType: "r5.xlarge",
namePrefix: "nb-ncar-cisl",
labels+: { "2i2c/hub-name": "ncar-cisl" },
tags+: { "2i2c:hub-name": "ncar-cisl" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-ncar-cisl",
labels+: { "2i2c/hub-name": "ncar-cisl" },
tags+: { "2i2c:hub-name": "ncar-cisl" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-ncar-cisl",
labels+: { "2i2c/hub-name": "ncar-cisl" },
tags+: { "2i2c:hub-name": "ncar-cisl" }
},
{
instanceType: "g4dn.xlarge",
namePrefix: "nb-ncar-cisl",
minSize: 0,
labels+: { "2i2c/hub-name": "ncar-cisl" },
tags+: {
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1"
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1",
"2i2c:hub-name": "ncar-cisl",
},
taints+: {
"nvidia.com/gpu": "present:NoSchedule"
Expand All @@ -40,6 +133,44 @@ local notebookNodes = [
// GPUs in a single AZ are in use and no new nodes can be spawned
availabilityZones: masterAzs,
},
// itcoocean
{
instanceType: "r5.xlarge",
namePrefix: "itcoocean",
labels+: { "2i2c/hub-name": "itcoocean" },
tags+: { "2i2c:hub-name": "itcoocean" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "itcoocean",
labels+: { "2i2c/hub-name": "itcoocean" },
tags+: { "2i2c:hub-name": "itcoocean" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "itcoocean",
labels+: { "2i2c/hub-name": "itcoocean" },
tags+: { "2i2c:hub-name": "itcoocean" }
},
// cosmicds
{
instanceType: "r5.xlarge",
namePrefix: "cosmicds",
labels+: { "2i2c/hub-name": "cosmicds" },
tags+: { "2i2c:hub-name": "cosmicds" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "cosmicds",
labels+: { "2i2c/hub-name": "cosmicds" },
tags+: { "2i2c:hub-name": "cosmicds" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "cosmicds",
labels+: { "2i2c/hub-name": "cosmicds" },
tags+: { "2i2c:hub-name": "cosmicds" }
},
];


Expand All @@ -54,7 +185,24 @@ local daskNodes = [
// A not yet fully established policy is being developed about using a single
// node pool, see https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
{
namePrefix: "dask-staging",
labels+: { "2i2c/hub-name": "dask-staging" },
tags+: { "2i2c:hub-name": "dask-staging" },
instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }
},
{
namePrefix: "dask-showcase",
labels+: { "2i2c/hub-name": "showcase" },
tags+: { "2i2c:hub-name": "showcase" },
instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }
},
{
namePrefix: "dask-ncar-cisl",
labels+: { "2i2c/hub-name": "ncar-cisl" },
tags+: { "2i2c:hub-name": "ncar-cisl" },
instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }
},
];


Expand All @@ -64,7 +212,7 @@ local daskNodes = [
metadata+: {
name: "2i2c-aws-us",
region: clusterRegion,
version: "1.29",
version: "1.30",
},
availabilityZones: masterAzs,
iam: {
Expand Down Expand Up @@ -108,6 +256,9 @@ local daskNodes = [
"hub.jupyter.org/node-purpose": "core",
"k8s.dask.org/node-purpose": "core"
},
tags+: {
"2i2c:node-purpose": "core"
},
},
] + [
ng + {
Expand All @@ -123,6 +274,9 @@ local daskNodes = [
"hub.jupyter.org/node-purpose": "user",
"k8s.dask.org/node-purpose": "scheduler"
},
tags+: {
"2i2c:node-purpose": "user"
},
taints+: {
"hub.jupyter.org_dedicated": "user:NoSchedule",
"hub.jupyter.org/dedicated": "user:NoSchedule"
Expand All @@ -145,6 +299,9 @@ local daskNodes = [
"k8s.dask.org_dedicated" : "worker:NoSchedule",
"k8s.dask.org/dedicated" : "worker:NoSchedule"
},
tags+: {
"2i2c:node-purpose": "worker"
},
instancesDistribution+: {
onDemandBaseCapacity: 0,
onDemandPercentageAboveBaseCapacity: 0,
Expand Down