forked from aws-samples/awsome-distributed-training
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eks-g4dn-vpc.yaml
59 lines (53 loc) · 1.25 KB
/
eks-g4dn-vpc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig

# Cluster identity. Update cluster name, region, and EKS version if needed.
metadata:
  name: eks-g4dn-vpc
  # Kept quoted so the version is read as a string, not a float.
  version: "1.28"
  # Placeholder: substitute the target AWS region before use.
  region: PLACEHOLDER_AWS_REGION
# Network placement for the cluster.
# Substitute the VPC and subnet IDs below (all values are placeholders).
vpc:
  id: PLACEHOLDER_VPC_ID
  subnets:
    # Each entry maps a subnet alias (e.g. public-one) to a subnet ID.
    public:
      public-one:
        id: PLACEHOLDER_SUBNET_PUBLIC_1
      public-two:
        id: PLACEHOLDER_SUBNET_PUBLIC_2
    private:
      private-one:
        id: PLACEHOLDER_SUBNET_PRIVATE_1
      private-two:
        id: PLACEHOLDER_SUBNET_PRIVATE_2
# Fully-managed nodegroups
managedNodeGroups:
  # Nodegroup for system pods
  - name: sys
    instanceType: c5.2xlarge
    desiredCapacity: 1
    iam:
      withAddonPolicies:
        autoScaler: true
        cloudWatch: true

  # GPU nodegroup
  # NOTE(review): the original comment said "Update capacityReservationID
  # below", but this nodegroup defines no capacity reservation. If one is
  # used, add it here and specify the subnet id corresponding to the
  # capacity reservation under `subnets`.
  - name: g4dn
    instanceType: g4dn.8xlarge
    instancePrefix: g4dn-vpc
    privateNetworking: true
    efaEnabled: true
    minSize: 0
    desiredCapacity: 2
    maxSize: 10
    volumeSize: 500
    # Single subnet for this nodegroup; placeholder matches the
    # `private-two` entry used in the vpc section.
    subnets:
      - PLACEHOLDER_SUBNET_PRIVATE_2
    iam:
      withAddonPolicies:
        autoScaler: true
        cloudWatch: true
        ebs: true
        fsx: true
# Cluster-wide IAM settings (distinct from the per-nodegroup `iam` blocks).
iam:
  withOIDC: true