Skip to content

Commit

Permalink
Add Nvidia GPU optimized AL2023 AMI (#1534)
Browse files Browse the repository at this point in the history
This change adds support for the AL2023 x86_64 NVIDIA GPU-optimized AMI. See the
[AWS
docs](https://docs.aws.amazon.com/eks/latest/userguide/retrieve-ami-id.html)
for a list of supported AMIs.

The AMI type (`AL2023_x86_64_NVIDIA`) is taken from the [AWS API
schema](https://docs.aws.amazon.com/eks/latest/APIReference/API_CreateNodegroup.html#AmazonEKS-CreateNodegroup-request-amiType).

Note: adding support for the Neuron-based AMI type is tracked in
#1526. This will require
making the AMI selection instance-type-aware.

Relates to #1526
  • Loading branch information
flostadler authored Dec 23, 2024
1 parent e70daf7 commit 9a96842
Show file tree
Hide file tree
Showing 11 changed files with 104 additions and 3 deletions.
10 changes: 10 additions & 0 deletions nodejs/eks/ami.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,15 @@ const amiTypeMetadata: { [key in AmiType]: AmiMetadata } = {

aliases: ["amazon-linux-2023/arm64/standard"],
},
// Metadata entry for the AL2023 x86_64 NVIDIA AMI type; flagged as GPU-capable via `gpuSupport`.
AL2023_x86_64_NVIDIA: {
os: OperatingSystem.AL2023,
gpuSupport: true,
architecture: "x86_64",
// Builds the SSM parameter name of the recommended image ID for the given cluster version.
ssmParameterName: (clusterVersion: ClusterVersion) =>
`/aws/service/eks/optimized-ami/${clusterVersion}/amazon-linux-2023/x86_64/nvidia/recommended/image_id`,

// NOTE(review): presumably alternative identifiers that map to this AMI type — confirm against the lookup code.
aliases: ["amazon-linux-2023/x86_64/nvidia"],
},
BOTTLEROCKET_ARM_64: {
os: OperatingSystem.Bottlerocket,
gpuSupport: false,
Expand Down Expand Up @@ -131,6 +140,7 @@ export const AmiType = {

AL2023X86_64Standard: "AL2023_x86_64_STANDARD",
AL2023Arm64Standard: "AL2023_ARM_64_STANDARD",
AL2023X86_64Nvidia: "AL2023_x86_64_NVIDIA",

BottlerocketArm64: "BOTTLEROCKET_ARM_64",
BottlerocketX86_64: "BOTTLEROCKET_x86_64",
Expand Down
5 changes: 4 additions & 1 deletion provider/cmd/pulumi-gen-eks/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -1983,7 +1983,10 @@ func generateSchema(version semver.Version, outdir string) schema.PackageSpec {
Name: "AL2023Arm64Standard",
Value: "AL2023_ARM_64_STANDARD",
},

{
Name: "AL2023X86_64Nvidia",
Value: "AL2023_x86_64_NVIDIA",
},
{
Name: "BottlerocketArm64",
Value: "BOTTLEROCKET_ARM_64",
Expand Down
4 changes: 4 additions & 0 deletions provider/cmd/pulumi-resource-eks/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,10 @@
"name": "AL2023Arm64Standard",
"value": "AL2023_ARM_64_STANDARD"
},
{
"name": "AL2023X86_64Nvidia",
"value": "AL2023_x86_64_NVIDIA"
},
{
"name": "BottlerocketArm64",
"value": "BOTTLEROCKET_ARM_64"
Expand Down
1 change: 1 addition & 0 deletions sdk/dotnet/Enums.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ private AmiType(string value)
public static AmiType AL2Arm64 { get; } = new AmiType("AL2_ARM_64");
public static AmiType AL2023X86_64Standard { get; } = new AmiType("AL2023_x86_64_STANDARD");
public static AmiType AL2023Arm64Standard { get; } = new AmiType("AL2023_ARM_64_STANDARD");
public static AmiType AL2023X86_64Nvidia { get; } = new AmiType("AL2023_x86_64_NVIDIA");
public static AmiType BottlerocketArm64 { get; } = new AmiType("BOTTLEROCKET_ARM_64");
public static AmiType BottlerocketX86_64 { get; } = new AmiType("BOTTLEROCKET_x86_64");
public static AmiType BottlerocketArm64Nvidia { get; } = new AmiType("BOTTLEROCKET_ARM_64_NVIDIA");
Expand Down
1 change: 1 addition & 0 deletions sdk/go/eks/pulumiEnums.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions sdk/java/src/main/java/com/pulumi/eks/enums/AmiType.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public enum AmiType {
AL2Arm64("AL2_ARM_64"),
AL2023X86_64Standard("AL2023_x86_64_STANDARD"),
AL2023Arm64Standard("AL2023_ARM_64_STANDARD"),
AL2023X86_64Nvidia("AL2023_x86_64_NVIDIA"),
BottlerocketArm64("BOTTLEROCKET_ARM_64"),
BottlerocketX86_64("BOTTLEROCKET_x86_64"),
BottlerocketArm64Nvidia("BOTTLEROCKET_ARM_64_NVIDIA"),
Expand Down
1 change: 1 addition & 0 deletions sdk/nodejs/types/enums/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ See for more details: https://docs.aws.amazon.com/eks/latest/userguide/al2023.ht
AL2Arm64: "AL2_ARM_64",
AL2023X86_64Standard: "AL2023_x86_64_STANDARD",
AL2023Arm64Standard: "AL2023_ARM_64_STANDARD",
AL2023X86_64Nvidia: "AL2023_x86_64_NVIDIA",
BottlerocketArm64: "BOTTLEROCKET_ARM_64",
BottlerocketX86_64: "BOTTLEROCKET_x86_64",
BottlerocketArm64Nvidia: "BOTTLEROCKET_ARM_64_NVIDIA",
Expand Down
1 change: 1 addition & 0 deletions sdk/python/pulumi_eks/_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class AmiType(str, Enum):
AL2_ARM64 = "AL2_ARM_64"
AL2023_X86_64_STANDARD = "AL2023_x86_64_STANDARD"
AL2023_ARM64_STANDARD = "AL2023_ARM_64_STANDARD"
AL2023_X86_64_NVIDIA = "AL2023_x86_64_NVIDIA"
BOTTLEROCKET_ARM64 = "BOTTLEROCKET_ARM_64"
BOTTLEROCKET_X86_64 = "BOTTLEROCKET_x86_64"
BOTTLEROCKET_ARM64_NVIDIA = "BOTTLEROCKET_ARM_64_NVIDIA"
Expand Down
2 changes: 1 addition & 1 deletion tests/nodejs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ func TestAccManagedNodeGroupOS(t *testing.T) {
foundNodes++
}
}
assert.Equal(t, 1, foundNodes, "Expected %s nodes with GPU")
// Exactly two nodes with Nvidia GPUs are expected. foundNodes is an int, so the failure
// message must use %d (a %s verb would render as "%!s(int=N)") and should report the
// observed count separately from the expected one.
assert.Equal(t, 2, foundNodes, "Expected 2 nodes with Nvidia GPUs, found %d", foundNodes)
}))
},
})
Expand Down
78 changes: 78 additions & 0 deletions tests/testdata/programs/managed-ng-os/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
import * as awsx from "@pulumi/awsx";
import * as eks from "@pulumi/eks";
import * as k8s from "@pulumi/kubernetes";
import * as iam from "./iam";
import * as userdata from "./userdata";

Expand Down Expand Up @@ -127,6 +128,83 @@ const managedNodeGroupAL2023ArmUserData = eks.createManagedNodeGroup("al-2023-ar
kubeletExtraArgs: `--max-pods=${increasedPodCapacity}`,
});

// Node label that opts a node into running the NVIDIA device plugin. The GPU node group applies it and
// the DaemonSet selects on it, so both sides are derived from this single definition.
const nvidiaPluginNodeLabels = { "nvidia-device-plugin-enabled": "true" };

// Pod label linking the DaemonSet selector to its pod template.
const nvidiaPluginPodLabels = { name: "nvidia-device-plugin-ds" };

// Host directory that is mounted into the plugin container at the identical path.
const kubeletDevicePluginDir = "/var/lib/kubelet/device-plugins";

// Name shared by the pod volume and the container's volume mount.
const devicePluginVolumeName = "device-plugin";

// AL2023 managed node group on a GPU instance type with `gpu: true`.
const managedNodeGroupAL2023NvidiaGpu = eks.createManagedNodeGroup("al-2023-mng-nvidia-gpu", {
    ...scalingConfig,
    cluster,
    nodeRole: role,
    operatingSystem: eks.OperatingSystem.AL2023,
    gpu: true,
    instanceTypes: ["g4dn.xlarge"],
    labels: { ...nvidiaPluginNodeLabels },
});

// Container running the NVIDIA device plugin: privilege escalation disabled and all capabilities dropped.
const nvidiaDevicePluginContainer = {
    name: "nvidia-device-plugin-ctr",
    image: "nvcr.io/nvidia/k8s-device-plugin:v0.17.0",
    env: [{ name: "FAIL_ON_INIT_ERROR", value: "false" }],
    securityContext: {
        allowPrivilegeEscalation: false,
        capabilities: { drop: ["ALL"] },
    },
    volumeMounts: [{ name: devicePluginVolumeName, mountPath: kubeletDevicePluginDir }],
};

// Deploy the NVIDIA device plugin as a DaemonSet. The accelerated Amazon Linux AMIs ship the NVIDIA
// drivers but not the device plugin, and Kubernetes is not aware of the GPUs until the plugin runs.
const nvidiaDevicePlugin = new k8s.apps.v1.DaemonSet(
    "nvidia-device-plugin",
    {
        metadata: {
            name: "nvidia-device-plugin-daemonset",
            namespace: "kube-system",
        },
        spec: {
            selector: { matchLabels: { ...nvidiaPluginPodLabels } },
            updateStrategy: { type: "RollingUpdate" },
            template: {
                metadata: { labels: { ...nvidiaPluginPodLabels } },
                spec: {
                    tolerations: [
                        { key: "nvidia.com/gpu", operator: "Exists", effect: "NoSchedule" },
                    ],
                    // Only schedule onto nodes carrying the opt-in label set on the GPU node group.
                    nodeSelector: { ...nvidiaPluginNodeLabels },
                    priorityClassName: "system-node-critical",
                    containers: [nvidiaDevicePluginContainer],
                    volumes: [
                        {
                            name: devicePluginVolumeName,
                            hostPath: { path: kubeletDevicePluginDir },
                        },
                    ],
                },
            },
        },
    },
    { provider: cluster.provider },
);



// Create a simple Bottlerocket node group with x64 instances
const managedNodeGroupBottlerocket = eks.createManagedNodeGroup("bottlerocket-mng", {
...scalingConfig,
Expand Down
3 changes: 2 additions & 1 deletion tests/testdata/programs/managed-ng-os/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"@pulumi/awsx": "^2.0.0",
"@pulumi/aws": "^6.50.1",
"@pulumi/eks": "latest",
"@pulumi/pulumi": "^3.0.0"
"@pulumi/pulumi": "^3.0.0",
"@pulumi/kubernetes": "^4.19.0"
}
}

0 comments on commit 9a96842

Please sign in to comment.