Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test amazon linux 2023 support #129

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .common-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ trigger-pipeline:
matrix:
- DRIVER_VERSION: [535.183.06, 550.90.12, 560.35.03]

# Driver versions fanned out as parallel jobs for amzn2023
# (one job per DRIVER_VERSION entry).
.driver-versions-amzn2023:
  parallel:
    matrix:
      - DRIVER_VERSION:
          - "550.90.12"
          - "560.35.03"

# Define the matrix of precompiled jobs that can be run in parallel for ubuntu22.04
.driver-versions-precompiled-ubuntu22.04:
parallel:
Expand All @@ -105,6 +111,10 @@ trigger-pipeline:
DIST: ubuntu22.04
CVE_UPDATES: "openssl"

# Selects the amzn2023 distribution for jobs that extend this template.
.dist-amzn2023:
  variables:
    DIST: "amzn2023"

.dist-rhel8:
variables:
DIST: rhel8
Expand Down Expand Up @@ -174,6 +184,14 @@ trigger-pipeline:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

.release-amzn2023:
  # Runs once per DRIVER_VERSION in the amzn2023 matrix.
  extends:
    - .release-generic
    - .driver-versions-amzn2023
  rules:
    # Only for pipelines that were not started by a schedule.
    - if: '$CI_PIPELINE_SOURCE != "schedule"'

.release-rhel9:
# Perform for each DRIVER_VERSION
extends:
Expand Down Expand Up @@ -211,6 +229,15 @@ trigger-pipeline:
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/driver"

# Staging release for amzn2023: output image is
# ${CI_REGISTRY_IMAGE}/staging/driver, pushed with the CI registry credentials.
.release:staging-amzn2023:
  extends:
    - .release-amzn2023
  variables:
    OUT_REGISTRY: '${CI_REGISTRY}'
    OUT_REGISTRY_USER: '${CI_REGISTRY_USER}'
    OUT_REGISTRY_TOKEN: '${CI_REGISTRY_PASSWORD}'
    OUT_IMAGE_NAME: '${CI_REGISTRY_IMAGE}/staging/driver'

.release:staging-rhel9:
extends:
- .release-rhel9
Expand Down
12 changes: 12 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ on:
- completed
branches:
- main
pull_request:
types:
- opened
- synchronize
branches:
- testshivaku
push:
branches:
- testshivaku


jobs:
e2e-tests-nvidiadriver:
Expand All @@ -36,6 +46,8 @@ jobs:
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
AWS_SESSION_TOKEN: ${{ secrets.AWS_SESSION_TOKEN }}
AWS_DEFAULT_REGION: "us-west-1"
with:
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ jobs:
dist:
- ubuntu20.04
- ubuntu22.04
- amzn2023
- rhel8
- rhel9
ispr:
Expand All @@ -53,6 +54,10 @@ jobs:
- ispr: true
dist: ubuntu20.04
driver: 550.90.12
- dist: amzn2023
driver: 535.183.06
- dist: amzn2023
driver: 550.90.12
fail-fast: false
steps:
- uses: actions/checkout@v4
Expand Down
14 changes: 14 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ include:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

# Image build template for amzn2023.
.image-build-amzn2023:
  # Runs once per DRIVER_VERSION in the amzn2023 matrix.
  extends:
    - .driver-versions-amzn2023
    - .image-build-generic
  rules:
    # Only for pipelines that were not started by a schedule.
    - if: '$CI_PIPELINE_SOURCE != "schedule"'

# Define the image build targets
.image-build-rhel9:
# Perform for each DRIVER_VERSION
Expand All @@ -69,6 +78,11 @@ image-ubuntu22.04:
- .image-build-ubuntu22.04
- .dist-ubuntu22.04

# Builds the amzn2023 driver image: build template + DIST selector.
image-amzn2023:
  extends:
    - .image-build-amzn2023
    - .dist-amzn2023

image-rhel8:
extends:
- .image-build
Expand Down
35 changes: 35 additions & 0 deletions .nvidia-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ image-rhel9:
- .dist-rhel9
- .driver-versions-rhel9

# Image-pull job for amzn2023, repeated for each DRIVER_VERSION in the
# amzn2023 matrix.
image-amzn2023:
  extends:
    - .image-pull
    - .dist-amzn2023
    - .driver-versions-amzn2023

# The .scan step forms the base of the image scan operation performed before releasing
# images.
.scan-generic:
Expand Down Expand Up @@ -190,6 +196,18 @@ image-rhel9:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- !reference [.pipeline-trigger-rules, rules]

.scan-amzn2023:
  # Runs once per DRIVER_VERSION in the amzn2023 matrix.
  extends:
    - .driver-versions-amzn2023
    - .scan-generic
  rules:
    - !reference [.scan-rules-common, rules]
    # Scheduled pipelines never run the scan.
    - if: '$CI_PIPELINE_SOURCE == "schedule"'
      when: never
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
    - !reference [.pipeline-trigger-rules, rules]

.scan-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -235,6 +253,14 @@ scan-ubuntu22.04-arm64:
needs:
- image-ubuntu22.04

# amd64 scan of the amzn2023 image; waits for image-amzn2023.
scan-amzn2023-amd64:
  extends:
    - .scan-amzn2023
    - .dist-amzn2023
    - .platform-amd64
  needs:
    - image-amzn2023

scan-precompiled-ubuntu22.04-amd64:
variables:
PLATFORM: linux/amd64
Expand Down Expand Up @@ -302,6 +328,12 @@ release:ngc-ubuntu22.04:
- .dist-ubuntu22.04
- .driver-versions-ubuntu22.04

# NGC release for amzn2023, repeated for each DRIVER_VERSION in the
# amzn2023 matrix.
release:ngc-amzn2023:
  extends:
    - .release:ngc
    - .dist-amzn2023
    - .driver-versions-amzn2023

release:ngc-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -478,6 +510,9 @@ sign:ngc-ubuntu-rhel-rhcos:
- SIGN_JOB_NAME: ["ubuntu"]
VERSION: ["20.04"]
DRIVER_VERSION: ["535.183.06", "550.90.12", "560.35.03"]
- SIGN_JOB_NAME: ["amzn"]
VERSION: ["2023"]
DRIVER_VERSION: ["560.35.03"]
- SIGN_JOB_NAME: ["rhel"]
VERSION: ["8.8", "8.10"]
DRIVER_VERSION: ["535.183.06", "550.90.12", "560.35.03"]
Expand Down
22 changes: 21 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(OUT_DIST)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)

##### Public rules #####
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 amzn2023 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
BASE_FROM := jammy focal
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
Expand Down Expand Up @@ -92,6 +92,10 @@ pull-signed_ubuntu22.04%: DIST = ubuntu22.04
pull-signed_ubuntu22.04%: DRIVER_TAG = $(DRIVER_BRANCH)
pull-signed_ubuntu22.04%: IMAGE_TAG = $(DRIVER_BRANCH)-$(KERNEL_VERSION)-$(DIST)

# Target-specific variables for pull-signed_amzn2023* targets: DIST is forced
# to amzn2023 and the image is tagged <driver branch>-<kernel version>-<dist>.
# NOTE(review): DISTRIBUTIONS lists amzn2023 but not signed_amzn2023 — confirm
# that a target matching this pattern is actually generated.
pull-signed_amzn2023%: DIST = amzn2023
pull-signed_amzn2023%: DRIVER_TAG = $(DRIVER_BRANCH)
pull-signed_amzn2023%: IMAGE_TAG = $(DRIVER_BRANCH)-$(KERNEL_VERSION)-$(DIST)

PLATFORM ?= linux/amd64
$(DRIVER_PULL_TARGETS): pull-%:
$(DOCKER) pull "--platform=$(PLATFORM)" "$(IMAGE)"
Expand All @@ -109,6 +113,10 @@ archive-signed_ubuntu22.04%: DIST = ubuntu22.04
archive-signed_ubuntu22.04%: DRIVER_TAG = $(DRIVER_BRANCH)
archive-signed_ubuntu22.04%: IMAGE_TAG = $(DRIVER_BRANCH)-$(KERNEL_VERSION)-$(DIST)

# Target-specific variables for archive-signed_amzn2023* targets: DIST is
# forced to amzn2023 and the image is tagged
# <driver branch>-<kernel version>-<dist>.
# NOTE(review): DISTRIBUTIONS lists amzn2023 but not signed_amzn2023 — confirm
# that a target matching this pattern is actually generated.
archive-signed_amzn2023%: DIST = amzn2023
archive-signed_amzn2023%: DRIVER_TAG = $(DRIVER_BRANCH)
archive-signed_amzn2023%: IMAGE_TAG = $(DRIVER_BRANCH)-$(KERNEL_VERSION)-$(DIST)

$(DRIVER_ARCHIVE_TARGETS): archive-%:
$(DOCKER) save "$(IMAGE)" -o "archive.tar"

Expand All @@ -130,6 +138,11 @@ push-signed_ubuntu22.04%: DRIVER_TAG = $(DRIVER_BRANCH)
push-signed_ubuntu22.04%: IMAGE_TAG = $(DRIVER_BRANCH)-$(KERNEL_VERSION)-$(DIST)
push-signed_ubuntu22.04%: OUT_IMAGE_TAG = $(DRIVER_BRANCH)-$(KERNEL_VERSION)-$(DIST)

# Target-specific variables for push-signed_amzn2023* targets: DIST is forced
# to amzn2023; both the source tag (IMAGE_TAG) and the pushed tag
# (OUT_IMAGE_TAG) are <driver branch>-<kernel version>-<dist>.
# NOTE(review): DISTRIBUTIONS lists amzn2023 but not signed_amzn2023 — confirm
# that a target matching this pattern is actually generated.
push-signed_amzn2023%: DIST = amzn2023
push-signed_amzn2023%: DRIVER_TAG = $(DRIVER_BRANCH)
push-signed_amzn2023%: IMAGE_TAG = $(DRIVER_BRANCH)-$(KERNEL_VERSION)-$(DIST)
push-signed_amzn2023%: OUT_IMAGE_TAG = $(DRIVER_BRANCH)-$(KERNEL_VERSION)-$(DIST)

# $(DRIVER_BUILD_TARGETS) is in the form of build-$(DIST)-$(DRIVER_VERSION)
# Parse the target to set the required variables.
build-%: DIST = $(word 2,$(subst -, ,$@))
Expand Down Expand Up @@ -175,6 +188,13 @@ build-signed_ubuntu22.04%: DRIVER_TAG = $(DRIVER_BRANCH)
build-signed_ubuntu22.04%: IMAGE_TAG = $(DRIVER_BRANCH)-$(KERNEL_VERSION)-$(DIST)
build-signed_ubuntu22.04%: DOCKER_BUILD_ARGS = --build-arg KERNEL_VERSION="$(KERNEL_VERSION)"

# amzn2023 Precompiled Driver
# Target-specific variables for build-signed_amzn2023* targets: DIST is forced
# to amzn2023, the image is tagged <driver branch>-<kernel version>-<dist>,
# and KERNEL_VERSION is passed to the Docker build as a build arg.
# NOTE(review): DISTRIBUTIONS lists amzn2023 but not signed_amzn2023 — confirm
# that a target matching this pattern is actually generated.
build-signed_amzn2023%: DIST = amzn2023
build-signed_amzn2023%: DRIVER_TAG = $(DRIVER_BRANCH)
build-signed_amzn2023%: IMAGE_TAG = $(DRIVER_BRANCH)-$(KERNEL_VERSION)-$(DIST)
build-signed_amzn2023%: DOCKER_BUILD_ARGS = --build-arg KERNEL_VERSION="$(KERNEL_VERSION)"
# Platform pin is currently disabled; left here for reference.
# build-signed_amzn2023%: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64

# base is an image used to poll Canonical for the latest kernel version
build-base-%: DOCKERFILE = $(CURDIR)/base/Dockerfile
build-base-%: TARGET = $(word 3,$(subst -, ,$@))
Expand Down
106 changes: 106 additions & 0 deletions amzn2023/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Driver container image for Amazon Linux 2023.
#
# Stage 1 ("build") compiles the vgpu-util helper from source.
# Stage 2 (final) is the CUDA amzn2023 base image with the driver installer,
# the nvidia-driver entrypoint script and the vgpu-util binary from stage 1.
ARG CUDA_VERSION=latest

# The stage name "build" must sit on the stage that actually produces
# /work/vgpu-util; the final stage copies the binary from it by that name.
# NOTE(review): the builder is amazonlinux:2 while the runtime image is
# amzn2023 — confirm this base is intended.
FROM amazonlinux:2 AS build

ARG TARGETARCH
ARG CUDA_VERSION

SHELL ["/bin/bash", "-c"]

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/yum.repos.d/cuda*

# tar and gzip are listed explicitly because the Go tarball below is unpacked
# with tar; minimal base images may not ship them.
RUN yum update -y && yum install -y yum-utils && \
    yum-config-manager --setopt=skip_missing_names_on_install=False && \
    yum install -y \
        gcc \
        gcc-c++ \
        make \
        ca-certificates \
        tar \
        gzip \
        git && \
    yum clean all

ENV GOLANG_VERSION=1.23.1

# download appropriate binary based on the target architecture for multi-arch builds
# pipefail + curl -f make a failed download fail the step instead of piping
# an error page into tar.
RUN set -o pipefail && \
    OS_ARCH=$(echo ${TARGETARCH} | sed 's/x86_64/amd64/' ) && \
    curl -fsSL https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${OS_ARCH}.tar.gz \
    | tar -C /usr/local -xz

ENV PATH=/usr/local/go/bin:$PATH

WORKDIR /work

RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
    cd driver/vgpu/src && \
    go build -o vgpu-util && \
    mv vgpu-util /work

FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-amzn2023

SHELL ["/bin/bash", "-c"]

ARG BASE_URL=https://us.download.nvidia.com/tesla
ARG TARGETARCH
ENV TARGETARCH=$TARGETARCH
ARG DRIVER_VERSION
ENV DRIVER_VERSION=$DRIVER_VERSION

# Arg to indicate if driver type is either of passthrough(baremetal) or vgpu
ARG DRIVER_TYPE=passthrough
ENV DRIVER_TYPE=$DRIVER_TYPE
ARG DRIVER_BRANCH=550
ENV DRIVER_BRANCH=$DRIVER_BRANCH
ARG VGPU_LICENSE_SERVER_TYPE=NLS
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
# vGPU version compatibility check is disabled by default
# (DISABLE_VGPU_VERSION_CHECK=true); pass false at build time to enable it.
ARG DISABLE_VGPU_VERSION_CHECK=true
ENV DISABLE_VGPU_VERSION_CHECK=$DISABLE_VGPU_VERSION_CHECK
ENV NVIDIA_VISIBLE_DEVICES=void

RUN echo "TARGETARCH=$TARGETARCH"

ADD install.sh /tmp

RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
    curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
    chmod +x /usr/local/bin/donkey

COPY nvidia-driver /usr/local/bin

# Bring in the vgpu-util binary compiled in the "build" stage.
COPY --from=build /work/vgpu-util /usr/local/bin

ADD drivers drivers/

# Fetch the installer automatically for passthrough/baremetal types
RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
    cd drivers && \
    /tmp/install.sh download_installer; fi

# Fabric manager and NSCQ are installed only for non-vGPU, non-arm64 builds.
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$TARGETARCH" != "arm64" ]; then \
    yum update -y && \
    yum install -y \
        nvidia-fabric-manager-${DRIVER_VERSION}-1 \
        libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi

WORKDIR /drivers

# PUBLIC_KEY defaults to the placeholder file named "empty" in the build context.
ARG PUBLIC_KEY=empty
COPY ${PUBLIC_KEY} kernel/pubkey.x509

# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
    yum update -y && yum install -y yum-utils && \
    yum-config-manager --setopt=skip_missing_names_on_install=False && \
    yum install -y \
        ${CVE_UPDATES} && \
    yum clean all; fi

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/yum.repos.d/cuda*

# Add NGC DL license from the CUDA image
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

ENTRYPOINT ["nvidia-driver", "init"]
3 changes: 3 additions & 0 deletions amzn2023/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Amazon Linux 2023 [![build status](https://gitlab.com/nvidia/driver/badges/master/build.svg)](https://gitlab.com/nvidia/driver/commits/master)

See https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers-(Beta)
1 change: 1 addition & 0 deletions amzn2023/drivers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Folder for downloading vGPU drivers and dependent metadata files
Empty file added amzn2023/empty
Empty file.
Loading