diff --git a/.common-ci.yml b/.common-ci.yml index ec490963..16d601cd 100644 --- a/.common-ci.yml +++ b/.common-ci.yml @@ -166,6 +166,14 @@ trigger-pipeline: rules: - if: $CI_PIPELINE_SOURCE != "schedule" +.release-ubuntu24.04: + # Perform for each DRIVER_VERSION + extends: + - .release-generic + - .driver-versions + rules: + - if: $CI_PIPELINE_SOURCE != "schedule" + .release-rhel9: # Perform for each DRIVER_VERSION extends: @@ -298,3 +306,14 @@ release:staging-precompiled-ubuntu22.04: - .release:staging-precompiled needs: - image-precompiled-ubuntu22.04 + +# Precompiled Ubuntu24.04 release +release:staging-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .release:staging-precompiled + needs: + - image-precompiled-ubuntu24.04 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a46e34d3..fa5b321c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -40,7 +40,7 @@ jobs: aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_ssh_key: ${{ secrets.AWS_SSH_KEY }} - holodeck_config: "tests/holodeck.yaml" + holodeck_config: "tests/holodeck_ubuntu22.04.yaml" - name: Get public dns name id: get_public_dns_name diff --git a/.github/workflows/image.yaml b/.github/workflows/image.yaml index fd3d293b..8744cb5f 100644 --- a/.github/workflows/image.yaml +++ b/.github/workflows/image.yaml @@ -21,12 +21,10 @@ on: - opened - synchronize branches: - - main - - release-* + - main-no push: branches: - - main - - release-* + - main-no jobs: image: diff --git a/.github/workflows/precompiled.yaml b/.github/workflows/precompiled.yaml index 769ce505..b8f62a96 100644 --- a/.github/workflows/precompiled.yaml +++ b/.github/workflows/precompiled.yaml @@ -15,9 +15,20 @@ # Run this workflow on a schedule name: Precompiled images +# on: +# schedule: +# - cron: '00 09 * * *' # scheduled job + on: - schedule: - - cron: '00 09 * * *' + pull_request: + types: + - opened + - synchronize + branches: + - ci-precompile-ubuntu24.04 + push: + branches: + - ci-precompile-ubuntu24.04 jobs: set-driver-version-matrix: @@ -33,17 +44,26 @@ jobs: id: extract_driver_branch run: | # get driver_branch - DRIVER_BRANCH=("535" "550") + # SHIVA + # DRIVER_BRANCH=("535" "550") + DRIVER_BRANCH=("550") driver_branch_json=$(printf '%s\n' "${DRIVER_BRANCH[@]}" | jq -R . | jq -cs .) echo "driver_branch=$driver_branch_json" >> $GITHUB_OUTPUT # get kernel flavors - KERNEL_FLAVORS=("aws" "azure" "generic" "nvidia" "oracle") + # SHIVA + # KERNEL_FLAVORS=("aws" "azure" "generic" "nvidia" "oracle") + # KERNEL_FLAVORS=("azure" "generic" "nvidia" "oracle") + # KERNEL_FLAVORS=("aws") + KERNEL_FLAVORS=("generic") kernel_flavors_json=$(printf '%s\n' "${KERNEL_FLAVORS[@]}" | jq -R . | jq -cs .) echo "kernel_flavors=$kernel_flavors_json" >> $GITHUB_OUTPUT # get ubuntu distributions - DIST=("ubuntu22.04") + # SHIVA + # DIST=("ubuntu22.04" "ubuntu24.04") + # DIST=("ubuntu22.04") + DIST=("ubuntu24.04") dist_json=$(printf '%s\n' "${DIST[@]}" | jq -R . | jq -cs .) echo "dist=$dist_json" >> $GITHUB_OUTPUT @@ -60,6 +80,8 @@ jobs: name: Check out code - name: Calculate build vars id: vars + env: + DIST: ${{ matrix.dist }} run: | echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV @@ -70,6 +92,14 @@ jobs: echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV + if [ "$DIST" == "ubuntu22.04" ]; then + echo "BASE_TARGET=jammy" >> $GITHUB_OUTPUT + echo "LTS_KERNEL=5.15" >> $GITHUB_OUTPUT + elif [ "$DIST" == "ubuntu24.04" ]; then + echo "BASE_TARGET=noble" >> $GITHUB_OUTPUT + echo "LTS_KERNEL=6.8" >> $GITHUB_OUTPUT + fi + - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -84,9 +114,10 @@ jobs: env: IMAGE_NAME: ghcr.io/nvidia/driver VERSION: ${COMMIT_SHORT_SHA} - BASE_TARGET: jammy + BASE_TARGET: ${{ steps.vars.outputs.BASE_TARGET }} + LTS_KERNEL: ${{ steps.vars.outputs.LTS_KERNEL }} run: | - make DRIVER_BRANCH=${{ matrix.driver_branch }} KERNEL_FLAVOR=${{ matrix.flavor }} build-base-${BASE_TARGET} + make DRIVER_BRANCH=${{ matrix.driver_branch }} KERNEL_FLAVOR=${{ matrix.flavor }} LTS_KERNEL=${LTS_KERNEL} build-base-${BASE_TARGET} trap "docker rm -f base-${BASE_TARGET}-${{ matrix.flavor }}" EXIT docker run -d --name base-${BASE_TARGET}-${{ matrix.flavor }} ghcr.io/nvidia/driver:base-${BASE_TARGET}-${{ matrix.flavor }}-${{ matrix.driver_branch }} @@ -149,11 +180,18 @@ jobs: - name: Set kernel version id: set_kernel_version env: - BASE_TARGET: "jammy" DIST: ${{ matrix.dist }} run: | echo "matrix_values_not_empty=0" >> $GITHUB_OUTPUT + if [ "$DIST" == "ubuntu22.04" ]; then + export BASE_TARGET="jammy" + export LTS_KERNEL="5.15" + elif [ "$DIST" == "ubuntu24.04" ]; then + export BASE_TARGET="noble" + export LTS_KERNEL="6.8" + fi + kernel_flavors_json='${{ needs.set-driver-version-matrix.outputs.kernel_flavors }}' KERNEL_FLAVORS=($(echo "$kernel_flavors_json" | jq -r '.[]')) driver_branch_json='${{ needs.set-driver-version-matrix.outputs.driver_branch }}' @@ -201,7 +239,7 @@ jobs: aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_ssh_key: ${{ secrets.AWS_SSH_KEY }} - holodeck_config: "tests/holodeck.yaml" + holodeck_config: "tests/holodeck_${{ matrix.dist }}.yaml" - name: Get public dns name id: get_public_dns_name @@ -296,7 +334,7 @@ jobs: matrix: driver_branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }} kernel_version: ${{ fromJson(needs.determine-e2e-test-matrix.outputs.matrix_values) }} - dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} + dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} steps: - name: Check out code uses: actions/checkout@v4 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dc410ce0..1d2d7f98 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -49,6 +49,15 @@ include: rules: - if: $CI_PIPELINE_SOURCE != "schedule" +# Define the image build targets +.image-build-ubuntu24.04: + # Perform for each DRIVER_VERSION + extends: + - .driver-versions + - .image-build-generic + rules: + - if: $CI_PIPELINE_SOURCE != "schedule" + # Define the image build targets .image-build-rhel9: # Perform for each DRIVER_VERSION @@ -111,3 +120,12 @@ image-precompiled-ubuntu22.04: extends: - .driver-versions-precompiled-ubuntu22.04 - .image-build-precompiled + +image-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + CVE_UPDATES: "curl libc6" + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .image-build-precompiled diff --git a/.nvidia-ci.yml b/.nvidia-ci.yml index 63554c39..ebf83796 100644 --- a/.nvidia-ci.yml +++ b/.nvidia-ci.yml @@ -77,6 +77,30 @@ variables: - !reference [.image-pull-rules, rules] +.image-pull-ubuntu24.04: + # Perform for each DRIVER_VERSION + extends: + - .driver-versions + - .image-pull-generic + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" + when: never + - !reference [.image-pull-rules, rules] + +image-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + PRECOMPILED: "true" + CVE_UPDATES: "curl libc6" + rules: + - when: delayed + start_in: 30 minutes + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .image-pull-generic + + .image-pull-ubuntu22.04: # Perform for each DRIVER_VERSION extends: @@ -184,6 +208,30 @@ image-rhel8: - if: $CI_PIPELINE_SOURCE == "merge_request_event" - !reference [.pipeline-trigger-rules, rules] +.scan-ubuntu24.04: + # Repeat for each DRIVER_VERSION + extends: + - .driver-versions + - .scan-generic + rules: + - !reference [.scan-rules-common, rules] + - if: $CI_PIPELINE_SOURCE == "schedule" + when: never + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + - !reference [.pipeline-trigger-rules, rules] + +.scan-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + PRECOMPILED: "true" + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .scan-generic + rules: + - !reference [.scan-rules-common, rules] + - when: always + .scan-precompiled-ubuntu22.04: variables: DIST: signed_ubuntu22.04 @@ -294,6 +342,25 @@ release:ngc-ubuntu22.04: - .dist-ubuntu22.04 - .driver-versions +release:ngc-ubuntu24.04: + extends: + - .release:ngc + - .dist-ubuntu24.04 + - .driver-versions + +release:ngc-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + PRECOMPILED: "true" + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .release-generic + - .release:ngc-variables + rules: + # Only run NGC release job on scheduled pipelines + - if: $CI_PIPELINE_SOURCE == "schedule" + release:ngc-precompiled-ubuntu22.04: variables: DIST: signed_ubuntu22.04 @@ -421,6 +488,23 @@ release:ngc-rhel8.10: - 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"' - ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia +sign:ngc-precompiled-ubuntu24.04: + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .dist-ubuntu22.04 + - .release-generic + - .release:ngc-variables + - .sign:ngc + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + PRECOMPILED: "true" + needs: + - release:ngc-precompiled-ubuntu24.04 + rules: + # Only run NGC release job on scheduled pipelines + - if: $CI_PIPELINE_SOURCE == "schedule" + sign:ngc-precompiled-ubuntu22.04: extends: - .driver-versions-precompiled-ubuntu22.04 @@ -445,6 +529,7 @@ sign:ngc-ubuntu-rhel-rhcos: matrix: - SIGN_JOB_NAME: ["ubuntu"] VERSION: ["24.04"] + DRIVER_VERSION: ["535.216.01", "550.127.05", "565.57.01"] - SIGN_JOB_NAME: ["ubuntu"] VERSION: ["22.04"] DRIVER_VERSION: ["535.216.01", "550.127.05", "565.57.01"] diff --git a/Makefile b/Makefile index 468af6cc..bad9c916 100644 --- a/Makefile +++ b/Makefile @@ -54,9 +54,9 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(OUT_DIST) OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG) ##### Public rules ##### -DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos +DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS)) -BASE_FROM := jammy focal +BASE_FROM := jammy focal noble PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS)) DRIVER_PUSH_TARGETS := $(foreach push_target, $(PUSH_TARGETS), $(addprefix $(push_target)-, $(DRIVER_VERSIONS))) BUILD_TARGETS := $(patsubst %, build-%, $(DISTRIBUTIONS)) @@ -210,6 +210,7 @@ $(BASE_BUILD_TARGETS): --build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \ --build-arg DRIVER_BRANCH="$(DRIVER_BRANCH)" \ --build-arg KERNEL_FLAVOR="$(KERNEL_FLAVOR)" \ + --build-arg LTS_KERNEL="$(LTS_KERNEL)" \ --file $(DOCKERFILE) \ $(CURDIR)/base diff --git a/base/Dockerfile b/base/Dockerfile index 5b86a348..0387473f 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -1,12 +1,51 @@ +# Ubuntu 24.04 +FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu24.04 AS noble + +SHELL ["/bin/bash", "-c"] + +ARG DRIVER_BRANCH +ARG KERNEL_FLAVOR +ARG LTS_KERNEL +ENV DRIVER_BRANCH=${DRIVER_BRANCH} +ENV KERNEL_FLAVOR=${KERNEL_FLAVOR} +ENV LTS_KERNEL=${LTS_KERNEL} + +# Remove cuda repository to avoid GPG errors +RUN rm -f /etc/apt/sources.list.d/cuda* + +RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections + +ENV NVIDIA_VISIBLE_DEVICES=void + +RUN apt-get update && apt-get install -y --no-install-recommends \ + apt-utils git curl && \ + rm -rf /var/lib/apt/lists/* + +RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble main universe" > /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-updates main universe" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-security main universe" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu noble-updates main restricted" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu noble-security main restricted" >> /etc/apt/sources.list && \ + usermod -o -u 0 -g 0 _apt + +COPY generate-ci-config /usr/local/bin/generate-ci-config + +RUN chmod +x /usr/local/bin/generate-ci-config && \ + generate-ci-config + +ENTRYPOINT ["/usr/bin/sleep","1000"] + # Ubuntu 22.04 -FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 as jammy +FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 AS jammy SHELL ["/bin/bash", "-c"] ARG DRIVER_BRANCH ARG KERNEL_FLAVOR +ARG LTS_KERNEL ENV DRIVER_BRANCH=${DRIVER_BRANCH} ENV KERNEL_FLAVOR=${KERNEL_FLAVOR} +ENV LTS_KERNEL=${LTS_KERNEL} # Remove cuda repository to avoid GPG errors RUN rm -f /etc/apt/sources.list.d/cuda* @@ -34,14 +73,16 @@ RUN chmod +x /usr/local/bin/generate-ci-config && \ ENTRYPOINT ["/usr/bin/sleep","1000"] # Ubuntu 20.04 -FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu20.04 as focal +FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu20.04 AS focal SHELL ["/bin/bash", "-c"] ARG DRIVER_BRANCH ARG KERNEL_FLAVOR +ARG LTS_KERNEL ENV DRIVER_BRANCH=${DRIVER_BRANCH} ENV KERNEL_FLAVOR=${KERNEL_FLAVOR} +ENV LTS_KERNEL=${LTS_KERNEL} # Remove cuda repository to avoid GPG errors RUN rm -f /etc/apt/sources.list.d/cuda* diff --git a/tests/holodeck.yaml b/tests/holodeck_ubuntu22.04.yaml similarity index 100% rename from tests/holodeck.yaml rename to tests/holodeck_ubuntu22.04.yaml diff --git a/tests/holodeck_ubuntu24.04.yaml b/tests/holodeck_ubuntu24.04.yaml new file mode 100644 index 00000000..759c7d93 --- /dev/null +++ b/tests/holodeck_ubuntu24.04.yaml @@ -0,0 +1,32 @@ +apiVersion: holodeck.nvidia.com/v1alpha1 +kind: Environment +metadata: + name: HOLODECK_NAME + description: "end-to-end test infrastructure" +spec: + provider: aws + auth: + keyName: cnt-ci + privateKey: HOLODECK_PRIVATE_KEY + instance: + type: g4dn.xlarge + region: us-west-1 + ingressIpRanges: + - 18.190.12.32/32 + - 3.143.46.93/32 + - 52.15.119.136/32 + - 35.155.108.162/32 + - 35.162.190.51/32 + - 54.201.61.24/32 + image: + architecture: amd64 + imageId: ami-0da424eb883458071 + containerRuntime: + install: true + name: containerd + version: 1.7.22 + kubernetes: + install: true + installer: kubeadm + version: v1.30.0 + crictlVersion: v1.30.0 diff --git a/tests/scripts/.definitions.sh b/tests/scripts/.definitions.sh index 945bb04c..3feb70cc 100644 --- a/tests/scripts/.definitions.sh +++ b/tests/scripts/.definitions.sh @@ -16,8 +16,8 @@ CASES_DIR="$( cd "${TEST_DIR}/cases" && pwd )" : ${HELM_NVIDIA_REPO:="https://helm.ngc.nvidia.com/nvidia"} -: ${DAEMON_POD_STATUS_TIME_OUT:="15m"} -: ${POD_STATUS_TIME_OUT:="2m"} +: ${DAEMON_POD_STATUS_TIME_OUT:="600m"} +: ${POD_STATUS_TIME_OUT:="600m"} : ${LOG_DIR:="/tmp/logs"} diff --git a/tests/scripts/ci-precompiled-helpers.sh b/tests/scripts/ci-precompiled-helpers.sh index 2efa9a75..f45539b3 100644 --- a/tests/scripts/ci-precompiled-helpers.sh +++ b/tests/scripts/ci-precompiled-helpers.sh @@ -1,6 +1,6 @@ get_kernel_versions_to_test() { if [[ "$#" -ne 4 ]]; then - echo " Error:$0 must be called with BASE_TARGET DRIVER_BRANCHES DRIVER_BRANCHES DIST" >&2 + echo " Error:$0 must be called with BASE_TARGET KERNEL_FLAVORS DRIVER_BRANCHES DIST" >&2 exit 1 fi