From badfa2817255d9dbc6cb783ef8adade0cd0e5a46 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sun, 22 Sep 2024 19:32:41 +0100 Subject: [PATCH] Initial cos variant support --- images/Containerfile.cos.warewulf | 2 + images/Makefile | 11 +++-- images/cos-provision.sh | 69 ++++++++++++++++++++++++++++--- playbook-task-sync-images.yml | 8 ++-- staging.rb | 4 +- 5 files changed, 79 insertions(+), 15 deletions(-) diff --git a/images/Containerfile.cos.warewulf b/images/Containerfile.cos.warewulf index b32f660..ed3a0cd 100644 --- a/images/Containerfile.cos.warewulf +++ b/images/Containerfile.cos.warewulf @@ -2,8 +2,10 @@ ARG COS_RELEASE FROM docker.io/library/almalinux:${COS_RELEASE} AS cos.base.${COS_RELEASE} ARG PACKAGES +ARG VARIANT ENV PACKAGES=${PACKAGES} +ENV VARIANT=${VARIANT} COPY ./cos-provision.sh / RUN chmod +x /cos-provision.sh && /cos-provision.sh && rm -rf /cos-provision.sh diff --git a/images/Makefile b/images/Makefile index 01116f9..b932a0c 100644 --- a/images/Makefile +++ b/images/Makefile @@ -34,10 +34,10 @@ opnsense.qcow2: OPNsense-$(OPNSENSE_VERSION)-dvd-amd64.iso opnsense.pkr.hcl PACKAGES = micro \ zsh \ wireguard-tools dnf-automatic systemd-oomd lsb-release cryptsetup firewalld qemu-guest-agent coreutils e2fsprogs \ - java-17-openjdk-headless bash-completion git wget curl rclone rsync \ + java-17-openjdk-headless bash-completion git cmake wget curl rclone rsync \ htop lsof net-tools traceroute tcpdump iproute ethtool \ emacs-nox vim nano tree moreutils parallel tmux screen file which words ripgrep \ - usbutils pciutils lm_sensors hwloc numactl ltrace strace perf valgrind + usbutils pciutils lm_sensors hwloc numactl psmisc ltrace strace perf valgrind space := $(subst ,, ) comma := , @@ -78,14 +78,17 @@ cos.%.qcow2: AlmaLinux-$(ALMA_VERSION_MAJOR)-GenericCloud-$(ALMA_VERSION)-$(ALMA almalinux.pkr.hcl mv output-cos."$*"/$@ "$@" +VARIANT ?= plain + # Builds a warewulf uncompressed VNFS template of COS with warewulf patches cos.%.warewulf.tar: Containerfile.cos.warewulf cos-provision.sh podman build --security-opt label=disable --no-cache \ --build-arg PACKAGES="$(PACKAGES)" \ + --build-arg VARIANT="$(VARIANT)" \ --build-arg COS_RELEASE="$(ALMA_VERSION)" \ --platform "linux/$*" \ - -f Containerfile.cos.warewulf -t "warewulf_cos_$*" - podman save "warewulf_cos_$*" >cos.$*.warewulf.tar + -f Containerfile.cos.warewulf -t "warewulf_cos_$(VARIANT)_$*" + podman save "warewulf_cos_$(VARIANT)_$*" >cos.$(VARIANT).$*.warewulf.tar # Builds a uncompressed RAW format of COS (the size of match the size of the disk) cos.%.raw: cos.%.qcow2 diff --git a/images/cos-provision.sh b/images/cos-provision.sh index 4d4d8cd..61429b9 100755 --- a/images/cos-provision.sh +++ b/images/cos-provision.sh @@ -7,12 +7,71 @@ dnf install -y "https://repos.openhpc.community/OpenHPC/3/EL_9/$(arch)/ohpc-rele dnf config-manager --set-enabled crb dnf copr enable cyqsimon/micro -y -# ELRepo setup for ML kernel -rpm --import "https://www.elrepo.org/RPM-GPG-KEY-elrepo.org" -dnf install -y "https://www.elrepo.org/elrepo-release-9.el9.elrepo.noarch.rpm" - dnf update -y -dnf install -y --enablerepo=elrepo-kernel kernel-ml kernel-ml-modules kernel-ml-devel + +IFS='_' read -r -a values <<<"$VARIANT" +for value in "${values[@]}"; do + case $value in + plain) + dnf -y install kernel-core kernel-modules kernel-headers + ;; + ml | lt) + # ELRepo setup for ML kernel + rpm --import "https://www.elrepo.org/RPM-GPG-KEY-elrepo.org" + dnf install -y "https://www.elrepo.org/elrepo-release-9.el9.elrepo.noarch.rpm" + dnf install -y --enablerepo=elrepo-kernel "kernel-$value" "kernel-$value-modules" "kernel-$value-devel" + ;; + cuda) + case $(arch) in + aarch64) nv_arch="sbsa" ;; + *) nv_arch=$(arch) ;; + esac + dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/$nv_arch/cuda-rhel9.repo + dnf module enable -y nvidia-driver:open-dkms + dnf install -y nvidia-driver-cuda + ls /lib/modules | xargs -n1 /usr/lib/dkms/dkms_autoinstaller start + dkms status + systemctl enable nvidia-persistenced + ;; + rocm) + case $(arch) in + aarch64) + echo "ROCm is not supported on aarch64" + exit 1 + ;; + esac + + sudo tee /etc/yum.repos.d/amdgpu.repo <