Skip to content

Commit

Permalink
Initial cos variant support
Browse files Browse the repository at this point in the history
  • Loading branch information
tom91136 committed Sep 22, 2024
1 parent f671016 commit badfa28
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 15 deletions.
2 changes: 2 additions & 0 deletions images/Containerfile.cos.warewulf
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
ARG COS_RELEASE
FROM docker.io/library/almalinux:${COS_RELEASE} AS cos.base.${COS_RELEASE}
ARG PACKAGES
ARG VARIANT

ENV PACKAGES=${PACKAGES}
ENV VARIANT=${VARIANT}

COPY ./cos-provision.sh /
RUN chmod +x /cos-provision.sh && /cos-provision.sh && rm -rf /cos-provision.sh
Expand Down
11 changes: 7 additions & 4 deletions images/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ opnsense.qcow2: OPNsense-$(OPNSENSE_VERSION)-dvd-amd64.iso opnsense.pkr.hcl
PACKAGES = micro \
zsh \
wireguard-tools dnf-automatic systemd-oomd lsb-release cryptsetup firewalld qemu-guest-agent coreutils e2fsprogs \
java-17-openjdk-headless bash-completion git wget curl rclone rsync \
java-17-openjdk-headless bash-completion git cmake wget curl rclone rsync \
htop lsof net-tools traceroute tcpdump iproute ethtool \
emacs-nox vim nano tree moreutils parallel tmux screen file which words ripgrep \
usbutils pciutils lm_sensors hwloc numactl ltrace strace perf valgrind
usbutils pciutils lm_sensors hwloc numactl psmisc ltrace strace perf valgrind

space := $(subst ,, )
comma := ,
Expand Down Expand Up @@ -78,14 +78,17 @@ cos.%.qcow2: AlmaLinux-$(ALMA_VERSION_MAJOR)-GenericCloud-$(ALMA_VERSION)-$(ALMA
almalinux.pkr.hcl
mv output-cos."$*"/$@ "$@"

VARIANT ?= plain

# Builds a warewulf uncompressed VNFS template of COS with warewulf patches
cos.%.warewulf.tar: Containerfile.cos.warewulf cos-provision.sh
podman build --security-opt label=disable --no-cache \
--build-arg PACKAGES="$(PACKAGES)" \
--build-arg VARIANT="$(VARIANT)" \
--build-arg COS_RELEASE="$(ALMA_VERSION)" \
--platform "linux/$*" \
-f Containerfile.cos.warewulf -t "warewulf_cos_$*"
podman save "warewulf_cos_$*" >cos.$*.warewulf.tar
-f Containerfile.cos.warewulf -t "warewulf_cos_$(VARIANT)_$*"
podman save "warewulf_cos_$(VARIANT)_$*" >cos.$(VARIANT).$*.warewulf.tar

# Builds an uncompressed RAW-format image of COS (the file size matches the size of the disk)
cos.%.raw: cos.%.qcow2
Expand Down
69 changes: 64 additions & 5 deletions images/cos-provision.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,71 @@ dnf install -y "https://repos.openhpc.community/OpenHPC/3/EL_9/$(arch)/ohpc-rele
dnf config-manager --set-enabled crb
dnf copr enable cyqsimon/micro -y

# ELRepo setup for ML kernel
rpm --import "https://www.elrepo.org/RPM-GPG-KEY-elrepo.org"
dnf install -y "https://www.elrepo.org/elrepo-release-9.el9.elrepo.noarch.rpm"

dnf update -y
dnf install -y --enablerepo=elrepo-kernel kernel-ml kernel-ml-modules kernel-ml-devel

# Apply every variant requested through $VARIANT.
# VARIANT is split on '_' so several variants can be combined in one value
# (e.g. "ml_cuda"); each element selects exactly one branch of the case below.
# All commands here are run without privilege escalation, matching the rest of
# this loop which already calls dnf/rpm/systemctl directly.
IFS='_' read -r -a values <<<"$VARIANT"
for value in "${values[@]}"; do
  case $value in
    plain)
      # Kernel packages from the repositories that are already configured.
      dnf -y install kernel-core kernel-modules kernel-headers
      ;;
    ml | lt)
      # ELRepo setup; kernel-ml / kernel-lt come from the elrepo-kernel repo.
      rpm --import "https://www.elrepo.org/RPM-GPG-KEY-elrepo.org"
      dnf install -y "https://www.elrepo.org/elrepo-release-9.el9.elrepo.noarch.rpm"
      dnf install -y --enablerepo=elrepo-kernel "kernel-$value" "kernel-$value-modules" "kernel-$value-devel"
      ;;
    cuda)
      # The NVIDIA repo path uses "sbsa" where `arch` reports aarch64; any
      # other architecture name is used in the URL unchanged.
      case $(arch) in
        aarch64) nv_arch="sbsa" ;;
        *) nv_arch=$(arch) ;;
      esac
      dnf config-manager --add-repo "https://developer.download.nvidia.com/compute/cuda/repos/rhel9/${nv_arch}/cuda-rhel9.repo"
      dnf module enable -y nvidia-driver:open-dkms
      dnf install -y nvidia-driver-cuda
      # Run the DKMS autoinstaller once per kernel directory found under
      # /lib/modules, then print the resulting module status to the log.
      ls /lib/modules | xargs -n1 /usr/lib/dkms/dkms_autoinstaller start
      dkms status
      systemctl enable nvidia-persistenced
      ;;
    rocm)
      # The amdgpu repo below is pinned to an x86_64 baseurl, so refuse aarch64.
      case $(arch) in
        aarch64)
          echo "ROCm is not supported on aarch64" >&2
          exit 1
          ;;
      esac

      # No sudo: the surrounding dnf/rpm calls already require root, and sudo
      # would only add a dependency on a package that may not be installed.
      tee /etc/yum.repos.d/amdgpu.repo <<EOF
[amdgpu]
name=amdgpu
baseurl=https://repo.radeon.com/amdgpu/6.2.1/el/9.4/main/x86_64/
enabled=1
priority=50
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
EOF
      tee --append /etc/yum.repos.d/rocm.repo <<EOF
[ROCm-6.2.1]
name=ROCm6.2.1
baseurl=https://repo.radeon.com/rocm/el9/6.2.1/main
enabled=1
priority=50
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
EOF
      dnf install -y amdgpu-dkms
      # Same per-kernel DKMS build as for the cuda variant.
      ls /lib/modules | xargs -n1 /usr/lib/dkms/dkms_autoinstaller start
      dkms status
      ;;
    nec)
      # TODO
      ;;
    *)
      # Fail loudly: continuing would produce an image that silently lacks
      # the requested variant.
      echo "Unknown variant: $value" >&2
      exit 1
      ;;
  esac
done

dnf install -y --allowerasing --setopt=install_weak_deps=False \
ohpc-slurm-client ipa-client \
NetworkManager dhclient nfs-utils ipmitool openssh-clients openssh-server initscripts \
Expand Down
8 changes: 4 additions & 4 deletions playbook-task-sync-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
tasks:
- name: Copy COS image
ansible.builtin.copy:
src: "images/cos.{{item}}.warewulf.tar"
dest: "/root/cos.{{item}}.warewulf.tar"
src: "images/cos.plain.{{item}}.warewulf.tar"
dest: "/root/cos.plain.{{item}}.warewulf.tar"
loop: "{{all_arch}}"

- name: Import COS image
ansible.builtin.shell: | # XXX the first import sometimes fails (!)
wwctl container import "/root/cos.{{item}}.warewulf.tar" "cos_{{item}}" --force --syncuser || \
wwctl container import "/root/cos.{{item}}.warewulf.tar" "cos_{{item}}" --force --syncuser
wwctl container import "/root/cos.plain.{{item}}.warewulf.tar" "cos_plain.{{item}}" --force --syncuser || \
wwctl container import "/root/cos.plain.{{item}}.warewulf.tar" "cos_plain.{{item}}" --force --syncuser
loop: "{{all_arch}}"
async: 600
poll: 0
Expand Down
4 changes: 2 additions & 2 deletions staging.rb
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def self.write_inventory(pve_ip:, storage_pool:, extra_hosts:, host_common_hash:
mgmt_ip: '10.10.20.150',
mgmt_mac: 'BC:24:11:79:08:78',
pve: 'host',
image: 'cos_x86_64',
image: 'cos_plain.x86_64',
overlays: %w[wwinit generic arch-x86_64],
sockets: 1,
threads_per_core: 1,
Expand All @@ -164,7 +164,7 @@ def self.write_inventory(pve_ip:, storage_pool:, extra_hosts:, host_common_hash:
mgmt_ip: '10.10.20.151',
mgmt_mac: 'BC:24:11:79:08:79',
pve: 'aarch64',
image: 'cos_aarch64',
image: 'cos_plain.aarch64',
overlays: %w[wwinit generic arch-aarch64],
sockets: 1,
threads_per_core: 1,
Expand Down

0 comments on commit badfa28

Please sign in to comment.