From c1af722981ad468b6b74e7759f68c6979fddb6d5 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Mon, 28 Oct 2024 20:20:17 +0000 Subject: [PATCH 1/7] Enable configuration for AMD GPUs --- ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install.sh b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install.sh index bb6ff7b8..ddcb5d89 100755 --- a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install.sh +++ b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install.sh @@ -7,15 +7,13 @@ if [[ "$#" -gt 0 ]]; then INPUT=$1 if [ "$INPUT" == "AMD" ]; then GPUi="AMD" - echo "ERROR, the AMD pathway is not fully implemented yet." - exit 1 + echo "Configuring VM for AMD GPUs." elif [ "$INPUT" != "NVIDIA" ]; then echo "Error: Invalid GPU type. Please specify 'NVIDIA' or 'AMD'." exit 1 fi fi - export GPU=$GPUi # install pre-requisites From 6dcb7a23379a14af40c25e2a853eb8e1bf0af791 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Mon, 28 Oct 2024 20:20:54 +0000 Subject: [PATCH 2/7] Download and install amdgpu_install --- ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh index 7011acb4..3baafb86 100755 --- a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh +++ b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh @@ -1,9 +1,14 @@ #!/bin/bash set -ex -#move to rocm package -./amdgpu-install -y --usecase=graphics,rocm +pushd /tmp +wget https://repo.radeon.com/amdgpu-install/latest/ubuntu/jammy/amdgpu-install_6.2.60202-1_all.deb +sudo apt install ./amdgpu-install_6.2.60202-1_all.deb +rm -f amdgpu-install_6.2.60202-1_all.deb +popd +#move to rocm package +amdgpu-install -y --usecase=graphics,rocm #Add self to render and video groups so they can access gpus. usermod -a -G render $(logname) From b72f2633c33ba812a2cec006c73a63f7f16ac1a1 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Tue, 29 Oct 2024 10:20:50 -0500 Subject: [PATCH 3/7] Update install_rocm.sh Skip confirmation prompt --- ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh index 3baafb86..c271695f 100755 --- a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh +++ b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh @@ -3,7 +3,7 @@ set -ex pushd /tmp wget https://repo.radeon.com/amdgpu-install/latest/ubuntu/jammy/amdgpu-install_6.2.60202-1_all.deb -sudo apt install ./amdgpu-install_6.2.60202-1_all.deb +sudo apt install -y ./amdgpu-install_6.2.60202-1_all.deb rm -f amdgpu-install_6.2.60202-1_all.deb popd From 921414c3b041a56adf15a4026924c9970ca810ec Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Tue, 29 Oct 2024 10:21:57 -0500 Subject: [PATCH 4/7] Update install_rccl.sh Install newer version of cmake required by MSCCLPP --- ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rccl.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rccl.sh b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rccl.sh index 595ce534..6d457b78 100755 --- a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rccl.sh +++ b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rccl.sh @@ -1,6 +1,18 @@ #!/bin/bash set -ex +pushd /tmp +wget https://github.com/Kitware/CMake/releases/download/v3.30.5/cmake-3.30.5-linux-x86_64.tar.gz +tar xzf cmake-3.30.5-linux-x86_64.tar.gz +pushd cmake-3.30.5-linux-x86_64 +pushd bin +sudo mv -f ccmake cmake cpack ctest /usr/local/bin +popd +sudo cp -r share/cmake-3.30 /usr/local/share/ +popd +rm -rf cmake-3.30.5-linux-x86_64* +popd + apt install libstdc++-12-dev apt remove -y rccl pushd ~ From d5c9269ee25ebb7306a0a7ee397d44446d98c5bb Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Fri, 6 Dec 2024 10:33:28 -0600 Subject: [PATCH 5/7] Updated ROCm version --- ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh index c271695f..e541ef53 100755 --- a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh +++ b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh @@ -2,9 +2,9 @@ set -ex pushd /tmp -wget https://repo.radeon.com/amdgpu-install/latest/ubuntu/jammy/amdgpu-install_6.2.60202-1_all.deb -sudo apt install -y ./amdgpu-install_6.2.60202-1_all.deb -rm -f amdgpu-install_6.2.60202-1_all.deb +wget https://repo.radeon.com/amdgpu-install/latest/ubuntu/jammy/amdgpu-install_6.3.60300-1_all.deb +sudo apt install -y ./amdgpu-install_6.3.60300-1_all.deb +rm -f amdgpu-install_6.3.60300-1_all.deb popd #move to rocm package From 68067aced2bd6e25765f315bb987644bc7122998 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Mon, 9 Dec 2024 16:19:54 -0600 Subject: [PATCH 6/7] Revert back to ROCm 6.2.2 --- ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh index e541ef53..9e717a23 100755 --- a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh +++ b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh @@ -2,9 +2,9 @@ set -ex pushd /tmp -wget https://repo.radeon.com/amdgpu-install/latest/ubuntu/jammy/amdgpu-install_6.3.60300-1_all.deb -sudo apt install -y ./amdgpu-install_6.3.60300-1_all.deb -rm -f amdgpu-install_6.3.60300-1_all.deb +wget https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/jammy/amdgpu-install_6.2.60202-1_all.deb +sudo apt install -y ./amdgpu-install_6.2.60202-1_all.deb +rm -f amdgpu-install_6.2.60202-1_all.deb popd #move to rocm package From dc7a2cb115ecf0bc9f7cf4b69381c757ffe67326 Mon Sep 17 00:00:00 2001 From: Davide Vanzo Date: Fri, 20 Dec 2024 15:48:19 -0600 Subject: [PATCH 7/7] Fix default secondary groups --- ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh index 9e717a23..0ff0273f 100755 --- a/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh +++ b/ubuntu/ubuntu-22.x/ubuntu-22.04-hpc/install_rocm.sh @@ -16,8 +16,7 @@ usermod -a -G video $(logname) #add future new users to the render and video groups. echo 'ADD_EXTRA_GROUPS=1' | tee -a /etc/adduser.conf -echo 'EXTRA_GROUPS=video' | tee -a /etc/adduser.conf -echo 'EXTRA_GROUPS=render' | tee -a /etc/adduser.conf +echo 'EXTRA_GROUPS="video render"' | tee -a /etc/adduser.conf #add nofile limits string_so="* soft nofile 1048576"