Fixed ROCm permission error and updated ROCm Containerfile

containers · Oct 26, 2024 · 314683d · 314683d
1 parent c5dbbe0
commit 314683d
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 14 deletions.
diff --git a/container-images/rocm/Containerfile b/container-images/rocm/Containerfile
@@ -13,26 +13,39 @@ RUN dnf config-manager --add-repo \
 RUN curl --retry 8 --retry-all-errors -o \
       /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Official \
       http://mirror.centos.org/centos/RPM-GPG-KEY-CentOS-Official && \
-      cat /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Official
 RUN rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Official
 
-RUN dnf install -y rocm-dev hipblas-devel rocblas-devel && \
-    dnf clean all && \
-    git clone https://github.com/ggerganov/llama.cpp && \
+# Set the AMD GPU architecture for RDNA3
+# https://llvm.org/docs/AMDGPUUsage.html#processors
+ENV AMDGPU_TARGETS=gfx1100
+
+# Set up llama.cpp
+RUN git clone https://github.com/ggerganov/llama.cpp && \
     cd llama.cpp && \
     git reset --hard ${LLAMA_CPP_SHA} && \
     cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
-      -DGGML_HIPBLAS=1 && \
-    cmake --build build --config Release -j $(nproc) && \
+      -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=${ROCM_DOCKER_ARCH} && \
+    cmake --build build --config Release -j$(nproc) && \
     cmake --install build && \
-    cd / && \
-    git clone https://github.com/ggerganov/whisper.cpp.git && \
+    cd /
+
+# Set up whisper.cpp
+RUN git clone https://github.com/ggerganov/whisper.cpp.git && \
     cd whisper.cpp && \
     git reset --hard ${WHISPER_CPP_SHA} && \
-    make -j $(nproc) GGML_HIPBLAS=1 && \
-    mv main /usr/bin/whisper-main && \
-    mv server /usr/bin/whisper-server && \
-    cd / && \
-    rm -rf /var/cache/*dnf* /opt/rocm-*/lib/llvm \
-      /opt/rocm-*/lib/rocblas/library/*gfx9* llama.cpp whisper.cpp
+    cmake -B build -DCMAKE_INSTALL_PREFIX:PATH=/usr -DGGML_CCACHE=0 \
+      -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=${ROCM_DOCKER_ARCH} && \
+    cmake --build build --config Release -j$(nproc) && \
+    # Move whisper binaries to /usr/bin
+    mv build/bin/main /usr/bin/whisper-main && \
+    mv build/bin/server /usr/bin/whisper-server && \
+    # Move any shared libraries to a standard library directory if needed
+    if [ -f build/lib/libwhisper.so ]; then mv build/lib/libwhisper.so /usr/lib/libwhisper.so; fi && \
+    # Refresh the dynamic linker cache; this runs unconditionally and a failure is ignored
+    ldconfig || true && \
+    # Return to the root directory (build trees are removed in the final cleanup step)
+    cd /
 
+# Clean up
+RUN rm -rf /var/cache/*dnf* /opt/rocm-*/lib/llvm \
+      /opt/rocm-*/lib/rocblas/library/*gfx9* llama.cpp whisper.cpp
diff --git a/ramalama/cli.py b/ramalama/cli.py
@@ -679,6 +679,8 @@ def run_container(args):
     if gpu_type == "HIP_VISIBLE_DEVICES":
         conman_args += ["-e", f"{gpu_type}={gpu_num}"]
         if args.image == default_image():
+            # https://github.com/containers/podman/issues/10166
+            conman_args += ["--group-add", "keep-groups"]
             conman_args += ["quay.io/ramalama/rocm:latest"]
         else:
             conman_args += [args.image]