From 5a08cb79bdc9cab781b80bf804c51dc444fbf70c Mon Sep 17 00:00:00 2001
From: "Fred N. Garvin" <184324400+FNGarvin@users.noreply.github.com>
Date: Thu, 21 Nov 2024 18:13:17 -0600
Subject: [PATCH 1/2] Update model.py to enable CUDA when available

Shell out to nvidia-smi for NVIDIA detection, and add device or gpu
arguments to conman as appropriate for Docker vs. Podman.

Signed-off-by: Fred N. Garvin, Esq. <184324400+FNGarvin@users.noreply.github.com>
---
 ramalama/model.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/ramalama/model.py b/ramalama/model.py
index 5ee217bc..54ac693c 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -152,6 +152,16 @@ def setup_container(self, args):
         gpu_type, gpu_num = get_gpu()
         if gpu_type == "HIP_VISIBLE_DEVICES" or gpu_type == "ASAHI_VISIBLE_DEVICES":
             conman_args += ["-e", f"{gpu_type}={gpu_num}"]
+
+        # Podman is not a drop-in replacement for Docker... we need to detect which one we are running, a la the available() logic in common.py.
+        # Risk of duplicating code here; maintenance concern.
+        if gpu_type == "CUDA_VISIBLE_DEVICES":
+            if shutil.which("podman"):
+                # TODO: is "all" appropriate here?
+                conman_args += ["--device", "nvidia.com/gpu=all"]  # AFAIK, CUDA requires this for Podman
+            else:
+                conman_args += ["--gpus", "all"]  # ...and this for Docker
+
         return conman_args
 
     def run_container(self, args, shortnames):
@@ -389,7 +399,15 @@ def get_gpu():
             content = file.read()
             if "asahi" in content.lower():
                 return "ASAHI_VISIBLE_DEVICES", 1
-
+
+    try:
+        # TODO: I don't currently have access to a PC with multiple NVIDIA GPUs, nor an NVIDIA Mac, but I *think* that
+        # every Linux and Windows machine with a modern NVIDIA driver ships nvidia-smi, and that the number of output lines corresponds to the number of (zero-indexed) GPUs.
+        check_output = subprocess.run(['nvidia-smi', '-L'], check=True, capture_output=True)  # shell out to nvidia-smi
+        if not check_output.returncode:  # command exited with EXIT_SUCCESS
+            return "CUDA_VISIBLE_DEVICES", len(check_output.stdout.splitlines())  # return CUDA flag and GPU count
+    except Exception: pass  # nvidia-smi missing or failed; fall through
+
     return None, None

From c638d979a69a20d3c2fe6feb93843828ed979159 Mon Sep 17 00:00:00 2001
From: "Fred N. Garvin" <184324400+FNGarvin@users.noreply.github.com>
Date: Thu, 21 Nov 2024 18:36:52 -0600
Subject: [PATCH 2/2] Update model.py

Import subprocess (to shell out to nvidia-smi) and shutil (to duplicate
the available() functionality and test whether we are running under
podman or docker).

Signed-off-by: Fred N. Garvin, Esq. <184324400+FNGarvin@users.noreply.github.com>
---
 ramalama/model.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ramalama/model.py b/ramalama/model.py
index 54ac693c..2514fed4 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -3,6 +3,8 @@
 import glob
 import atexit
 import shlex
+import subprocess
+import shutil
 from ramalama.common import (
     default_image,