From 69e8acb9ed44b2c6a172368cc9bd9f17f167052d Mon Sep 17 00:00:00 2001
From: Brian
Date: Mon, 25 Nov 2024 05:33:01 -0500
Subject: [PATCH] Fixed GPU detection for CUDA, ROCm, etc. using env vars

Signed-off-by: Brian
---
 ramalama/common.py |  2 +-
 ramalama/model.py  | 71 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/ramalama/common.py b/ramalama/common.py
index aa97bfac..f589a342 100644
--- a/ramalama/common.py
+++ b/ramalama/common.py
@@ -93,7 +93,7 @@ def run_cmd(args, cwd=None, stdout=subprocess.PIPE, ignore_stderr=False, debug=F
     if ignore_stderr:
         stderr = subprocess.PIPE
 
-    return subprocess.run(args, check=True, cwd=cwd, stdout=stdout, stderr=stderr)
+    return subprocess.run(args, check=True, cwd=cwd, stdout=stdout, stderr=stderr, text=True)
 
 
 def find_working_directory():
diff --git a/ramalama/model.py b/ramalama/model.py
index 4bf13d55..03cfd458 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -98,12 +98,14 @@ def _image(self, args):
         if args.image != default_image():
             return args.image
 
-        gpu_type, _ = get_gpu()
-        if gpu_type == "HIP_VISIBLE_DEVICES":
+        if os.getenv("HIP_VISIBLE_DEVICES"):
             return "quay.io/ramalama/rocm:latest"
 
-        if gpu_type == "ASAHI_VISIBLE_DEVICES":
+        if os.getenv("ASAHI_VISIBLE_DEVICES"):
             return "quay.io/ramalama/asahi:latest"
+
+        if os.getenv("CUDA_VISIBLE_DEVICES"):
+            return "docker.io/brianmahabir/rama-cuda:v1"
 
         return args.image
 
@@ -143,9 +145,15 @@ def setup_container(self, args):
         if os.path.exists("/dev/kfd"):
             conman_args += ["--device", "/dev/kfd"]
 
-        gpu_type, gpu_num = get_gpu()
-        if gpu_type == "HIP_VISIBLE_DEVICES" or gpu_type == "ASAHI_VISIBLE_DEVICES":
-            conman_args += ["-e", f"{gpu_type}={gpu_num}"]
+        for var in ["HIP_VISIBLE_DEVICES", "ASAHI_VISIBLE_DEVICES", "CUDA_VISIBLE_DEVICES"]:
+            value = os.getenv(var)
+            if value:
+                if var == "CUDA_VISIBLE_DEVICES":
+                    # Special handling for CUDA (e.g., using '--gpus all')
+                    conman_args += ["--gpus", "all"]
+                else:
+                    # For HIP and ASAHI, we directly add the environment variable with its value
+                    conman_args += ["-e", f"{var}={value}"]
         return conman_args
 
     def run_container(self, args, shortnames):
@@ -190,14 +198,14 @@ def cleanup():
         return True
 
     def gpu_args(self):
+        gpu_type, gpu_num = get_gpu()
         gpu_args = []
         if sys.platform == "darwin":
             # llama.cpp will default to the Metal backend on macOS, so we don't need
             # any additional arguments.
             pass
-        elif sys.platform == "linux" and (
-            os.getenv("HIP_VISIBLE_DEVICES") or os.getenv("ASAHI_VISIBLE_DEVICES") or os.getenv("CUDA_VISIBLE_DEVICES")
-        ):
+        elif sys.platform == "linux" and gpu_type is not None:
+            os.environ[gpu_type] = str(gpu_num)
             gpu_args = ["-ngl", "99"]
         else:
             print("GPU offload was requested but is not available on this system")
@@ -384,25 +392,52 @@ def check_valid_model_path(self, relative_target_path, model_path):
 
 def get_gpu():
     i = 0
-    gpu_num = 0
-    gpu_bytes = 0
+    amd_gpu_num = -1
+    amd_gpu_bytes = 0
+    nvidia_gpu_num = -1
+    nvidia_gpu_mib = 0
+
+    # Check for AMD GPUs (ROCm/AMD case)
     for fp in sorted(glob.glob('/sys/bus/pci/devices/*/mem_info_vram_total')):
         with open(fp, 'r') as file:
            content = int(file.read())
-            if content > 1073741824 and content > gpu_bytes:
-                gpu_bytes = content
-                gpu_num = i
+            if content > 1073741824 and content > amd_gpu_bytes:
+                amd_gpu_bytes = content
+                amd_gpu_num = i
 
         i += 1
 
-    if gpu_bytes:  # this is the ROCm/AMD case
-        return "HIP_VISIBLE_DEVICES", gpu_num
-
+    # Check if system is running Asahi Linux (Apple Silicon)
     if os.path.exists('/etc/os-release'):
         with open('/etc/os-release', 'r') as file:
             content = file.read()
             if "asahi" in content.lower():
-                return "ASAHI_VISIBLE_DEVICES", 1
+                return "ASAHI_VISIBLE_DEVICES", 1  # For Apple Silicon with Asahi Linux
+
+    # Check for NVIDIA GPUs (CUDA case)
+    try:
+        command = ['nvidia-smi', '--query-gpu=index,memory.total', '--format=csv,noheader,nounits']
+        output = run_cmd(command)
+        gpus = output.stdout.strip().split('\n')
+        gpus_sorted = sorted(gpus, key=lambda x: int(x.split(',')[1]), reverse=True)
+        nvidia_gpu_mib = int(gpus_sorted[0].split(',')[1])
+        nvidia_gpu_num = int(gpus_sorted[0].split(',')[0])
+    except Exception:
+        pass  # No NVIDIA GPU found or nvidia-smi failed; fall back to the AMD result
+
+    # Compare AMD and NVIDIA GPUs and prefer the one with more VRAM
+    if amd_gpu_bytes and nvidia_gpu_mib:
+        # bytes to MiB
+        amd_gpu_mib = amd_gpu_bytes / 1048576
+        if amd_gpu_mib > nvidia_gpu_mib:
+            return "HIP_VISIBLE_DEVICES", amd_gpu_num
+        else:
+            return "CUDA_VISIBLE_DEVICES", nvidia_gpu_num
+    elif amd_gpu_bytes:
+        return "HIP_VISIBLE_DEVICES", amd_gpu_num
+    elif nvidia_gpu_mib:
+        return "CUDA_VISIBLE_DEVICES", nvidia_gpu_num
+
 
     return None, None
 
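
A rough way to sanity-check the nvidia-smi parsing that get_gpu() now relies on is a standalone sketch like the one below. It is illustrative only: the helper name pick_nvidia_gpu is not part of this patch, and it assumes nvidia-smi is installed and on PATH.

    # Illustrative sketch, not part of the patch: mirrors the nvidia-smi CSV
    # parsing used by get_gpu(); assumes nvidia-smi is installed and on PATH.
    import subprocess


    def pick_nvidia_gpu():
        cmd = ['nvidia-smi', '--query-gpu=index,memory.total', '--format=csv,noheader,nounits']
        try:
            out = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, text=True)
        except Exception:
            return None, 0  # nvidia-smi missing or failed
        # Each row is "index, memory.total in MiB"; pick the GPU with the most VRAM.
        rows = [line.split(',') for line in out.stdout.strip().split('\n') if line]
        index, mib = max(rows, key=lambda r: int(r[1]))
        return int(index), int(mib)


    if __name__ == "__main__":
        print(pick_nvidia_gpu())  # e.g. (0, 24576) on a single 24 GiB GPU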