From a53ad53610f2dad78cd6fb3e5068ad7b7693aa89 Mon Sep 17 00:00:00 2001
From: Matt Micene
Date: Wed, 20 Nov 2024 08:48:20 -0500
Subject: [PATCH] Fall back to huggingface-cli when pulling via URL fails

Handle non-GGUF files as well.

Signed-off-by: Daniel J Walsh
---
 .../scripts/build_llama_and_whisper.sh |  3 +-
 ramalama/huggingface.py                | 31 ++++++++++++++++---
 ramalama/model.py                      |  3 +-
 test/system/040-serve.bats             |  2 +-
 test/system/050-pull.bats              |  5 +++
 5 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/container-images/scripts/build_llama_and_whisper.sh b/container-images/scripts/build_llama_and_whisper.sh
index d2f7291e..f4f60825 100644
--- a/container-images/scripts/build_llama_and_whisper.sh
+++ b/container-images/scripts/build_llama_and_whisper.sh
@@ -29,7 +29,8 @@ dnf_install() {
   elif [ "$containerfile" = "rocm" ]; then
     dnf install -y rocm-dev hipblas-devel rocblas-devel
   elif [ "$containerfile" = "cuda" ]; then
-    dnf install -y "${rpm_list[@]}"
+    dnf install -y "${rpm_list[@]}" gcc-toolset-12
+    source /opt/rh/gcc-toolset-12/enable
   fi
 
   # For Vulkan image, we don't need to install anything extra but rebuild with
diff --git a/ramalama/huggingface.py b/ramalama/huggingface.py
index f7d91525..979d770f 100644
--- a/ramalama/huggingface.py
+++ b/ramalama/huggingface.py
@@ -1,7 +1,7 @@
 import os
 import pathlib
 import urllib.request
-from ramalama.common import available, run_cmd, exec_cmd, download_file, verify_checksum
+from ramalama.common import available, run_cmd, exec_cmd, download_file, verify_checksum, perror
 from ramalama.model import Model
 
 missing_huggingface = """
@@ -45,8 +45,7 @@ def __init__(self, model):
 
     def login(self, args):
         if not self.hf_cli_available:
-            print("huggingface-cli not available, skipping login.")
-            return
+            raise NotImplementedError("huggingface-cli not available, skipping login.")
         conman_args = ["huggingface-cli", "login"]
         if args.token:
             conman_args.extend(["--token", args.token])
@@ -54,8 +53,7 @@ def login(self, args):
 
     def logout(self, args):
         if not self.hf_cli_available:
-            print("huggingface-cli not available, skipping logout.")
-            return
+            raise NotImplementedError("huggingface-cli not available, skipping logout.")
         conman_args = ["huggingface-cli", "logout"]
         if args.token:
             conman_args.extend(["--token", args.token])
@@ -69,6 +67,29 @@ def pull(self, args):
         symlink_dir = os.path.dirname(model_path)
         os.makedirs(symlink_dir, exist_ok=True)
 
+        try:
+            return self.url_pull(args, model_path, directory_path)
+        except (urllib.error.HTTPError, urllib.error.URLError, KeyError) as e:
+            try:
+                print("falling back to hf_cli")
+                return self.hf_pull(args, model_path, directory_path)
+            except NotImplementedError as e1:
+                perror(e1)
+                raise e
+
+    def hf_pull(self, args, model_path, directory_path):
+        if not self.hf_cli_available:
+            raise NotImplementedError("huggingface-cli not available, skipping pull.")
+
+        conman_args = ["huggingface-cli", "download", "--local-dir", directory_path, self.model]
+        run_cmd(conman_args, debug=args.debug)
+
+        relative_target_path = os.path.relpath(directory_path, start=os.path.dirname(model_path))
+        pathlib.Path(model_path).unlink(missing_ok=True)
+        os.symlink(relative_target_path, model_path)
+        return model_path
+
+    def url_pull(self, args, model_path, directory_path):
         # Fetch the SHA-256 checksum from the API
         checksum_api_url = f"https://huggingface.co/{self.directory}/raw/main/{self.filename}"
         try:
diff --git a/ramalama/model.py b/ramalama/model.py
index 3f4ebf4f..318886d4 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -100,7 +100,6 @@ def remove(self, args):
 
         self.garbage_collection(args)
 
-
     def _image(self, args):
         if args.image != default_image():
             return args.image
@@ -298,7 +297,7 @@ def serve(self, args):
 
         exec_args = ["llama-server", "--port", args.port, "-m", exec_model_path]
         if args.runtime == "vllm":
-            exec_args = ["vllm", "serve", "--port", args.port, exec_model_path]
+            exec_args = ["vllm", "serve", "--port", args.port, os.path.dirname(exec_model_path)]
         else:
             if args.gpu:
                 exec_args.extend(self.gpu_args())
diff --git a/test/system/040-serve.bats b/test/system/040-serve.bats
index 5c2b4f3d..85a5c513 100644
--- a/test/system/040-serve.bats
+++ b/test/system/040-serve.bats
@@ -205,7 +205,7 @@ verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name
 
     run cat $name.yaml
     is "$output" ".*command: \[\"vllm\"\]" "command is correct"
-    is "$output" ".*args: \['serve', '--port', '1234', '/mnt/models/model.file'\]" "args is correct"
+    is "$output" ".*args: \['serve', '--port', '1234', '/mnt/models'\]" "args is correct"
 
     is "$output" ".*image: quay.io/ramalama/ramalama:latest" "image is correct"
     is "$output" ".*reference: ${ociimage}" "AI image should be created"
diff --git a/test/system/050-pull.bats b/test/system/050-pull.bats
index 9885ff6f..d24d8c1d 100644
--- a/test/system/050-pull.bats
+++ b/test/system/050-pull.bats
@@ -45,6 +45,11 @@ load setup_suite
     run_ramalama list
     is "$output" ".*afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k" "image was actually pulled locally"
     run_ramalama rm huggingface://afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k.gguf
+
+    run_ramalama pull hf://TinyLlama/TinyLlama-1.1B-Chat-v1.0
+    run_ramalama list
+    is "$output" ".*TinyLlama/TinyLlama-1.1B-Chat-v1.0" "image was actually pulled locally"
+    run_ramalama rm huggingface://TinyLlama/TinyLlama-1.1B-Chat-v1.0
 }
 
 # bats test_tags=distro-integration
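
Reviewer note on the new pull() flow: the change wraps the existing URL-based download in a try/except and only shells out to huggingface-cli when the direct fetch fails. Below is a minimal standalone sketch of that pattern, assuming nothing beyond what the diff shows; url_pull and hf_pull mirror the method names above, while pull_with_fallback and the callable arguments are hypothetical names used only for illustration.

# Minimal sketch of the fallback introduced in Huggingface.pull() above.
# url_pull and hf_pull mirror the method names in the diff; they are passed
# in as callables here so the sketch stays self-contained and runnable.
import urllib.error


def pull_with_fallback(url_pull, hf_pull):
    try:
        # Preferred path: direct raw-file download with checksum verification.
        return url_pull()
    except (urllib.error.HTTPError, urllib.error.URLError, KeyError) as exc:
        print("falling back to hf_cli")
        try:
            # Secondary path: let huggingface-cli download the repository,
            # which also covers non-GGUF repositories.
            return hf_pull()
        except NotImplementedError as cli_missing:
            # The CLI is not installed; report that and re-raise the original error.
            print(cli_missing)
            raise exc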