From a53ad53610f2dad78cd6fb3e5068ad7b7693aa89 Mon Sep 17 00:00:00 2001
From: Matt Micene
Date: Wed, 20 Nov 2024 08:48:20 -0500
Subject: [PATCH] Fall back to huggingface-cli when pulling via URL fails

Handle non-GGUF files as well.

Signed-off-by: Daniel J Walsh
---
 .../scripts/build_llama_and_whisper.sh |  3 +-
 ramalama/huggingface.py                | 31 ++++++++++++++++---
 ramalama/model.py                      |  3 +-
 test/system/040-serve.bats             |  2 +-
 test/system/050-pull.bats              |  5 +++
 5 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/container-images/scripts/build_llama_and_whisper.sh b/container-images/scripts/build_llama_and_whisper.sh
index d2f7291e..f4f60825 100644
--- a/container-images/scripts/build_llama_and_whisper.sh
+++ b/container-images/scripts/build_llama_and_whisper.sh
@@ -29,7 +29,8 @@ dnf_install() {
   elif [ "$containerfile" = "rocm" ]; then
     dnf install -y rocm-dev hipblas-devel rocblas-devel
   elif [ "$containerfile" = "cuda" ]; then
-    dnf install -y "${rpm_list[@]}"
+    dnf install -y "${rpm_list[@]}" gcc-toolset-12
+    source /opt/rh/gcc-toolset-12/enable
   fi
 
   # For Vulkan image, we don't need to install anything extra but rebuild with
diff --git a/ramalama/huggingface.py b/ramalama/huggingface.py
index f7d91525..979d770f 100644
--- a/ramalama/huggingface.py
+++ b/ramalama/huggingface.py
@@ -1,7 +1,7 @@
 import os
 import pathlib
 import urllib.request
-from ramalama.common import available, run_cmd, exec_cmd, download_file, verify_checksum
+from ramalama.common import available, run_cmd, exec_cmd, download_file, verify_checksum, perror
 from ramalama.model import Model
 
 missing_huggingface = """
@@ -45,8 +45,7 @@ def __init__(self, model):
 
     def login(self, args):
         if not self.hf_cli_available:
-            print("huggingface-cli not available, skipping login.")
-            return
+            raise NotImplementedError("huggingface-cli not available, skipping login.")
         conman_args = ["huggingface-cli", "login"]
         if args.token:
             conman_args.extend(["--token", args.token])
@@ -54,8 +53,7 @@ def login(self, args):
 
     def logout(self, args):
         if not self.hf_cli_available:
-            print("huggingface-cli not available, skipping logout.")
-            return
+            raise NotImplementedError("huggingface-cli not available, skipping logout.")
         conman_args = ["huggingface-cli", "logout"]
         if args.token:
             conman_args.extend(["--token", args.token])
@@ -69,6 +67,29 @@ def pull(self, args):
         symlink_dir = os.path.dirname(model_path)
         os.makedirs(symlink_dir, exist_ok=True)
 
+        try:
+            return self.url_pull(args, model_path, directory_path)
+        except (urllib.error.HTTPError, urllib.error.URLError, KeyError) as e:
+            try:
+                print("falling back to hf_cli")
+                return self.hf_pull(args, model_path, directory_path)
+            except NotImplementedError as e1:
+                perror(e1)
+                raise e
+
+    def hf_pull(self, args, model_path, directory_path):
+        if not self.hf_cli_available:
+            raise NotImplementedError("huggingface-cli not available, skipping pull.")
+
+        conman_args = ["huggingface-cli", "download", "--local-dir", directory_path, self.model]
+        run_cmd(conman_args, debug=args.debug)
+
+        relative_target_path = os.path.relpath(directory_path, start=os.path.dirname(model_path))
+        pathlib.Path(model_path).unlink(missing_ok=True)
+        os.symlink(relative_target_path, model_path)
+        return model_path
+
+    def url_pull(self, args, model_path, directory_path):
         # Fetch the SHA-256 checksum from the API
         checksum_api_url = f"https://huggingface.co/{self.directory}/raw/main/{self.filename}"
         try:
diff --git a/ramalama/model.py b/ramalama/model.py
index 3f4ebf4f..318886d4 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -100,7 +100,6 @@ def remove(self, args):
 
         self.garbage_collection(args)
 
-
     def _image(self, args):
         if args.image != default_image():
             return args.image
@@ -298,7 +297,7 @@ def serve(self, args):
 
         exec_args = ["llama-server", "--port", args.port, "-m", exec_model_path]
         if args.runtime == "vllm":
-            exec_args = ["vllm", "serve", "--port", args.port, exec_model_path]
+            exec_args = ["vllm", "serve", "--port", args.port, os.path.dirname(exec_model_path)]
         else:
             if args.gpu:
                 exec_args.extend(self.gpu_args())
diff --git a/test/system/040-serve.bats b/test/system/040-serve.bats
index 5c2b4f3d..85a5c513 100644
--- a/test/system/040-serve.bats
+++ b/test/system/040-serve.bats
@@ -205,7 +205,7 @@ verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name
 
     run cat $name.yaml
     is "$output" ".*command: \[\"vllm\"\]" "command is correct"
-    is "$output" ".*args: \['serve', '--port', '1234', '/mnt/models/model.file'\]" "args is correct"
+    is "$output" ".*args: \['serve', '--port', '1234', '/mnt/models'\]" "args is correct"
 
     is "$output" ".*image: quay.io/ramalama/ramalama:latest" "image is correct"
     is "$output" ".*reference: ${ociimage}" "AI image should be created"
diff --git a/test/system/050-pull.bats b/test/system/050-pull.bats
index 9885ff6f..d24d8c1d 100644
--- a/test/system/050-pull.bats
+++ b/test/system/050-pull.bats
@@ -45,6 +45,11 @@ load setup_suite
     run_ramalama list
     is "$output" ".*afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k" "image was actually pulled locally"
     run_ramalama rm huggingface://afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k.gguf
+
+    run_ramalama pull hf://TinyLlama/TinyLlama-1.1B-Chat-v1.0
+    run_ramalama list
+    is "$output" ".*TinyLlama/TinyLlama-1.1B-Chat-v1.0" "image was actually pulled locally"
+    run_ramalama rm huggingface://TinyLlama/TinyLlama-1.1B-Chat-v1.0
 }
 
 # bats test_tags=distro-integration
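
Reviewer note on the new pull() flow: the change wraps the existing URL-based download in a try/except and only shells out to huggingface-cli when the direct fetch fails. Below is a minimal standalone sketch of that pattern, assuming nothing beyond what the diff shows; url_pull and hf_pull mirror the method names above, while pull_with_fallback and the callable arguments are hypothetical names used only for illustration.

# Minimal sketch of the fallback introduced in Huggingface.pull() above.
# url_pull and hf_pull mirror the method names in the diff; they are passed
# in as callables here so the sketch stays self-contained and runnable.
import urllib.error


def pull_with_fallback(url_pull, hf_pull):
    try:
        # Preferred path: direct raw-file download with checksum verification.
        return url_pull()
    except (urllib.error.HTTPError, urllib.error.URLError, KeyError) as exc:
        print("falling back to hf_cli")
        try:
            # Secondary path: let huggingface-cli download the repository,
            # which also covers non-GGUF repositories.
            return hf_pull()
        except NotImplementedError as cli_missing:
            # The CLI is not installed; report that and re-raise the original error.
            print(cli_missing)
            raise exc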