Fall back to huggingface-cli when pulling via URL fails
Handle non-GGUF files as well.

Signed-off-by: Daniel J Walsh <[email protected]>
nzwulfin authored and rhatdan committed Nov 20, 2024
1 parent 388eaa1 commit a53ad53
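
In short, pull() now tries the direct URL download first and only shells out to huggingface-cli when that fails, which also covers repositories that are not a single GGUF file. A condensed sketch of the new flow (paraphrased from the ramalama/huggingface.py hunk below, not the verbatim patch):

    try:
        # Existing path: resolve the file URL and checksum, download directly.
        return self.url_pull(args, model_path, directory_path)
    except (urllib.error.HTTPError, urllib.error.URLError, KeyError) as url_error:
        try:
            # Fallback: let `huggingface-cli download` fetch the repo, then
            # symlink the downloaded directory into the model store.
            return self.hf_pull(args, model_path, directory_path)
        except NotImplementedError as cli_missing:
            perror(cli_missing)   # huggingface-cli is not installed
            raise url_error       # surface the original download failure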
Showing 5 changed files with 35 additions and 9 deletions.
container-images/scripts/build_llama_and_whisper.sh (3 changes: 2 additions & 1 deletion)
@@ -29,7 +29,8 @@ dnf_install() {
   elif [ "$containerfile" = "rocm" ]; then
     dnf install -y rocm-dev hipblas-devel rocblas-devel
   elif [ "$containerfile" = "cuda" ]; then
-    dnf install -y "${rpm_list[@]}"
+    dnf install -y "${rpm_list[@]}" gcc-toolset-12
+    source /opt/rh/gcc-toolset-12/enable
   fi

   # For Vulkan image, we don't need to install anything extra but rebuild with
ramalama/huggingface.py (31 changes: 26 additions & 5 deletions)
@@ -1,7 +1,7 @@
 import os
 import pathlib
 import urllib.request
-from ramalama.common import available, run_cmd, exec_cmd, download_file, verify_checksum
+from ramalama.common import available, run_cmd, exec_cmd, download_file, verify_checksum, perror
 from ramalama.model import Model

 missing_huggingface = """
@@ -45,17 +45,15 @@ def __init__(self, model):

     def login(self, args):
         if not self.hf_cli_available:
-            print("huggingface-cli not available, skipping login.")
-            return
+            raise NotImplementedError("huggingface-cli not available, skipping login.")
         conman_args = ["huggingface-cli", "login"]
         if args.token:
             conman_args.extend(["--token", args.token])
         self.exec(conman_args, args)

     def logout(self, args):
         if not self.hf_cli_available:
-            print("huggingface-cli not available, skipping logout.")
-            return
+            raise NotImplementedError("huggingface-cli not available, skipping logout.")
         conman_args = ["huggingface-cli", "logout"]
         if args.token:
             conman_args.extend(["--token", args.token])
@@ -69,6 +67,29 @@ def pull(self, args):
         symlink_dir = os.path.dirname(model_path)
         os.makedirs(symlink_dir, exist_ok=True)

+        try:
+            return self.url_pull(args, model_path, directory_path)
+        except (urllib.error.HTTPError, urllib.error.URLError, KeyError) as e:
+            try:
+                print("falling back to hf_cli")
+                return self.hf_pull(args, model_path, directory_path)
+            except NotImplementedError as e1:
+                perror(e1)
+                raise e
+
+    def hf_pull(self, args, model_path, directory_path):
+        if not self.hf_cli_available:
+            raise NotImplementedError("huggingface-cli not available, skipping logout.")
+
+        conman_args = ["huggingface-cli", "download", "--local-dir", directory_path, self.model]
+        run_cmd(conman_args, debug=args.debug)
+
+        relative_target_path = os.path.relpath(directory_path, start=os.path.dirname(model_path))
+        pathlib.Path(model_path).unlink(missing_ok=True)
+        os.symlink(relative_target_path, model_path)
+        return model_path
+
+    def url_pull(self, args, model_path, directory_path):
         # Fetch the SHA-256 checksum from the API
         checksum_api_url = f"https://huggingface.co/{self.directory}/raw/main/{self.filename}"
         try:
ramalama/model.py (3 changes: 1 addition & 2 deletions)
@@ -100,7 +100,6 @@ def remove(self, args):

         self.garbage_collection(args)

-
     def _image(self, args):
         if args.image != default_image():
             return args.image
@@ -298,7 +297,7 @@ def serve(self, args):

         exec_args = ["llama-server", "--port", args.port, "-m", exec_model_path]
         if args.runtime == "vllm":
-            exec_args = ["vllm", "serve", "--port", args.port, exec_model_path]
+            exec_args = ["vllm", "serve", "--port", args.port, os.path.dirname(exec_model_path)]
         else:
             if args.gpu:
                 exec_args.extend(self.gpu_args())
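Why the vllm change above matters (a hedged aside, not part of the patch): vllm is now pointed at the directory that holds the model rather than at a single file, which fits repositories that are not one GGUF file. A small illustration using the in-container mount path from the updated 040-serve.bats expectation:

    import os

    exec_model_path = "/mnt/models/model.file"   # illustrative in-container path
    exec_args = ["vllm", "serve", "--port", "1234", os.path.dirname(exec_model_path)]
    # -> ["vllm", "serve", "--port", "1234", "/mnt/models"]; in the generated kube YAML
    #    this splits into command: ["vllm"] and args: ['serve', '--port', '1234', '/mnt/models']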
test/system/040-serve.bats (2 changes: 1 addition & 1 deletion)
@@ -205,7 +205,7 @@ verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name

     run cat $name.yaml
     is "$output" ".*command: \[\"vllm\"\]" "command is correct"
-    is "$output" ".*args: \['serve', '--port', '1234', '/mnt/models/model.file'\]" "args is correct"
+    is "$output" ".*args: \['serve', '--port', '1234', '/mnt/models'\]" "args is correct"

     is "$output" ".*image: quay.io/ramalama/ramalama:latest" "image is correct"
     is "$output" ".*reference: ${ociimage}" "AI image should be created"
test/system/050-pull.bats (5 changes: 5 additions & 0 deletions)
@@ -45,6 +45,11 @@ load setup_suite
     run_ramalama list
     is "$output" ".*afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k" "image was actually pulled locally"
     run_ramalama rm huggingface://afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k.gguf
+
+    run_ramalama pull hf://TinyLlama/TinyLlama-1.1B-Chat-v1.0
+    run_ramalama list
+    is "$output" ".*TinyLlama/TinyLlama-1.1B-Chat-v1.0" "image was actually pulled locally"
+    run_ramalama rm huggingface://TinyLlama/TinyLlama-1.1B-Chat-v1.0
 }

 # bats test_tags=distro-integration
