Commit 1b7ddb1: update
IlyasMoutawwakil committed Jul 30, 2024
1 parent 1aa04f0 commit 1b7ddb1
Showing 4 changed files with 18 additions and 18 deletions.
@@ -1,4 +1,4 @@
-name: CLI Llama.cpp Tests
+name: CLI CPU Llama.Cpp Tests
 
 on:
   workflow_dispatch:
@@ -26,15 +26,15 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
 
 jobs:
-  run_cli_llama_cpp_tests:
+  run_cli_cpu_llama_cpp_tests:
     runs-on: ubuntu-latest
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Set up Python 3.10
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
         with:
           python-version: "3.10"

2 changes: 2 additions & 0 deletions README.md
@@ -13,6 +13,7 @@ Optimum-Benchmark is a unified [multi-backend & multi-device](#backends--devices
 
 *News* 📰
 
+- LlamaCpp backend for benchmarking [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) bindings with all its supported devices 🚀
 - 🥳 PyPI package is now available for installation: `pip install optimum-benchmark` 🎉 [check it out](https://pypi.org/project/optimum-benchmark/) !
 - Model loading latency/memory/energy tracking for all backends in the inference scenario 🚀
 - numactl support for Process and Torchrun launchers to control the NUMA nodes on which the benchmark runs.
@@ -47,6 +48,7 @@ Optimum-Benchmark is continuously and intensively tested on a variety of devices
 
 ### CLI 📈
 
+[![CLI_CPU_LLAMA_CPP](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_llama_cpp.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_llama_cpp.yaml)
 [![CLI_CPU_NEURAL_COMPRESSOR](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_neural_compressor.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_neural_compressor.yaml)
 [![CLI_CPU_ONNXRUNTIME](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_onnxruntime.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_onnxruntime.yaml)
 [![CLI_CPU_OPENVINO](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_openvino.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_openvino.yaml)
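For context on what the new badge and news entry refer to, a rough sketch of benchmarking the new LlamaCpp backend through the Python API is shown below. It assumes the `Benchmark`/`BenchmarkConfig` pattern documented for other backends carries over unchanged; the GGUF repository, filename, and field values are illustrative assumptions, not taken from this commit.

```python
# Hedged sketch: assumes optimum-benchmark's documented Python API
# (Benchmark, BenchmarkConfig, ProcessConfig, InferenceConfig) works the same
# way for the llama_cpp backend; repo/filename below are hypothetical.
from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig
from optimum_benchmark.backends.llama_cpp.config import LlamaCppConfig

if __name__ == "__main__":
    backend_config = LlamaCppConfig(
        model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",  # hypothetical GGUF repo
        filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",  # hypothetical filename
        task="text-generation",
        device="cpu",
    )
    benchmark_config = BenchmarkConfig(
        name="cli_cpu_llama_cpp",
        launcher=ProcessConfig(),
        scenario=InferenceConfig(latency=True, memory=True),
        backend=backend_config,
    )
    benchmark_report = Benchmark.launch(benchmark_config)
```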
18 changes: 12 additions & 6 deletions optimum_benchmark/backends/llama_cpp/backend.py
@@ -1,5 +1,6 @@
 from tempfile import TemporaryDirectory
 from typing import Any, Dict, Tuple
+import subprocess
 
 from llama_cpp import Llama

@@ -28,20 +29,25 @@ def load_model_from_pretrained(self) -> None:
         """
         Load the pretrained model from the given model name (normally GGUF, GGML)
         """
-        embedding = True if self.config.task == "feature-extraction" else False
 
         self.pretrained_model = Llama.from_pretrained(
-            repo_id=self.config.model,  # type: ignore
+            repo_id=self.config.model,
             filename=self.config.filename,
-            verbose=False,
-            echo=False,
-            embedding=embedding,
-        )  # type: ignore
+            **self.llama_cpp_kwargs,
+        )
 
     def validate_task(self) -> None:
         if self.config.task not in ["text-generation"]:
             raise ValueError(f"Task {self.config.task} not supported by {self.NAME}")
 
+    @property
+    def llama_cpp_kwargs(self) -> Dict[str, Any]:
+        return {
+            "embedding": self.config.task == "feature-extraction",
+            "verbose": False,
+            "echo": False,
+        }
+
     def prepare_inputs(self, inputs: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
         if self.config.task == "text-generation":
             if inputs["input_ids"].shape[0] != 1:
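The refactor above moves the task-dependent arguments into a `llama_cpp_kwargs` property instead of computing them inline, so the GGUF load path and any future task-specific options live in one place. At load time the call is equivalent to the following standalone sketch against `llama-cpp-python`; the repository and filename are hypothetical placeholders, not values from this commit.

```python
from typing import Any, Dict

from llama_cpp import Llama


def llama_cpp_kwargs(task: str) -> Dict[str, Any]:
    # Mirrors the new property: embeddings only for feature-extraction,
    # and quiet output while benchmarking.
    return {
        "embedding": task == "feature-extraction",
        "verbose": False,
        "echo": False,
    }


# Hypothetical GGUF repo/filename; Llama.from_pretrained downloads via huggingface_hub.
model = Llama.from_pretrained(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
    **llama_cpp_kwargs("text-generation"),
)
print(model("What is benchmarking?", max_tokens=16)["choices"][0]["text"])
```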
8 changes: 0 additions & 8 deletions optimum_benchmark/backends/llama_cpp/config.py
@@ -5,12 +5,6 @@
 from ...import_utils import llama_cpp_version
 from ..config import BackendConfig
 
-LOGGER = getLogger("backend")
-
-
-def llama_cpp_model_kwargs():
-    return {"verbose": True}
-
 
 @dataclass
 class LlamaCppConfig(BackendConfig):
@@ -30,5 +24,3 @@ def __post_init__(self):
 
         if self.device not in ["cuda", "mps", "cpu"]:
             raise ValueError(f"Llama.cpp Backend only supports 'cpu', 'mps' and 'cuda' devices, got {self.device}")
-
-        LOGGER.warning("Llama.cpp automatically selects the device, ignoring the device parameter in the config.")
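With the module-level logger and the unused `llama_cpp_model_kwargs()` helper removed, the config keeps only the device check in `__post_init__`. A minimal standalone illustration of that pattern follows; the simplified fields are assumptions, not the real `LlamaCppConfig` definition.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class ExampleLlamaCppConfig:
    # Simplified stand-in for LlamaCppConfig; real field names and defaults may differ.
    model: str = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"  # hypothetical GGUF repo
    filename: Optional[str] = None
    device: str = "cpu"

    def __post_init__(self):
        # Same validation as the kept __post_init__ branch shown above.
        if self.device not in ["cuda", "mps", "cpu"]:
            raise ValueError(
                f"Llama.cpp Backend only supports 'cpu', 'mps' and 'cuda' devices, got {self.device}"
            )


config = ExampleLlamaCppConfig(device="cpu")  # passes validation
# ExampleLlamaCppConfig(device="xpu")         # would raise ValueError
```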
