From 327479981fa3ebe45b7d0fd2bd0c298df268c920 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Thu, 4 Apr 2024 12:50:46 -0500 Subject: [PATCH] Convert GGML to expect GGUF format (#581) * initial pass at improved ggml error handling * more config validation * update README.md and sanity check GGUF file header * Consistent format reference & more specific errors --- README.md | 2 +- garak/generators/base.py | 2 +- garak/generators/ggml.py | 92 +++++++++++++++++++++++++++------------- pyproject.toml | 3 +- requirements.txt | 3 +- 5 files changed, 68 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 959807fc8..ea7696876 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ currently supports: * [hugging face hub](https://huggingface.co/models) generative models * [replicate](https://replicate.com/) text models * [openai api](https://platform.openai.com/docs/introduction) chat & continuation models -* ggml models like [llama.cpp](https://github.com/ggerganov/llama.cpp) +* gguf models like [llama.cpp](https://github.com/ggerganov/llama.cpp) version >= 1046 * .. and many more LLMs! ## Install: diff --git a/garak/generators/base.py b/garak/generators/base.py index 63d78c76a..00045af88 100644 --- a/garak/generators/base.py +++ b/garak/generators/base.py @@ -54,7 +54,7 @@ def _call_model(self, prompt: str) -> Union[List[str], str, None]: succeed or raise an exception. The @backoff decorator can be helpful here - see garak.generators.openai for an example usage. - Can return None if no reponse was elicited""" + Can return None if no response was elicited""" raise NotImplementedError def _pre_generate_hook(self): diff --git a/garak/generators/ggml.py b/garak/generators/ggml.py index d13ab467b..c337dfbf9 100644 --- a/garak/generators/ggml.py +++ b/garak/generators/ggml.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ggml generator support -This generator works with ggml models like llama.cpp. +This generator works with ggml models in gguf format like llama.cpp. Put the path to your ggml executable (e.g. "/home/leon/llama.cpp/main") in an environment variable named GGML_MAIN_PATH, and pass the path to the @@ -13,6 +13,7 @@ """ +import logging import os import re import subprocess @@ -20,9 +21,10 @@ from garak import _config from garak.generators.base import Generator +GGUF_MAGIC = bytes([0x47, 0x47, 0x55, 0x46]) class GgmlGenerator(Generator): - """Generator interface for ggml models. + """Generator interface for ggml models in gguf format. Set the path to the model as the model name, and put the path to the ggml executable in environment variable GGML_MAIN_PATH. """ @@ -34,51 +36,81 @@ class GgmlGenerator(Generator): top_p = 0.95 temperature = 0.8 exception_on_failure = True + first_call = True generator_family_name = "ggml" + def command_params(self): + return { + "-m": self.name, + "-n": self.max_tokens, + "--repeat-penalty": self.repeat_penalty, + "--presence-penalty": self.presence_penalty, + "--frequency-penalty": self.frequency_penalty, + "--top-k": self.top_k, + "--top-p": self.top_p, + "--temp": self.temperature, + "-s": self.seed, + } + + def __init__(self, name, generations=10): self.path_to_ggml_main = os.getenv("GGML_MAIN_PATH") + if self.path_to_ggml_main is None: + raise RuntimeError("Executable not provided by environment GGML_MAIN_PATH") + if not os.path.isfile(self.path_to_ggml_main): + raise FileNotFoundError(f"Path provided is not a file: {self.path_to_ggml_main}") + + # this value cannot be `None`, 0 is consistent and `-1` would produce random seeds + self.seed = _config.run.seed if _config.run.seed is not None else 0 + + # model is a file, validate exists and sanity check file header for supported format + if not os.path.isfile(name): + raise FileNotFoundError(f"File not found, unable to load model: {name}") + else: + with open(name, 'rb') as model_file: + magic_num = model_file.read(len(GGUF_MAGIC)) + if magic_num != GGUF_MAGIC: + raise RuntimeError(f"{name} is not in GGUF format") - self.seed = _config.run.seed super().__init__(name, generations=generations) def _call_model(self, prompt): command = [ self.path_to_ggml_main, - "-m", - self.name, - "-n", - self.max_tokens, - "--repeat-penalty", - self.repeat_penalty, - "--presence-penalty", - self.presence_penalty, - "--frequency-penalty", - self.frequency_penalty, - "--top-k", - self.top_k, - "--top-p", - self.top_p, - "--temp", - self.temperature, - "-s", - self.seed, "-p", prompt, ] + # test all params for None type + for key, value in self.command_params().items(): + if value is not None: + command.append(key) + command.append(value) command = [str(param) for param in command] if _config.system.verbose > 1: print("GGML invoked with", command) - result = subprocess.run( - command, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL, - check=self.exception_on_failure, - ) - output = result.stdout.decode("utf-8") - output = re.sub("^" + re.escape(prompt.lstrip()), "", output.lstrip()) - return output + try: + result = subprocess.run( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=self.exception_on_failure, + ) + output = result.stdout.decode("utf-8") + output = re.sub("^" + re.escape(prompt.lstrip()), "", output.lstrip()) + self.first_call = False + return output + except subprocess.CalledProcessError as err: + # if this is the first call attempt, raise the exception to indicate + # the generator is mis-configured + print(err.stderr.decode("utf-8")) + logging.error(err.stderr.decode("utf-8")) + if self.first_call: + raise err + return None + except Exception as err: + logging.error(err) + return None default_class = "GgmlGenerator" diff --git a/pyproject.toml b/pyproject.toml index fe9e6d836..a9f41b635 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,8 @@ dependencies = [ "zalgolib>=0.2.2", "ecoji>=0.1.0", "deepl==1.17.0", - "fschat>=0.2.36" + "fschat>=0.2.36", + "typing>=3.7,<3.8; python_version<'3.5'" ] [project.urls] diff --git a/requirements.txt b/requirements.txt index 0e24f59c5..458a61edb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,4 +26,5 @@ numpy>=1.26.1 zalgolib>=0.2.2 ecoji>=0.1.0 deepl==1.17.0 -fschat>=0.2.36 \ No newline at end of file +fschat>=0.2.36 +typing>=3.7,<3.8; python_version<'3.5' \ No newline at end of file