Convert GGML to expect GGUF format #581

Merged · 4 commits · Apr 4, 2024
Changes from all commits
README.md (2 changes: 1 addition & 1 deletion)
@@ -29,7 +29,7 @@ currently supports:
* [hugging face hub](https://huggingface.co/models) generative models
* [replicate](https://replicate.com/) text models
* [openai api](https://platform.openai.com/docs/introduction) chat & continuation models
* ggml models like [llama.cpp](https://github.com/ggerganov/llama.cpp)
* gguf models like [llama.cpp](https://github.com/ggerganov/llama.cpp) version >= 1046
* .. and many more LLMs!

## Install:
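For context, a minimal usage sketch (not part of this PR) of the gguf-format generator the updated README line refers to. The model path and llama.cpp binary path are placeholders, and the call to generate() assumes the base Generator API in garak; the env-var name and constructor signature come from garak/generators/ggml.py below.

    # Hedged sketch: paths are placeholders; a real run needs an existing
    # GGUF model file and a llama.cpp "main" executable.
    import os

    os.environ["GGML_MAIN_PATH"] = "/home/leon/llama.cpp/main"  # path to the ggml/llama.cpp executable

    from garak.generators.ggml import GgmlGenerator

    generator = GgmlGenerator("/path/to/model.gguf", generations=1)
    outputs = generator.generate("Briefly explain what GGUF is.")
    print(outputs)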
garak/generators/base.py (2 changes: 1 addition & 1 deletion)
@@ -54,7 +54,7 @@ def _call_model(self, prompt: str) -> Union[List[str], str, None]:
succeed or raise an exception. The @backoff decorator can be helpful
here - see garak.generators.openai for an example usage.

Can return None if no reponse was elicited"""
Can return None if no response was elicited"""
raise NotImplementedError

def _pre_generate_hook(self):
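As an aside, a minimal sketch of the @backoff pattern the docstring above points to (see garak.generators.openai for the real example); the endpoint, exception type, and backoff strategy here are illustrative assumptions, not code from this repository.

    # Illustrative only: a subclass whose _call_model retries transient
    # failures with @backoff and returns None when no response is elicited.
    import backoff
    import requests

    from garak.generators.base import Generator


    class ExampleHTTPGenerator(Generator):
        generator_family_name = "example"

        @backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_tries=5)
        def _call_model(self, prompt: str):
            resp = requests.post(
                "https://example.invalid/generate",  # hypothetical endpoint
                json={"prompt": prompt},
                timeout=30,
            )
            resp.raise_for_status()
            return resp.json().get("text")  # may be None if nothing was returned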
garak/generators/ggml.py (92 changes: 62 additions & 30 deletions)
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""ggml generator support

This generator works with ggml models like llama.cpp.
This generator works with ggml models in gguf format like llama.cpp.

Put the path to your ggml executable (e.g. "/home/leon/llama.cpp/main") in
an environment variable named GGML_MAIN_PATH, and pass the path to the
@@ -13,16 +13,18 @@
"""


import logging
import os
import re
import subprocess

from garak import _config
from garak.generators.base import Generator

GGUF_MAGIC = bytes([0x47, 0x47, 0x55, 0x46])

class GgmlGenerator(Generator):
"""Generator interface for ggml models.
"""Generator interface for ggml models in gguf format.

Set the path to the model as the model name, and put the path to the ggml executable in environment variable GGML_MAIN_PATH.
"""
@@ -34,51 +36,81 @@ class GgmlGenerator(Generator):
top_p = 0.95
temperature = 0.8
exception_on_failure = True
first_call = True

generator_family_name = "ggml"

def command_params(self):
return {
"-m": self.name,
"-n": self.max_tokens,
"--repeat-penalty": self.repeat_penalty,
"--presence-penalty": self.presence_penalty,
"--frequency-penalty": self.frequency_penalty,
"--top-k": self.top_k,
"--top-p": self.top_p,
"--temp": self.temperature,
"-s": self.seed,
}


def __init__(self, name, generations=10):
self.path_to_ggml_main = os.getenv("GGML_MAIN_PATH")
if self.path_to_ggml_main is None:
raise RuntimeError("Executable not provided by environment GGML_MAIN_PATH")
if not os.path.isfile(self.path_to_ggml_main):
raise FileNotFoundError(f"Path provided is not a file: {self.path_to_ggml_main}")

# this value cannot be `None`, 0 is consistent and `-1` would produce random seeds
self.seed = _config.run.seed if _config.run.seed is not None else 0

# model is a file, validate exists and sanity check file header for supported format
if not os.path.isfile(name):
raise FileNotFoundError(f"File not found, unable to load model: {name}")
else:
with open(name, 'rb') as model_file:
magic_num = model_file.read(len(GGUF_MAGIC))
if magic_num != GGUF_MAGIC:
raise RuntimeError(f"{name} is not in GGUF format")

self.seed = _config.run.seed
super().__init__(name, generations=generations)

def _call_model(self, prompt):
command = [
self.path_to_ggml_main,
"-m",
self.name,
"-n",
self.max_tokens,
"--repeat-penalty",
self.repeat_penalty,
"--presence-penalty",
self.presence_penalty,
"--frequency-penalty",
self.frequency_penalty,
"--top-k",
self.top_k,
"--top-p",
self.top_p,
"--temp",
self.temperature,
"-s",
self.seed,
"-p",
prompt,
]
# test all params for None type
for key, value in self.command_params().items():
if value is not None:
command.append(key)
command.append(value)
command = [str(param) for param in command]
if _config.system.verbose > 1:
print("GGML invoked with", command)
result = subprocess.run(
command,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
check=self.exception_on_failure,
)
output = result.stdout.decode("utf-8")
output = re.sub("^" + re.escape(prompt.lstrip()), "", output.lstrip())
return output
try:
result = subprocess.run(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=self.exception_on_failure,
)
output = result.stdout.decode("utf-8")
output = re.sub("^" + re.escape(prompt.lstrip()), "", output.lstrip())
self.first_call = False
return output
except subprocess.CalledProcessError as err:
# if this is the first call attempt, raise the exception to indicate
# the generator is mis-configured
print(err.stderr.decode("utf-8"))
logging.error(err.stderr.decode("utf-8"))
if self.first_call:
raise err
return None
except Exception as err:
logging.error(err)
return None


default_class = "GgmlGenerator"
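For reference, a standalone sketch of the GGUF magic-byte check this diff adds to GgmlGenerator.__init__, which can be used to verify a model file before a run; the model path in the usage comment is a placeholder.

    # Standalone sketch of the header check introduced above: the first four
    # bytes of a GGUF file are the ASCII magic "GGUF" (0x47 0x47 0x55 0x46).
    import os

    GGUF_MAGIC = bytes([0x47, 0x47, 0x55, 0x46])


    def is_gguf_file(path: str) -> bool:
        """Return True if the file at `path` starts with the GGUF magic bytes."""
        if not os.path.isfile(path):
            raise FileNotFoundError(f"File not found, unable to load model: {path}")
        with open(path, "rb") as model_file:
            return model_file.read(len(GGUF_MAGIC)) == GGUF_MAGIC


    # Hypothetical usage with a placeholder path:
    # print(is_gguf_file("/path/to/model.gguf"))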
pyproject.toml (3 changes: 2 additions & 1 deletion)
@@ -58,7 +58,8 @@ dependencies = [
"zalgolib>=0.2.2",
"ecoji>=0.1.0",
"deepl==1.17.0",
"fschat>=0.2.36"
"fschat>=0.2.36",
"typing>=3.7,<3.8; python_version<'3.5'"
]

[project.urls]
requirements.txt (3 changes: 2 additions & 1 deletion)
@@ -26,4 +26,5 @@ numpy>=1.26.1
zalgolib>=0.2.2
ecoji>=0.1.0
deepl==1.17.0
fschat>=0.2.36
fschat>=0.2.36
typing>=3.7,<3.8; python_version<'3.5'