Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gguf-py : fix some metadata name extraction edge cases #8591

Merged
merged 5 commits into from
Jul 21, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class Model:
gguf_writer: gguf.GGUFWriter
model_name: str | None
metadata_override: Path | None
dir_model_card: Path

# subclasses should define this!
model_arch: gguf.MODEL_ARCH
Expand Down Expand Up @@ -90,6 +91,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path |
self.tensor_names = None
self.metadata_override = metadata_override
self.model_name = model_name
self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py

# Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
if self.ftype == gguf.LlamaFileType.GUESSED:
Expand Down Expand Up @@ -345,7 +347,7 @@ def prepare_metadata(self, vocab_only: bool):

total_params, shared_params, expert_params, expert_count = self.gguf_writer.get_total_parameter_count()

self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, total_params)
self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, self.dir_model_card, total_params)

# Fallback to model directory name if metadata name is still missing
if self.metadata.name is None:
Expand Down
24 changes: 17 additions & 7 deletions convert_lora_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,12 +304,6 @@ def parse_args() -> argparse.Namespace:
# load base model
logger.info(f"Loading base model: {dir_base_model.name}")
hparams = Model.load_hparams(dir_base_model)

with open(lora_config, "r") as f:
lparams: dict[str, Any] = json.load(f)

alpha: float = lparams["lora_alpha"]

with torch.inference_mode():
try:
model_class = Model.from_model_architecture(hparams["architectures"][0])
Expand All @@ -320,12 +314,21 @@ def parse_args() -> argparse.Namespace:
class LoraModel(model_class):
model_arch = model_class.model_arch

lora_alpha: float

def __init__(self, *args, dir_lora_model: Path, lora_alpha: float, **kwargs):

super().__init__(*args, **kwargs)

self.dir_model_card = dir_lora_model
self.lora_alpha = float(lora_alpha)

def set_type(self):
self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")

def set_gguf_parameters(self):
self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)
super().set_gguf_parameters()

def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
Expand Down Expand Up @@ -368,6 +371,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
yield (dest_name + ".lora_a", lora_a)
yield (dest_name + ".lora_b", lora_b)

with open(lora_config, "r") as f:
lparams: dict[str, Any] = json.load(f)

alpha: float = lparams["lora_alpha"]

model_instance = LoraModel(
dir_base_model,
ftype,
Expand All @@ -376,6 +384,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
use_temp_file=False,
eager=args.no_lazy,
dry_run=args.dry_run,
dir_lora_model=dir_lora,
lora_alpha=alpha,
)

logger.info("Exporting model...")
Expand Down
34 changes: 25 additions & 9 deletions gguf-py/gguf/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,22 @@ class Metadata:
datasets: Optional[list[str]] = None

@staticmethod
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, model_card_path: Optional[Path] = None, total_params: int = 0) -> Metadata:
# This grabs as many contextual authorship metadata as possible from the model repository
# making any conversion as required to match the gguf kv store metadata format
# as well as giving users the ability to override any authorship metadata that may be incorrect

# Create a new Metadata instance
metadata = Metadata()

model_card = Metadata.load_model_card(model_path)
if model_card_path is None:
model_card_path = model_path

model_card = Metadata.load_model_card(model_card_path)
hf_params = Metadata.load_hf_parameters(model_path)

# heuristics
metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)
metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_card_path, total_params)

# Metadata Override File Provided
# This is based on LLM_KV_NAMES mapping in llama.cpp
Expand Down Expand Up @@ -177,6 +180,12 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
org_component = None

name_parts: list[str] = model_full_name_component.split('-')

# Remove empty parts
for i in reversed(range(len(name_parts))):
if len(name_parts[i]) == 0:
del name_parts[i]

name_types: list[
set[Literal["basename", "size_label", "finetune", "version", "type"]]
] = [set() for _ in name_parts]
Expand Down Expand Up @@ -227,6 +236,13 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =
if part.lower() == "lora":
name_parts[i] = "LoRA"

# Ignore word-based size labels when there is at least a number-based one present
if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n):
for n, t in zip(name_parts, name_types):
if "size_label" in t:
if all(c.isalpha() for c in n):
t.remove("size_label")

at_start = True
# Find the basename through the annotated name
for part, t in zip(name_parts, name_types):
Expand All @@ -240,18 +256,18 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int =

# Remove the basename annotation from trailing version
for part, t in zip(reversed(name_parts), reversed(name_types)):
if "basename" in t:
if len(t) > 1:
t.remove("basename")
if "basename" in t and len(t) > 1:
t.remove("basename")
else:
break

basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None
size_label = "-".join(s for s, t in zip(name_parts, name_types) if "size_label" in t) or None
# Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None
finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None
# TODO: should the basename version always be excluded?
# TODO: should multiple versions be joined together?
version = ([v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t] or [None])[-1]
# NOTE: multiple finetune versions are joined together
version = "-".join(v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t) or None

if size_label is None and finetune is None and version is None:
# Too ambiguous, output nothing
Expand Down
34 changes: 31 additions & 3 deletions gguf-py/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_get_model_id_components(self):
self.assertEqual(gguf.Metadata.get_model_id_components("NousResearch/Meta-Llama-3-8B"),
('Meta-Llama-3-8B', "NousResearch", 'Meta-Llama-3', None, None, '8B'))

# Can't detect all non standard form in a heuristically safe way... best to err in caution and output nothing...
# Non standard naming
self.assertEqual(gguf.Metadata.get_model_id_components("Qwen1.5-MoE-A2.7B-Chat"),
('Qwen1.5-MoE-A2.7B-Chat', None, 'Qwen1.5-MoE', 'Chat', None, 'A2.7B'))

Expand All @@ -71,7 +71,7 @@ def test_get_model_id_components(self):
self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k", 50 * 10**3),
('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50K'))

# None standard and not easy to disambiguate
# Non standard and not easy to disambiguate
self.assertEqual(gguf.Metadata.get_model_id_components("DeepSeek-Coder-V2-Lite-Instruct"),
('DeepSeek-Coder-V2-Lite-Instruct', None, 'DeepSeek-Coder-V2-Lite', 'Instruct', None, None))

Expand Down Expand Up @@ -123,6 +123,34 @@ def test_get_model_id_components(self):
self.assertEqual(gguf.Metadata.get_model_id_components("bigscience/bloom-7b1-petals"),
('bloom-7b1-petals', 'bigscience', 'bloom', 'petals', None, '7.1B'))

# Ignore full-text size labels when there are number-based ones, and deduplicate size labels
self.assertEqual(gguf.Metadata.get_model_id_components("MaziyarPanahi/GreenNode-mini-7B-multilingual-v1olet-Mistral-7B-Instruct-v0.1"),
('GreenNode-mini-7B-multilingual-v1olet-Mistral-7B-Instruct-v0.1', 'MaziyarPanahi', 'GreenNode-mini', 'multilingual-v1olet-Mistral-Instruct', 'v0.1', '7B'))

# Instruct in a name without a size label
self.assertEqual(gguf.Metadata.get_model_id_components("mistralai/Mistral-Nemo-Instruct-2407"),
('Mistral-Nemo-Instruct-2407', 'mistralai', 'Mistral-Nemo', 'Instruct', '2407', None))

# Non-obvious splitting relying on 'chat' keyword
self.assertEqual(gguf.Metadata.get_model_id_components("deepseek-ai/DeepSeek-V2-Chat-0628"),
('DeepSeek-V2-Chat-0628', 'deepseek-ai', 'DeepSeek-V2', 'Chat', '0628', None))

# Multiple versions
self.assertEqual(gguf.Metadata.get_model_id_components("OpenGVLab/Mini-InternVL-Chat-2B-V1-5"),
('Mini-InternVL-Chat-2B-V1-5', 'OpenGVLab', 'Mini-InternVL', 'Chat', 'V1-5', '2B'))

# Too ambiguous
# TODO: should "base" be a 'finetune' or 'size_label'?
# (in this case it should be a size label, but other models use it to signal that they are not finetuned)
self.assertEqual(gguf.Metadata.get_model_id_components("microsoft/Florence-2-base"),
('Florence-2-base', 'microsoft', None, None, None, None))

## Invalid cases ##

# Start with a dash and has dashes in rows
self.assertEqual(gguf.Metadata.get_model_id_components("mistralai/-Mistral--Nemo-Base-2407-"),
('-Mistral--Nemo-Base-2407-', 'mistralai', 'Mistral-Nemo-Base', None, '2407', None))

def test_apply_metadata_heuristic_from_model_card(self):
model_card = {
'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'],
Expand All @@ -134,7 +162,7 @@ def test_apply_metadata_heuristic_from_model_card(self):
}
got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None)
expect = gguf.Metadata()
expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'version': 'v0', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'version': 'v1', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}]
expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'version': '14-v0', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'version': 'v1', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}]
expect.tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl']
expect.languages=['en']
expect.datasets=['teknium/OpenHermes-2.5']
Expand Down
Loading