Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lora : fix llama conversion script with model having ROPE_FREQS #9117

Merged
merged 1 commit into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,15 @@ class Model:
model_name: str | None
metadata_override: Path | None
dir_model_card: Path
is_lora: bool

# subclasses should define this!
model_arch: gguf.MODEL_ARCH

def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
use_temp_file: bool = False, eager: bool = False,
metadata_override: Path | None = None, model_name: str | None = None,
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False, is_lora: bool = False):
if type(self) is Model:
raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")

Expand All @@ -92,6 +93,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
self.metadata_override = metadata_override
self.model_name = model_name
self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py
self.is_lora = is_lora # true if model is used inside convert_lora_to_gguf.py

# Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
if self.ftype == gguf.LlamaFileType.GUESSED:
Expand Down Expand Up @@ -1593,7 +1595,8 @@ def prepare_tensors(self):
smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
rope_factors.append(1 / ((1 - smooth) / factor + smooth))

self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
if not self.is_lora:
self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
Comment on lines +1598 to +1599
Copy link
Collaborator

@compilade compilade Aug 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For Phi-3, vocab-only conversion is also affected by these rope_freqs tensors, because this is in set_gguf_parameters. (which makes vocab-only Phi-3-128k models produce invalid GGUF files (this is already a problem on master))

A more general solution to both LoRA and vocab-only conversions should be possible.

Maybe some kind of self.generate_extra_tensors() which would be called by self.prepare_tensors() before it calls self.get_tensors(). And LoraModel could simply override generate_extra_tensors() to a no-op (and vocab-only conversion does not call prepare_tensors). It can be done in a follow-up PR, though.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK I'll merge this now and will let you refactor this further in a follow-up PR.
Thank you for the help!


super().prepare_tensors()

Expand Down Expand Up @@ -2140,8 +2143,9 @@ def set_gguf_parameters(self):
if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')

self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
if not self.is_lora:
self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))


@Model.register("PlamoForCausalLM")
Expand Down Expand Up @@ -3839,7 +3843,8 @@ def prepare_tensors(self):
smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
rope_factors.append(1 / ((1 - smooth) / factor + smooth))

self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
if not self.is_lora:
self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))

super().prepare_tensors()

Expand Down
1 change: 1 addition & 0 deletions convert_lora_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
dry_run=args.dry_run,
dir_lora_model=dir_lora,
lora_alpha=alpha,
is_lora=True,
)

logger.info("Exporting model...")
Expand Down
2 changes: 1 addition & 1 deletion tests/test-lora-conversion-inference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ MODELS_REPO_URL=https://huggingface.co/ggml-org/$MODELS_REPO
# Clone the Hugging Face repository if the directory does not exist
if [ ! -d "$MODELS_REPO" ]; then
echo "Cloning the Hugging Face repository..."
git clone $MODELS_REPO_URL
git clone $MODELS_REPO_URL --depth 1
else
echo "Repository already exists. Skipping clone."
fi
Expand Down
Loading