Skip to content

Commit

Permalink
convert-hf : for T5 skip both decoder.embed_tokens and encoder.embed_…
Browse files Browse the repository at this point in the history
…tokens tensors (they are duplicates of shared tensor)
  • Loading branch information
sszymczy committed Jun 23, 2024
1 parent 47a0a0c commit 98931f8
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions convert-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2837,9 +2837,10 @@ def set_gguf_parameters(self):
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
del bid # unused

# flan-t5-xxl contains "decoder.embed_tokens.weight" tensor that is the same as "shared.weight" tensor
# To prevent errors caused by an unnecessary unmapped tensor, skip "decoder.embed_tokens.weight".
if name == "decoder.embed_tokens.weight":
# Some T5 and Flan-T5 based models contain an "encoder.embed_tokens.weight" and/or a
# "decoder.embed_tokens.weight" tensor that is a duplicate of the "shared.weight" tensor.
# To prevent errors caused by these unnecessary unmapped tensors, skip both of them and use only "shared.weight".
if name == "decoder.embed_tokens.weight" or name == "encoder.embed_tokens.weight":
logger.debug(f"Skipping tensor {name!r} in safetensors so that convert can end normally.")
return []

Expand Down

0 comments on commit 98931f8

Please sign in to comment.