convert-hf : for T5 skip both decoder.embed_tokens and encoder.embed_tokens tensors (they are duplicates of shared tensor)
ggerganov · Jun 23, 2024 · 98931f8 · 98931f8
1 parent 47a0a0c
commit 98931f8
Showing 1 changed file with 4 additions and 3 deletions.
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
@@ -2837,9 +2837,10 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
 
-        # flan-t5-xxl contains "decoder.embed_tokens.weight" tensor that is the same as "shared.weight" tensor
-        # To prevent errors caused by an unnecessary unmapped tensor, skip "decoder.embed_tokens.weight".
-        if name == "decoder.embed_tokens.weight":
+        # Sometimes T5 and Flan-T5 based models contain "encoder.embed_tokens.weight" tensor or
+        # "decoder.embed_tokens.weight" tensors that are duplicates of "shared.weight" tensor
+        # To prevent errors caused by an unnecessary unmapped tensor, skip both of them and use only "shared.weight".
+        if name == "decoder.embed_tokens.weight" or name == "encoder.embed_tokens.weight":
             logger.debug(f"Skipping tensor {name!r} in safetensors so that convert can end normally.")
             return []