convert chameleon hf to gguf
nopperl committed Jul 15, 2024
1 parent bda62d7 commit 385c1a8
Showing 4 changed files with 46 additions and 2 deletions.
25 changes: 25 additions & 0 deletions convert_hf_to_gguf.py
@@ -521,6 +521,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "b53802fb28e26d645c3a310b34bfe07da813026ec7c7716883404d5e0f8b1901":
# ref: https://huggingface.co/core42/jais-13b
res = "jais"
if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
# ref: https://huggingface.co/facebook/chameleon-7b
res = "chameleon"

if res is None:
logger.warning("\n")
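
For context: chkhsh is a fingerprint of the model's pre-tokenizer, obtained by encoding a fixed probe string and hashing the resulting token IDs. A minimal sketch of that computation (the real probe string chktxt is long and elided here):

    from hashlib import sha256
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/chameleon-7b")
    chktxt = "..."  # fixed probe string defined in the conversion scripts (elided)
    chktok = tokenizer.encode(chktxt)                  # token IDs for the probe text
    chkhsh = sha256(str(chktok).encode()).hexdigest()  # stable fingerprint of the encoding
    # per this commit, chameleon-7b yields
    # 60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450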
@@ -3419,6 +3422,28 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
name = name.removeprefix("transformer.")
return [(self.map_tensor_name(name), data_torch)]

@Model.register("ChameleonForCausalLM")
class ChameleonModel(Model):
model_arch = gguf.MODEL_ARCH.CHAMELEON

def set_vocab(self):
self._set_vocab_gpt2()

def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# ignore image tokenizer for now
if name.startswith("model.vqmodel"):
return []

n_head = self.hparams["num_attention_heads"]
n_kv_head = self.hparams.get("num_key_value_heads")

if name.endswith(("q_proj.weight", "q_proj.bias")):
data_torch = LlamaModel.permute(data_torch, n_head, n_head)
if name.endswith(("k_proj.weight", "k_proj.bias")):
data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)

return [(self.map_tensor_name(name), data_torch)]
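
The Q/K permutation mirrors the Llama converter: Hugging Face checkpoints store the two rotary halves of each attention head's q_proj/k_proj rows interleaved, while GGUF expects them grouped, so the rows are reshuffled at conversion time. For reference, LlamaModel.permute in this script is roughly:

    # Regroups the rotary halves of a q_proj/k_proj weight from HF order to GGUF order.
    @staticmethod
    def permute(weights: Tensor, n_head: int, n_head_kv: int | None) -> Tensor:
        if n_head_kv is not None and n_head != n_head_kv:
            n_head = n_head_kv  # k_proj has fewer heads under grouped-query attention
        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                .swapaxes(1, 2)
                .reshape(weights.shape))

Note that q_proj is permuted with n_head for both arguments, while k_proj uses n_kv_head, matching the grouped-query layout of the checkpoint.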

###### CONVERSION LOGIC ######


1 change: 1 addition & 0 deletions convert_hf_to_gguf_update.py
@@ -91,6 +91,7 @@ class TOKENIZER_TYPE(IntEnum):
{"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
{"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
{"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
{"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
]
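
After adding an entry here, convert_hf_to_gguf_update.py is meant to be re-run (python convert_hf_to_gguf_update.py <huggingface_token>); it downloads each listed tokenizer, recomputes the chkhsh fingerprints, and regenerates the get_vocab_base_pre branches in convert_hf_to_gguf.py. The chameleon branch shown above is that generated output.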


18 changes: 18 additions & 0 deletions gguf-py/gguf/constants.py
@@ -166,6 +166,7 @@ class MODEL_ARCH(IntEnum):
BITNET = auto()
T5 = auto()
JAIS = auto()
CHAMELEON = auto()


class MODEL_TENSOR(IntEnum):
@@ -293,6 +294,7 @@ class MODEL_TENSOR(IntEnum):
MODEL_ARCH.BITNET: "bitnet",
MODEL_ARCH.T5: "t5",
MODEL_ARCH.JAIS: "jais",
MODEL_ARCH.CHAMELEON: "chameleon",
}

TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -996,6 +998,22 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.CHAMELEON: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_Q_NORM,
MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_K_NORM,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
# TODO
}
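
These per-architecture tensor lists back the map_tensor_name call used during conversion. A minimal sketch of resolving one Chameleon tensor name through gguf-py (n_blocks chosen arbitrarily):

    import gguf

    # Build the HF-name -> GGUF-name map for the new architecture.
    tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CHAMELEON, n_blocks=32)
    name = tmap.get_name("model.layers.0.self_attn.q_norm.weight",
                         try_suffixes=(".weight", ".bias"))
    print(name)  # should print: blk.0.attn_q_norm.weight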

4 changes: 2 additions & 2 deletions gguf-py/gguf/tensor_mapping.py
@@ -364,7 +364,7 @@ class TensorNameMap:
MODEL_TENSOR.ATTN_Q_NORM: (
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
"model.layers.{bid}.self_attn.q_norm", # cohere
"model.layers.{bid}.self_attn.q_norm", # cohere chameleon
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
"transformer.layers.{bid}.attn.q_norm", # openelm
@@ -373,7 +373,7 @@ class TensorNameMap:
MODEL_TENSOR.ATTN_K_NORM: (
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
"model.layers.{bid}.self_attn.k_norm", # cohere
"model.layers.{bid}.self_attn.k_norm", # cohere chameleon
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
"transformer.layers.{bid}.attn.k_norm", # openelm
