Deepseek coder merge #5464

Closed · wants to merge 2 commits
17 changes: 17 additions & 0 deletions .gitignore
@@ -90,3 +90,20 @@ examples/jeopardy/results.txt
poetry.lock
poetry.toml
nppBackup

# Test binaries
/tests/test-grammar-parser
/tests/test-llama-grammar
/tests/test-double-float
/tests/test-grad0
/tests/test-opt
/tests/test-quantize-fns
/tests/test-quantize-perf
/tests/test-sampling
/tests/test-tokenizer-0-llama
/tests/test-tokenizer-0-falcon
/tests/test-tokenizer-0-deepseek-coder
/tests/test-tokenizer-1-llama
/tests/test-tokenizer-1-bpe
/tests/test-rope
/tests/test-backend-ops
15 changes: 13 additions & 2 deletions Makefile
@@ -8,8 +8,9 @@ BUILD_TARGETS = \
TEST_TARGETS = \
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease
tests/test-tokenizer-0-falcon tests/test-tokenizer-0-deepseek-coder tests/test-tokenizer-0-deepseek-llm \
tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
tests/test-backend-ops

# Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -52,6 +53,10 @@ test: $(TEST_TARGETS)
./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
elif [ "$$test_target" = "tests/test-tokenizer-0-deepseek-coder" ]; then \
./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-coder.gguf; \
elif [ "$$test_target" = "tests/test-tokenizer-0-deepseek-llm" ]; then \
./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-llm.gguf; \
elif [ "$$test_target" = "tests/test-tokenizer-1-llama" ]; then \
continue; \
elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
@@ -828,6 +833,12 @@ tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

tests/test-tokenizer-0-deepseek-coder: tests/test-tokenizer-0-deepseek-coder.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-tokenizer-0-deepseek-llm: tests/test-tokenizer-0-deepseek-llm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
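
For reference, building and running one of these new tests by hand would look roughly like: make tests/test-tokenizer-0-deepseek-coder && ./tests/test-tokenizer-0-deepseek-coder models/ggml-vocab-deepseek-coder.gguf, assuming the matching vocab GGUF file has already been placed in models/ (this mirrors the invocation the test target above uses).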

tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
85 changes: 83 additions & 2 deletions convert-hf-to-gguf.py
@@ -187,6 +187,8 @@ def from_model_architecture(model_architecture):
return RefactModel
if model_architecture == "PersimmonForCausalLM":
return PersimmonModel
if model_architecture == "LlamaForCausalLM":
return DeepseekCoderModel
if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
return StableLMModel
if model_architecture == "QWenLMHeadModel":
@@ -211,6 +213,59 @@ def from_model_architecture(model_architecture):
return MiniCPMModel
if model_architecture == "BertModel":
return BertModel

@staticmethod
def from_model_name(model_name: str):
Collaborator:

Was this ever used? And why is this function duplicated below?

Contributor (author):

> Was this ever used? And why is this function duplicated below?

I'm not sure. It looks like a convenience function for mapping, in case someone needs it in future convert-hf-to-gguf work. (The duplication is likely my fault, of course.) Should I get rid of it, or comment it out? (Remember, I'm mostly just merging their contributed DeepSeek/HF tokenizer code over blindly -- although it does work, and it resolves that out-of-range error too.) @ggerganov

Let me know -- I'll also have to rebase and resubmit.

cebtenzzre (Collaborator), Mar 1, 2024:
I just checked - it looks like ggerganov accidentally dropped this in d24da31 (#4070). It's apparently used for forcing the model used via the command line? This will really be out of place after #5825; it should probably just be removed.

cebtenzzre (Collaborator), Mar 1, 2024:
I see now - DeepseekCoderModel and DeepseekLLMModel can't be disambiguated from the model architecture alone. This should be changed so that they use a single class that derives tokenizer_model from either the model's config, or the command-line arguments if it really is a user choice.

It's honestly not clear to me why LlamaForCausalLM is referenced at all in convert-hf-to-gguf.py - convert.py is already capable of dealing with a llama model with a non-SPM tokenizer, and has superior memory management (so it's faster).
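
A minimal sketch of the single-class approach suggested in this comment, for illustration only (it is not part of the PR). It assumes a hypothetical tokenizer_model hint available from the model's config or from a user-supplied model name, and reuses the _set_vocab_gpt2(tokenizer_model) hook this PR adds:

# Hypothetical sketch, not code from this PR. The "tokenizer_model" config key
# and self.model_name are assumptions used only for illustration.
class DeepseekModel(Model):
    def set_vocab(self):
        # Prefer an explicit hint from the model's config, if one exists.
        tokenizer_model = self.hparams.get("tokenizer_model")
        if tokenizer_model is None:
            # Otherwise fall back to a user-supplied name (e.g. from --model-name).
            name = (getattr(self, "model_name", None) or "").lower()
            tokenizer_model = "deepseek_llm" if "llm" in name else "deepseek_coder"
        self._set_vocab_gpt2(tokenizer_model)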

model_name_lower = model_name.lower()
if model_name_lower in ("stablelmepoch", "llavastablelmepoch"):
return StableLMModel
if model_name_lower == "gptneox":
return GPTNeoXModel
if model_name_lower == "bloom":
return BloomModel
if model_name_lower == "mpt":
return MPTModel
if model_name_lower in ("baichuan"):
return BaichuanModel
if model_name_lower in ("falcon", "rw"):
return FalconModel
if model_name_lower == "gptbigcode":
return StarCoderModel
if model_name_lower == "gptrefact":
return RefactModel
if model_name_lower == "persimmon":
return PersimmonModel
if model_name_lower == "deepseekcoder":
return DeepseekCoderModel
if model_name_lower == "deepseekllm":
return DeepseekLLMModel
return Model

@staticmethod
def from_model_name(model_name: str):
model_name_lower = model_name.lower()
if model_name_lower in ("stablelmepoch", "llavastablelmepoch"):
return StableLMModel
if model_name_lower == "gptneox":
return GPTNeoXModel
if model_name_lower == "bloom":
return BloomModel
if model_name_lower == "mpt":
return MPTModel
if model_name_lower in ("baichuan"):
return BaichuanModel
if model_name_lower in ("falcon", "rw"):
return FalconModel
if model_name_lower == "gptbigcode":
return StarCoderModel
if model_name_lower == "gptrefact":
return RefactModel
if model_name_lower == "persimmon":
return PersimmonModel
if model_name_lower == "deepseekcoder":
return DeepseekCoderModel
if model_name_lower == "deepseekllm":
return DeepseekLLMModel
return Model

def _is_model_safetensors(self) -> bool:
@@ -244,6 +299,8 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
return gguf.MODEL_ARCH.REFACT
if arch == "PersimmonForCausalLM":
return gguf.MODEL_ARCH.PERSIMMON
if arch == "LlamaForCausalLM":
return gguf.MODEL_ARCH.LLAMA
if arch in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
return gguf.MODEL_ARCH.STABLELM
if arch == "QWenLMHeadModel":
@@ -271,7 +328,7 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:

raise NotImplementedError(f'Architecture "{arch}" not supported!')

def _set_vocab_gpt2(self):
def _set_vocab_gpt2(self, tokenizer_model:str = "gpt2"):
dir_model = self.dir_model
hparams = self.hparams
tokens: list[bytearray] = []
@@ -300,7 +357,7 @@ def _set_vocab_gpt2(self):
tokens.append(reverse_vocab[i])
toktypes.append(gguf.TokenType.NORMAL)

self.gguf_writer.add_tokenizer_model("gpt2")
self.gguf_writer.add_tokenizer_model(tokenizer_model)
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_types(toktypes)
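
With this change, subclasses can record a tokenizer model name other than "gpt2" in the GGUF metadata while still reusing the same BPE vocab export path; the Deepseek classes added further down pass "deepseek_coder" and "deepseek_llm" respectively.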

@@ -1048,6 +1105,29 @@ def write_tensors(self):
self.gguf_writer.add_tensor(new_name, data)


class DeepseekCoderModel(Model):
    def set_gguf_parameters(self):
        super().set_gguf_parameters()
        head_count = self.hparams["num_attention_heads"]
        head_count_kv = self.hparams.get("num_key_value_heads", head_count)
        self.gguf_writer.add_head_count(head_count)
        self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
        self.gguf_writer.add_head_count_kv(head_count_kv)
        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
        self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])

        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
            if self.hparams["rope_scaling"].get("type") == "linear":
                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])

    def set_vocab(self):
        self._set_vocab_gpt2("deepseek_coder")


class DeepseekLLMModel(DeepseekCoderModel):
    def set_vocab(self):
        self._set_vocab_gpt2("deepseek_llm")

class StableLMModel(Model):
def set_vocab(self):
if (self.dir_model / "tokenizer.json").is_file():
@@ -1749,6 +1829,7 @@ def parse_args() -> argparse.Namespace:
"model", type=Path,
help="directory containing model file",
)
parser.add_argument("--model-name", type=str, default=None, help="name of the model")

return parser.parse_args()
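
The new --model-name flag presumably feeds the from_model_name lookup added above, letting a user force the DeepseekCoderModel or DeepseekLLMModel path when the architecture string alone (LlamaForCausalLM) is ambiguous, for example: python convert-hf-to-gguf.py <model-dir> --model-name deepseekcoder. The wiring between the flag and the lookup is not visible in this excerpt, so the exact usage is an assumption.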
