Deepseek coder merge #5464

Closed · wants to merge 2 commits
17 changes: 17 additions & 0 deletions .gitignore
@@ -90,3 +90,20 @@ examples/jeopardy/results.txt
poetry.lock
poetry.toml
nppBackup

# Test binaries
/tests/test-grammar-parser
/tests/test-llama-grammar
/tests/test-double-float
/tests/test-grad0
/tests/test-opt
/tests/test-quantize-fns
/tests/test-quantize-perf
/tests/test-sampling
/tests/test-tokenizer-0-llama
/tests/test-tokenizer-0-falcon
/tests/test-tokenizer-0-deepseek-coder
/tests/test-tokenizer-1-llama
/tests/test-tokenizer-1-bpe
/tests/test-rope
/tests/test-backend-ops
15 changes: 13 additions & 2 deletions Makefile
@@ -8,8 +8,9 @@ BUILD_TARGETS = \
TEST_TARGETS = \
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease
tests/test-tokenizer-0-falcon tests/test-tokenizer-0-deepseek-coder tests/test-tokenizer-0-deepseek-llm \
tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
tests/test-backend-ops

# Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -52,6 +53,10 @@ test: $(TEST_TARGETS)
./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
elif [ "$$test_target" = "tests/test-tokenizer-0-deepseek-coder" ]; then \
./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-coder.gguf; \
elif [ "$$test_target" = "tests/test-tokenizer-0-deepseek-llm" ]; then \
./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-llm.gguf; \
elif [ "$$test_target" = "tests/test-tokenizer-1-llama" ]; then \
continue; \
elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
@@ -828,6 +833,12 @@ tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

tests/test-tokenizer-0-deepseek-coder: tests/test-tokenizer-0-deepseek-coder.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-tokenizer-0-deepseek-llm: tests/test-tokenizer-0-deepseek-llm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
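
For reference, building and running one of these new tests by hand would look roughly like: make tests/test-tokenizer-0-deepseek-coder && ./tests/test-tokenizer-0-deepseek-coder models/ggml-vocab-deepseek-coder.gguf, assuming the matching vocab GGUF file has already been placed in models/ (this mirrors the invocation the test target above uses).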

tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
85 changes: 83 additions & 2 deletions convert-hf-to-gguf.py
@@ -187,6 +187,8 @@ def from_model_architecture(model_architecture):
return RefactModel
if model_architecture == "PersimmonForCausalLM":
return PersimmonModel
if model_architecture == "LlamaForCausalLM":
return DeepseekCoderModel
if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
return StableLMModel
if model_architecture == "QWenLMHeadModel":
@@ -211,6 +213,59 @@ def from_model_architecture(model_architecture):
return MiniCPMModel
if model_architecture == "BertModel":
return BertModel

@staticmethod
def from_model_name(model_name: str):
Collaborator:

Was this ever used? And why is this function duplicated below?

Contributor (author):

> Was this ever used? And why is this function duplicated below?

I'm not sure. It looks like a convenience function for mapping, in case someone needs it in future convert-hf-to-gguf work. (The duplication is likely my fault, of course.) Should I get rid of it, or comment it out? (Remember, I'm mostly just merging their contributed DeepSeek/HF tokenizer code over blindly -- although it does work, and it resolves that out-of-range error too.) @ggerganov

Let me know -- I'll also have to rebase and resubmit.

cebtenzzre (Collaborator), Mar 1, 2024:
I just checked - it looks like ggerganov accidentally dropped this in d24da31 (#4070). It's apparently used for forcing the model used via the command line? This will really be out of place after #5825; it should probably just be removed.

cebtenzzre (Collaborator), Mar 1, 2024:
I see now - DeepseekCoderModel and DeepseekLLMModel can't be disambiguated from the model architecture alone. This should be changed so that they use a single class that derives tokenizer_model from either the model's config, or the command-line arguments if it really is a user choice.

It's honestly not clear to me why LlamaForCausalLM is referenced at all in convert-hf-to-gguf.py - convert.py is already capable of dealing with a llama model with a non-SPM tokenizer, and has superior memory management (so it's faster).
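
A minimal sketch of the single-class approach suggested in this comment, for illustration only (it is not part of the PR). It assumes a hypothetical tokenizer_model hint available from the model's config or from a user-supplied model name, and reuses the _set_vocab_gpt2(tokenizer_model) hook this PR adds:

# Hypothetical sketch, not code from this PR. The "tokenizer_model" config key
# and self.model_name are assumptions used only for illustration.
class DeepseekModel(Model):
    def set_vocab(self):
        # Prefer an explicit hint from the model's config, if one exists.
        tokenizer_model = self.hparams.get("tokenizer_model")
        if tokenizer_model is None:
            # Otherwise fall back to a user-supplied name (e.g. from --model-name).
            name = (getattr(self, "model_name", None) or "").lower()
            tokenizer_model = "deepseek_llm" if "llm" in name else "deepseek_coder"
        self._set_vocab_gpt2(tokenizer_model)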

model_name_lower = model_name.lower()
if model_name_lower in ("stablelmepoch", "llavastablelmepoch"):
return StableLMModel
if model_name_lower == "gptneox":
return GPTNeoXModel
if model_name_lower == "bloom":
return BloomModel
if model_name_lower == "mpt":
return MPTModel
if model_name_lower in ("baichuan"):
return BaichuanModel
if model_name_lower in ("falcon", "rw"):
return FalconModel
if model_name_lower == "gptbigcode":
return StarCoderModel
if model_name_lower == "gptrefact":
return RefactModel
if model_name_lower == "persimmon":
return PersimmonModel
if model_name_lower == "deepseekcoder":
return DeepseekCoderModel
if model_name_lower == "deepseekllm":
return DeepseekLLMModel
return Model

@staticmethod
def from_model_name(model_name: str):
model_name_lower = model_name.lower()
if model_name_lower in ("stablelmepoch", "llavastablelmepoch"):
return StableLMModel
if model_name_lower == "gptneox":
return GPTNeoXModel
if model_name_lower == "bloom":
return BloomModel
if model_name_lower == "mpt":
return MPTModel
if model_name_lower in ("baichuan"):
return BaichuanModel
if model_name_lower in ("falcon", "rw"):
return FalconModel
if model_name_lower == "gptbigcode":
return StarCoderModel
if model_name_lower == "gptrefact":
return RefactModel
if model_name_lower == "persimmon":
return PersimmonModel
if model_name_lower == "deepseekcoder":
return DeepseekCoderModel
if model_name_lower == "deepseekllm":
return DeepseekLLMModel
return Model

def _is_model_safetensors(self) -> bool:
@@ -244,6 +299,8 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
return gguf.MODEL_ARCH.REFACT
if arch == "PersimmonForCausalLM":
return gguf.MODEL_ARCH.PERSIMMON
if arch == "LlamaForCausalLM":
return gguf.MODEL_ARCH.LLAMA
if arch in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
return gguf.MODEL_ARCH.STABLELM
if arch == "QWenLMHeadModel":
@@ -271,7 +328,7 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:

raise NotImplementedError(f'Architecture "{arch}" not supported!')

def _set_vocab_gpt2(self):
def _set_vocab_gpt2(self, tokenizer_model:str = "gpt2"):
dir_model = self.dir_model
hparams = self.hparams
tokens: list[bytearray] = []
@@ -300,7 +357,7 @@ def _set_vocab_gpt2(self):
tokens.append(reverse_vocab[i])
toktypes.append(gguf.TokenType.NORMAL)

self.gguf_writer.add_tokenizer_model("gpt2")
self.gguf_writer.add_tokenizer_model(tokenizer_model)
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_types(toktypes)
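
With this change, subclasses can record a tokenizer model name other than "gpt2" in the GGUF metadata while still reusing the same BPE vocab export path; the Deepseek classes added further down pass "deepseek_coder" and "deepseek_llm" respectively.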

@@ -1048,6 +1105,29 @@ def write_tensors(self):
self.gguf_writer.add_tensor(new_name, data)


class DeepseekCoderModel(Model):
    def set_gguf_parameters(self):
        super().set_gguf_parameters()
        head_count = self.hparams["num_attention_heads"]
        head_count_kv = self.hparams.get("num_key_value_heads", head_count)
        self.gguf_writer.add_head_count(head_count)
        self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
        self.gguf_writer.add_head_count_kv(head_count_kv)
        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
        self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])

        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
            if self.hparams["rope_scaling"].get("type") == "linear":
                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])

    def set_vocab(self):
        self._set_vocab_gpt2("deepseek_coder")


class DeepseekLLMModel(DeepseekCoderModel):
    def set_vocab(self):
        self._set_vocab_gpt2("deepseek_llm")

class StableLMModel(Model):
def set_vocab(self):
if (self.dir_model / "tokenizer.json").is_file():
@@ -1749,6 +1829,7 @@ def parse_args() -> argparse.Namespace:
"model", type=Path,
help="directory containing model file",
)
parser.add_argument("--model-name", type=str, default=None, help="name of the model")

return parser.parse_args()
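
The new --model-name flag presumably feeds the from_model_name lookup added above, letting a user force the DeepseekCoderModel or DeepseekLLMModel path when the architecture string alone (LlamaForCausalLM) is ambiguous, for example: python convert-hf-to-gguf.py <model-dir> --model-name deepseekcoder. The wiring between the flag and the lookup is not visible in this excerpt, so the exact usage is an assumption.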
