feat: sync llama.cpp
jhen0409 committed Oct 23, 2023
1 parent 43b1d71 commit 66f48a5
Showing 4 changed files with 41 additions and 20 deletions.
4 changes: 2 additions & 2 deletions cpp/build-info.h
@@ -1,8 +1,8 @@
 #ifndef BUILD_INFO_H
 #define BUILD_INFO_H
 
-#define BUILD_NUMBER 1407
-#define BUILD_COMMIT "465219b"
+#define BUILD_NUMBER 1414
+#define BUILD_COMMIT "96981f3"
 #define BUILD_COMPILER ""
 #define BUILD_TARGET "unknown"
 
1 change: 1 addition & 0 deletions cpp/common.cpp
@@ -632,6 +632,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
     process_escapes(params.prompt);
     process_escapes(params.input_prefix);
     process_escapes(params.input_suffix);
+    process_escapes(sparams.cfg_negative_prompt);
     for (auto & antiprompt : params.antiprompt) {
         process_escapes(antiprompt);
     }
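The added line gives the classifier-free-guidance negative prompt the same escape handling the other prompt fields already receive, so a shell argument such as "\n" becomes a real newline before tokenization. A minimal sketch of that kind of in-place unescaping (a hypothetical standalone helper, not the actual process_escapes in common.cpp):

#include <cstddef>
#include <string>

// Decode backslash escapes in place, e.g. the two characters '\\','n' -> '\n'.
static void unescape_in_place(std::string & s) {
    std::size_t out = 0;
    for (std::size_t in = 0; in < s.size(); ++in) {
        if (s[in] == '\\' && in + 1 < s.size()) {
            switch (s[++in]) {
                case 'n':  s[out++] = '\n'; break;
                case 't':  s[out++] = '\t'; break;
                case '\\': s[out++] = '\\'; break;
                default:   s[out++] = '\\'; s[out++] = s[in]; break;
            }
        } else {
            s[out++] = s[in];
        }
    }
    s.resize(out);
}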
54 changes: 37 additions & 17 deletions cpp/llama.cpp
@@ -986,14 +986,15 @@ static void llama_nop(struct lm_ggml_tensor * tensor) { // don't offload by defa
     (void) tensor;
 }
 
-static std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
+static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
     std::vector<char> result(8, 0);
     const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
     if (n_tokens < 0) {
         result.resize(-n_tokens);
         int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
         LM_GGML_ASSERT(check == -n_tokens);
-    } else {
+    }
+    else {
         result.resize(n_tokens);
     }
 
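Beyond the rename from llama_token_to_str to llama_token_to_piece, this wrapper shows the API's grow-and-retry contract: the call returns the number of bytes written, or the negated required size when the buffer is too small. A self-contained sketch of the same convention, with render_piece as a hypothetical stand-in for the C call:

#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

// Returns bytes written, or the negated required size if `length` is too small.
static int render_piece(int token, char * buf, int length) {
    const std::string piece = "<token " + std::to_string(token) + ">"; // stand-in payload
    if ((int) piece.size() > length) {
        return -(int) piece.size();
    }
    std::memcpy(buf, piece.data(), piece.size());
    return (int) piece.size();
}

int main() {
    std::vector<char> result(8, 0);
    int n = render_piece(42, result.data(), (int) result.size());
    if (n < 0) {                 // too small: grow to the exact size and retry
        result.resize(-n);
        n = render_piece(42, result.data(), (int) result.size());
    }
    printf("%.*s\n", n, result.data());
}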
@@ -1213,10 +1214,10 @@ struct llama_vocab {
     id special_eot_id = 32010;
 
     int find_bpe_rank(std::string token_left, std::string token_right) const {
-        replace_all(token_left, " ", "\u0120");
-        replace_all(token_left, "\n", "\u010A");
-        replace_all(token_right, " ", "\u0120");
-        replace_all(token_right, "\n", "\u010A");
+        LM_GGML_ASSERT(token_left.find(" ") == std::string::npos);
+        LM_GGML_ASSERT(token_left.find("\n") == std::string::npos);
+        LM_GGML_ASSERT(token_right.find(" ") == std::string::npos);
+        LM_GGML_ASSERT(token_right.find("\n") == std::string::npos);
 
         auto it = bpe_ranks.find(std::make_pair(token_left, token_right));
         if (it == bpe_ranks.end()) {
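The replace_all calls disappear because byte-level BPE vocabularies store whitespace using the GPT-2 printable stand-ins up front (' ' as U+0120 "Ġ", '\n' as U+010A "Ċ"), so find_bpe_rank now merely asserts that no raw space or newline reaches the rank lookup. A sketch of the mapping assumed to have been applied when the merges were loaded (map_whitespace is a hypothetical helper):

#include <string>

// Map raw whitespace bytes to their GPT-2 byte-level BPE stand-ins,
// written here as UTF-8: U+0120 for ' ', U+010A for '\n'.
static std::string map_whitespace(const std::string & token) {
    std::string out;
    for (char c : token) {
        if (c == ' ')       out += "\u0120";
        else if (c == '\n') out += "\u010A";
        else                out += c;
    }
    return out;
}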
@@ -2249,15 +2250,35 @@ static void llm_load_vocab(
     if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
         vocab.linefeed_id = llama_byte_to_token(vocab, '\n');
     } else {
-        vocab.linefeed_id = llama_tokenize_internal(vocab, "\u010A", false)[0];
+        const std::vector<int> ids = llama_tokenize_internal(vocab, "\u010A", false);
+        LM_GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
+        vocab.linefeed_id = ids[0];
     }
 
     // special tokens
-    GGUF_GET_KEY(ctx, vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_BOS_ID));
-    GGUF_GET_KEY(ctx, vocab.special_eos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_EOS_ID));
-    GGUF_GET_KEY(ctx, vocab.special_unk_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_UNK_ID));
-    GGUF_GET_KEY(ctx, vocab.special_sep_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_SEP_ID));
-    GGUF_GET_KEY(ctx, vocab.special_pad_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_PAD_ID));
+    {
+        const std::vector<std::pair<enum llm_kv, int32_t &>> special_token_types = {
+            { LLM_KV_TOKENIZER_BOS_ID, vocab.special_bos_id },
+            { LLM_KV_TOKENIZER_EOS_ID, vocab.special_eos_id },
+            { LLM_KV_TOKENIZER_UNK_ID, vocab.special_unk_id },
+            { LLM_KV_TOKENIZER_SEP_ID, vocab.special_sep_id },
+            { LLM_KV_TOKENIZER_PAD_ID, vocab.special_pad_id },
+        };
+        for (const auto & it : special_token_types) {
+            const std::string & key = kv(std::get<0>(it));
+            int32_t & id = std::get<1>(it), old_id = id;
+
+            GGUF_GET_KEY(ctx, id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, key);
+            // Must be >= -1 and < vocab size. Since the key is unsigned, -1
+            // can only come from the default value, so there's no point in
+            // validating that.
+            if (size_t(id + 1) > vocab.id_to_token.size()) {
+                LLAMA_LOG_WARN("%s: bad special token: '%s' = %d, using default id %d\n",
+                    __func__, key.c_str(), id, old_id);
+                id = old_id;
+            }
+        }
+    }
 
     // build special tokens cache
     {
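Both halves of this hunk are validation. The linefeed lookup now asserts that tokenizing "\u010A" produced at least one id before indexing into the result, and each special token id read from the GGUF metadata is range-checked before it replaces the built-in default. The size_t(id + 1) cast folds "id >= -1" and "id < vocab size" into a single unsigned comparison; a small standalone sketch of just that check (special_id_ok is a hypothetical name):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Valid ids are -1 (unset, keeps the default) or 0..n_vocab-1.
// size_t(id + 1) maps -1 to 0 and wraps more-negative ids to huge values.
static bool special_id_ok(int32_t id, size_t n_vocab) {
    return size_t(id + 1) <= n_vocab;
}

int main() {
    printf("%d %d %d %d\n",
           special_id_ok(-1, 32000),     // 1: unset
           special_id_ok(2, 32000),      // 1: in range
           special_id_ok(32000, 32000),  // 0: one past the end
           special_id_ok(-5, 32000));    // 0: wraps to a huge value
}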
@@ -6114,11 +6135,10 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
 }
 
 static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
+    static const char * hex = "0123456789ABCDEF";
     switch (llama_vocab_get_type(vocab)) {
         case LLAMA_VOCAB_TYPE_SPM: {
-            char buf[7];
-            int result = snprintf(buf, sizeof(buf), "<0x%02X>", ch);
-            LM_GGML_ASSERT(0 <= result && result < 7);
+            const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
             return vocab.token_to_id.at(buf);
         }
         case LLAMA_VOCAB_TYPE_BPE: {
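The SPM byte-fallback path now builds the "<0xXX>" token name from a constant hex table instead of calling snprintf for every byte, dropping the stdio call at this site. The two formulations produce identical strings for all 256 byte values, which a quick standalone check can confirm:

#include <cassert>
#include <cstdio>
#include <string>

int main() {
    static const char * hex = "0123456789ABCDEF";
    for (unsigned ch = 0; ch < 256; ++ch) {
        char via_snprintf[7];
        snprintf(via_snprintf, sizeof(via_snprintf), "<0x%02X>", ch);
        const char via_table[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
        assert(std::string(via_snprintf) == via_table); // same 6-character name
    }
    puts("ok");
}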
@@ -7491,7 +7511,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
 
     for (size_t i = 0; i < candidates->size; ++i) {
         const llama_token id = candidates->data[i].id;
-        const std::string piece = llama_token_to_str(ctx, id);
+        const std::string piece = llama_token_to_piece(ctx, id);
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;
@@ -7703,7 +7723,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
         LM_GGML_ASSERT(false);
     }
 
-    const std::string piece = llama_token_to_str(ctx, token);
+    const std::string piece = llama_token_to_piece(ctx, token);
 
     // Note terminating 0 in decoded string
     const auto decoded = decode_utf8(piece.c_str(), grammar->partial_utf8);
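The grammar machinery matches Unicode code points rather than bytes, and grammar->partial_utf8 carries decoder state from one token to the next because a multi-byte UTF-8 character can be split across two pieces. A simplified incremental decoder showing why that state is needed (a sketch only, not the actual decode_utf8, which also emits a terminating 0 and handles invalid sequences):

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct partial_utf8_state { uint32_t value = 0; int n_remain = 0; };

// Feed one piece; completed code points are appended to `out`,
// an unfinished sequence is carried over in `st`.
static void decode_utf8_sketch(const std::string & piece, partial_utf8_state & st,
                               std::vector<uint32_t> & out) {
    for (unsigned char b : piece) {
        if (st.n_remain > 0) {                          // continuation byte 10xxxxxx
            st.value = (st.value << 6) | (b & 0x3F);
            if (--st.n_remain == 0) out.push_back(st.value);
        } else if (b < 0x80)         { out.push_back(b); }
        else if ((b & 0xE0) == 0xC0) { st.value = b & 0x1F; st.n_remain = 1; }
        else if ((b & 0xF0) == 0xE0) { st.value = b & 0x0F; st.n_remain = 2; }
        else                         { st.value = b & 0x07; st.n_remain = 3; }
    }
}

int main() {
    partial_utf8_state st;
    std::vector<uint32_t> cps;
    decode_utf8_sketch("\xE2\x82", st, cps); // first two bytes of U+20AC
    decode_utf8_sketch("\xAC", st, cps);     // final byte arrives in the next piece
    printf("U+%04X\n", (unsigned) cps[0]);   // prints U+20AC
}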
2 changes: 1 addition & 1 deletion llama.cpp
