cont : drop "penalty prompt" support (#3727)
ggml-ci
ggerganov committed Aug 12, 2024
1 parent e08100c commit c5734f1
Showing 6 changed files with 4 additions and 61 deletions.
2 changes: 1 addition & 1 deletion common/sampling.cpp
@@ -354,7 +354,7 @@ static llama_token_data_array llama_sampling_prepare_impl(
llama_token_data_array cur_p = { cur.data(), cur.size(), false };

// apply penalties
const auto& penalty_tokens = params.use_penalty_prompt_tokens ? params.penalty_prompt_tokens : prev;
const auto & penalty_tokens = prev;
const int penalty_tokens_used_size = std::min((int)penalty_tokens.size(), penalty_last_n);
if (penalty_tokens_used_size) {
const float nl_logit = logits[llama_token_nl(llama_get_model(ctx_main))];
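For reference, a simplified sketch (not the repository's exact code) of what the penalty step above now does: with the `penalty_prompt` path gone, the window of tokens being penalized always comes from the sampling history `prev`, never from a user-supplied token list. The function name and signature here are illustrative.

```cpp
#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <vector>

using llama_token = int32_t;

// Apply repeat/frequency/presence penalties over the last `penalty_last_n`
// sampled tokens. `logits` holds one value per vocabulary id.
static void apply_penalties_sketch(
        std::vector<float> & logits,
        const std::vector<llama_token> & prev,   // sampling history, newest last
        int   penalty_last_n,
        float penalty_repeat,
        float penalty_freq,
        float penalty_present) {
    const int used = std::min((int) prev.size(), penalty_last_n);
    if (used <= 0) {
        return;
    }

    // count how often each token appears inside the penalty window
    std::unordered_map<llama_token, int> counts;
    for (size_t i = prev.size() - used; i < prev.size(); ++i) {
        counts[prev[i]]++;
    }

    for (const auto & [tok, cnt] : counts) {
        float & logit = logits[tok];

        // multiplicative repetition penalty
        if (logit <= 0.0f) {
            logit *= penalty_repeat;
        } else {
            logit /= penalty_repeat;
        }

        // OpenAI-style frequency and presence penalties
        logit -= (float) cnt * penalty_freq + penalty_present;
    }
}
```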
3 changes: 0 additions & 3 deletions common/sampling.h
@@ -56,9 +56,6 @@ typedef struct gpt_sampling_params {
float cfg_scale = 1.f; // how strong is guidance

std::vector<llama_logit_bias> logit_bias; // logit biases to apply

std::vector<llama_token> penalty_prompt_tokens;
bool use_penalty_prompt_tokens = false;
} gpt_sampling_params;

// general sampler context
6 changes: 1 addition & 5 deletions examples/server/README.md
@@ -424,8 +424,6 @@ node index.js

`frequency_penalty`: Repeat alpha frequency penalty. Default: `0.0`, which is disabled.

`penalty_prompt`: This will replace the `prompt` for the purpose of the penalty evaluation. Can be either `null`, a string or an array of numbers representing tokens. Default: `null`, which is to use the original `prompt`.

`mirostat`: Enable Mirostat sampling, controlling perplexity during text generation. Default: `0`, where `0` is disabled, `1` is Mirostat, and `2` is Mirostat 2.0.

`mirostat_tau`: Set the Mirostat target entropy, parameter tau. Default: `5.0`
@@ -672,7 +670,6 @@ Given a ChatML-formatted json description in `messages`, it returns the predicte
"stopping_word": ""
},
"penalize_nl": true,
"penalty_prompt_tokens": [],
"presence_penalty": 0.0,
"prompt": "Say hello to llama.cpp",
"repeat_last_n": 64,
@@ -696,8 +693,7 @@ Given a ChatML-formatted json description in `messages`, it returns the predicte
"tfs_z": 1.0,
"top_k": 40,
"top_p": 0.949999988079071,
"typical_p": 1.0,
"use_penalty_prompt_tokens": false
"typical_p": 1.0
}
]
```
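For context (not part of the diff): a client that previously sent `penalty_prompt` to steer the penalty window can simply drop the field, since the server no longer reads it; penalties are always computed from the tokens the slot itself has generated. A hypothetical request body illustrating the removed field, assuming the server's `/completion` endpoint — only the last line loses its effect after this commit:

```json
{
  "prompt": "Say hello to llama.cpp",
  "n_predict": 64,
  "repeat_last_n": 64,
  "repeat_penalty": 1.1,
  "frequency_penalty": 0.0,
  "presence_penalty": 0.0,
  "penalty_prompt": "Say hello to llama.cpp"
}
```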
52 changes: 0 additions & 52 deletions examples/server/server.cpp
@@ -986,51 +986,6 @@ struct server_context {
}
}

// penalize user-provided tokens
{
slot.sparams.penalty_prompt_tokens.clear();
slot.sparams.use_penalty_prompt_tokens = false;

const auto & penalty_prompt = data.find("penalty_prompt");

if (penalty_prompt != data.end()) {
if (penalty_prompt->is_string()) {
const auto penalty_prompt_string = penalty_prompt->get<std::string>();
slot.sparams.penalty_prompt_tokens = llama_tokenize(model, penalty_prompt_string, false);

if (slot.params.n_predict > 0) {
slot.sparams.penalty_prompt_tokens.reserve(slot.sparams.penalty_prompt_tokens.size() + slot.params.n_predict);
}
slot.sparams.use_penalty_prompt_tokens = true;

LOG_VERBOSE("penalty_prompt_tokens", {
{"id_slot", slot.id},
{"tokens", slot.sparams.penalty_prompt_tokens},
});
}
else if (penalty_prompt->is_array()) {
const auto n_tokens = penalty_prompt->size();
slot.sparams.penalty_prompt_tokens.reserve(n_tokens + std::max(0, slot.params.n_predict));

const int n_vocab = llama_n_vocab(model);
for (const auto & penalty_token : *penalty_prompt) {
if (penalty_token.is_number_integer()) {
const auto tok = penalty_token.get<llama_token>();
if (tok >= 0 && tok < n_vocab) {
slot.sparams.penalty_prompt_tokens.push_back(tok);
}
}
}
slot.sparams.use_penalty_prompt_tokens = true;

LOG_VERBOSE("penalty_prompt_tokens", {
{"id_slot", slot.id},
{"tokens", slot.sparams.penalty_prompt_tokens},
});
}
}
}

{
slot.sparams.logit_bias.clear();

@@ -1201,11 +1156,6 @@ struct server_context {
slot.generated_text += token_str;
slot.has_next_token = true;

if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1) {
// we can change penalty_prompt_tokens because it is always created from scratch each request
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
}

// check if there is incomplete UTF-8 character at the end
bool incomplete = false;
for (unsigned i = 1; i < 5 && i <= slot.generated_text.size(); ++i) {
@@ -1346,8 +1296,6 @@ struct server_context {
{"repeat_penalty", slot.sparams.penalty_repeat},
{"presence_penalty", slot.sparams.penalty_present},
{"frequency_penalty", slot.sparams.penalty_freq},
{"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
{"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
{"mirostat", slot.sparams.mirostat},
{"mirostat_tau", slot.sparams.mirostat_tau},
{"mirostat_eta", slot.sparams.mirostat_eta},
1 change: 1 addition & 0 deletions include/llama.h
@@ -386,6 +386,7 @@ extern "C" {
bool ignore_eos; // ignore the end-of-sequence token

const char * grammar;
const char * grammar_root;

int32_t n_logit_bias;
const llama_logit_bias * logit_bias;
1 change: 1 addition & 0 deletions src/llama.cpp
@@ -16514,6 +16514,7 @@ struct llama_sampling_params llama_sampling_default_params() {
/*.penalize_nl =*/ false,
/*.ignore_eos =*/ false,
/*.grammar =*/ nullptr,
/*.grammar_root =*/ nullptr,
/*.n_logit_bias =*/ 0,
/*.logit_bias =*/ nullptr,
};
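Separate from the penalty removal, the two one-line additions in `include/llama.h` and `src/llama.cpp` give the refactored sampling parameters a `grammar_root` field (defaulting to `nullptr`) alongside `grammar`. A minimal usage sketch, assuming the struct and `llama_sampling_default_params()` shown above; the GBNF text is purely illustrative:

```cpp
// Sketch only: start from the library defaults, then attach a grammar and
// name its root rule explicitly via the new grammar_root field.
llama_sampling_params sparams = llama_sampling_default_params();

sparams.grammar      = "root ::= \"yes\" | \"no\"";  // GBNF grammar text (illustrative)
sparams.grammar_root = "root";                       // start symbol for the grammar
```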
