Skip to content

Commit

Permalink
Server: Don't ignore llama.cpp params (ggerganov#8754)
Browse files Browse the repository at this point in the history
* Don't ignore llama.cpp params

* Add fallback for max_tokens
  • Loading branch information
ardfork authored and arthw committed Aug 7, 2024
1 parent 5572ec4 commit 17e8bab
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 19 deletions.
2 changes: 1 addition & 1 deletion examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -900,7 +900,7 @@ struct server_context {

slot.params.stream = json_value(data, "stream", false);
slot.params.cache_prompt = json_value(data, "cache_prompt", false);
- slot.params.n_predict = json_value(data, "n_predict", default_params.n_predict);
+ slot.params.n_predict = json_value(data, "n_predict", json_value(data, "max_tokens", default_params.n_predict));
slot.sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
slot.sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
slot.sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
Expand Down
18 changes: 0 additions & 18 deletions examples/server/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,24 +355,6 @@ static json oaicompat_completion_params_parse(

llama_params["__oaicompat"] = true;

- // Map OpenAI parameters to llama.cpp parameters
- //
- // For parameters that are defined by the OpenAI documentation (e.g.
- // temperature), we explicitly specify OpenAI's intended default; we
- // need to do that because sometimes OpenAI disagrees with llama.cpp
- //
- // https://platform.openai.com/docs/api-reference/chat/create
- llama_sampling_params default_sparams;
- llama_params["model"] = json_value(body, "model", std::string("unknown"));
- llama_params["frequency_penalty"] = json_value(body, "frequency_penalty", 0.0);
- llama_params["logit_bias"] = json_value(body, "logit_bias", json::object());
- llama_params["n_predict"] = json_value(body, "max_tokens", -1);
- llama_params["presence_penalty"] = json_value(body, "presence_penalty", 0.0);
- llama_params["seed"] = json_value(body, "seed", LLAMA_DEFAULT_SEED);
- llama_params["stream"] = json_value(body, "stream", false);
- llama_params["temperature"] = json_value(body, "temperature", 1.0);
- llama_params["top_p"] = json_value(body, "top_p", 1.0);
-
// Apply chat template to the list of messages
llama_params["prompt"] = format_chat(model, chat_template, body.at("messages"));

Expand Down

0 comments on commit 17e8bab

Please sign in to comment.