diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index ccc45fb6d2ce0..ca15fe7244055 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3069,9 +3069,14 @@ int main(int argc, char ** argv) {
         }

         json body = json::parse(req.body);
-        if (body.contains("tools") && ctx_server.tool_format != LLAMA_TOOL_FORMAT_NOT_SUPPORTED) {
-            body["prompt"] = format_chat_with_tool(ctx_server.tool_format, body.at("messages"), body.at("tools"));
-            body.erase(body.find("tools"));
+        if (body.contains("tools")) {
+            if (ctx_server.tool_format != LLAMA_TOOL_FORMAT_NOT_SUPPORTED) {
+                body["prompt"] = format_chat_with_tool(ctx_server.tool_format, body.at("messages"), body.at("tools"));
+                body.erase(body.find("tools"));
+            } else {
+                res_error(res, format_error_response("This server does not support tool calls. Start it with `--tool-calls`", ERROR_TYPE_NOT_SUPPORTED));
+                return;
+            }
         }

         json data = oaicompat_completion_params_parse(ctx_server.model, body, params.chat_template);
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 253f8d42f082c..fe6828c74793e 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -499,7 +499,15 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
         finish_reason = "length";
     }

-    std::time_t t = std::time(0);
+    auto wrap_choices = [&completion_id, &modelname](json choices) -> json {
+        return json{
+            {"choices", choices},
+            {"created", std::time(0)},
+            {"id", completion_id},
+            {"model", modelname},
+            {"object", "chat.completion.chunk"}
+        };
+    };

     json choices;
     json delta = has_tool_calls
@@ -519,22 +527,14 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
                                     {"delta", json::object()}}});
         if (has_tool_calls) {
             // tool call must be send as two updates
-            json initial_ret = json{{"choices", json::array({json{
-                            {"finish_reason", nullptr},
-                            {"index", 0},
-                            {"delta", delta}}})},
-                        {"created", t},
-                        {"id", completion_id},
-                        {"model", modelname},
-                        {"object", "chat.completion.chunk"}};
-
-            json second_ret = json{
-                        {"choices", choices},
-                        {"created", t},
-                        {"id", completion_id},
-                        {"model", modelname},
-                        {"object", "chat.completion.chunk"}};
-
+            json initial_ret = wrap_choices(json::array({
+                json{
+                    {"finish_reason", nullptr},
+                    {"index", 0},
+                    {"delta", delta},
+                }
+            }));
+            json second_ret = wrap_choices(choices);
             return std::vector<json>({initial_ret, second_ret});
         }
     } else {
@@ -545,26 +545,22 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
                                     {"delta", json{{"role", "assistant"}}}}});
         } else {
             // We have to send this as two updates to conform to openai behavior
-            json initial_ret = json{{"choices", json::array({json{
-                            {"finish_reason", nullptr},
-                            {"index", 0},
-                            {"delta", json{
-                                {"role", "assistant"}
-                            }}}})},
-                        {"created", t},
-                        {"id", completion_id},
-                        {"model", modelname},
-                        {"object", "chat.completion.chunk"}};
-
-            json second_ret = json{
-                        {"choices", json::array({json{{"finish_reason", nullptr},
-                                                      {"index", 0},
-                                                      {"delta", delta}}})},
-                        {"created", t},
-                        {"id", completion_id},
-                        {"model", modelname},
-                        {"object", "chat.completion.chunk"}};
-
+            json initial_ret = wrap_choices(json::array({
+                json{
+                    {"finish_reason", nullptr},
+                    {"index", 0},
+                    {"delta", json{
+                        {"role", "assistant"},
+                    }},
+                }
+            }));
+            json second_ret = wrap_choices(json::array({
+                json{
+                    {"finish_reason", nullptr},
+                    {"index", 0},
+                    {"delta", delta},
+                }
+            }));
             return std::vector<json>({initial_ret, second_ret});
         }
     } else {
@@ -582,13 +578,7 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
         }
     }

-    json ret = json {
-        {"choices", choices},
-        {"created", t},
-        {"id", completion_id},
-        {"model", modelname},
-        {"object", "chat.completion.chunk"}
-    };
+    json ret = wrap_choices(choices);
     if (!finish_reason.empty()) {
         int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
         int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
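The `server.cpp` hunk changes the failure mode: a request that includes `tools` while the loaded model has no recognized tool format is now rejected with an explicit error instead of having the field silently dropped. Below is a minimal standalone sketch of that guard, assuming nlohmann::json; only `LLAMA_TOOL_FORMAT_NOT_SUPPORTED` and the error message come from the diff, while the enum's second value and the error plumbing are simplified stand-ins, not the real server types.

```cpp
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

// Stand-in for the server's tool-format enum. Only LLAMA_TOOL_FORMAT_NOT_SUPPORTED
// appears in the diff; the second value is a hypothetical "some supported format".
enum llama_tool_format {
    LLAMA_TOOL_FORMAT_NOT_SUPPORTED,
    LLAMA_TOOL_FORMAT_SUPPORTED_PLACEHOLDER,
};

int main() {
    // A server started without tool-call support:
    llama_tool_format tool_format = LLAMA_TOOL_FORMAT_NOT_SUPPORTED;

    json body = json::parse(R"({
        "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
        "tools": [{"type": "function", "function": {"name": "get_weather"}}]
    })");

    if (body.contains("tools")) {
        if (tool_format != LLAMA_TOOL_FORMAT_NOT_SUPPORTED) {
            // Real code formats the prompt and drops the field:
            //   body["prompt"] = format_chat_with_tool(tool_format, body.at("messages"), body.at("tools"));
            //   body.erase(body.find("tools"));
        } else {
            // Real code: res_error(res, format_error_response(..., ERROR_TYPE_NOT_SUPPORTED)); return;
            std::cerr << "This server does not support tool calls. Start it with `--tool-calls`\n";
            return 1;
        }
    }
    return 0;
}
```

Rejecting up front means an unsupported server fails loudly rather than running a plain chat completion that ignores the tools, which would otherwise look like a model bug to the client.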
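The `utils.hpp` hunks are essentially a deduplication refactor: the five-field `chat.completion.chunk` envelope that was previously built inline at four call sites is factored into a `wrap_choices` lambda capturing `completion_id` and `modelname`. The one behavioral nuance is that `created` is now evaluated per chunk via `std::time(0)` rather than once through the old `t` variable. Here is a self-contained sketch of the pattern, assuming nlohmann::json, with hypothetical id/model/content values; it prints the two-update stream (assistant role first, then the delta) that the function emits to match OpenAI behavior.

```cpp
#include <ctime>
#include <iostream>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    const std::string completion_id = "chatcmpl-123";  // hypothetical id
    const std::string modelname     = "example-model"; // hypothetical model name

    // Same shape as the lambda in the diff: capture the per-request fields once,
    // stamp them onto every chunk, and let call sites pass only "choices".
    auto wrap_choices = [&completion_id, &modelname](json choices) -> json {
        return json{
            {"choices", choices},
            {"created", std::time(0)},
            {"id", completion_id},
            {"model", modelname},
            {"object", "chat.completion.chunk"}
        };
    };

    // OpenAI streaming sends the assistant role as its own chunk before any content.
    json initial_ret = wrap_choices(json::array({
        json{
            {"finish_reason", nullptr},
            {"index", 0},
            {"delta", json{{"role", "assistant"}}},
        }
    }));
    json second_ret = wrap_choices(json::array({
        json{
            {"finish_reason", nullptr},
            {"index", 0},
            {"delta", json{{"content", "Hello"}}}, // made-up content delta
        }
    }));

    for (const auto & chunk : std::vector<json>({initial_ret, second_ret})) {
        std::cout << "data: " << chunk.dump() << "\n\n"; // SSE-style framing
    }
    return 0;
}
```

Because every call site now goes through one builder, a future change to the chunk envelope (say, adding `system_fingerprint`) touches a single place instead of four hand-copied literals.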