From c4bd681a77a552ef03ce2bb5de8ecc2d4cded564 Mon Sep 17 00:00:00 2001 From: okada Date: Sat, 9 Dec 2023 14:04:26 +0900 Subject: [PATCH] replace cinatra with cpp-httplib --- CMakeLists.txt | 32 ++++++------------ flatline_server.cpp | 81 ++++++++++++++++++++++++--------------------- 2 files changed, 54 insertions(+), 59 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b199b7..640d00e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,12 +15,6 @@ FetchContent_Declare( GIT_TAG add_pfnet_plamo_13b ) -FetchContent_Declare( - asio - GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git - GIT_TAG asio-1-28-1 -) - set(JSONCPP_WITH_TESTS OFF) set(JSONCPP_WITH_POST_BUILD_UNITTEST OFF) set(JSONCPP_WITH_TESTS OFF) @@ -38,33 +32,29 @@ set(SPDLOG_BUILD_SHARED OFF) FetchContent_Declare( spdlog URL "https://github.com/gabime/spdlog/archive/refs/tags/v1.12.0.zip") -FetchContent_MakeAvailable(llama_cpp asio jsoncpp structopt spdlog) - -add_library(asio INTERFACE) -target_include_directories(asio INTERFACE "${asio_SOURCE_DIR}/asio/include") +FetchContent_MakeAvailable(llama_cpp jsoncpp structopt spdlog) FetchContent_Declare( - cinatra - GIT_REPOSITORY https://github.com/okdshin/cinatra.git - GIT_TAG v0.8.0_g++10 + httplib + GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git + GIT_TAG v0.14.2 ) -FetchContent_Populate(cinatra) -add_library(cinatra INTERFACE) -target_include_directories(cinatra INTERFACE "${cinatra_SOURCE_DIR}/include") + +FetchContent_Populate(httplib) +add_library(httplib INTERFACE) +target_include_directories(httplib INTERFACE "${httplib_SOURCE_DIR}") find_package(Threads REQUIRED) -target_link_libraries(cinatra INTERFACE Threads::Threads asio) -target_compile_definitions(cinatra INTERFACE -DASIO_STANDALONE) +target_link_libraries(httplib INTERFACE Threads::Threads) add_executable(${PROJECT_NAME}-server ${CMAKE_CURRENT_SOURCE_DIR}/flatline_server.cpp) set_target_properties(${PROJECT_NAME}-server PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") -target_link_libraries(${PROJECT_NAME}-server PRIVATE llama ggml cinatra jsoncpp_static structopt spdlog::spdlog "stdc++fs") +target_link_libraries(${PROJECT_NAME}-server PRIVATE llama ggml httplib jsoncpp_static structopt spdlog::spdlog "stdc++fs") add_custom_command( TARGET ${PROJECT_NAME}-server POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt $/flatline.LICENSE.txt - COMMAND ${CMAKE_COMMAND} -E copy ${asio_SOURCE_DIR}/asio/LICENSE_1_0.txt $/asio.LICENSE.txt - COMMAND ${CMAKE_COMMAND} -E copy ${cinatra_SOURCE_DIR}/LICENSE $/cinatra.LICENSE.txt + COMMAND ${CMAKE_COMMAND} -E copy ${httplib_SOURCE_DIR}/LICENSE $/httplib.LICENSE.txt COMMAND ${CMAKE_COMMAND} -E copy ${jsoncpp_SOURCE_DIR}/LICENSE $/jsoncpp.LICENSE.txt COMMAND ${CMAKE_COMMAND} -E copy ${llama_cpp_SOURCE_DIR}/LICENSE $/llama_cpp.LICENSE.txt COMMAND ${CMAKE_COMMAND} -E copy ${spdlog_SOURCE_DIR}/LICENSE $/spdlog.LICENSE.txt diff --git a/flatline_server.cpp b/flatline_server.cpp index d7da50b..650e096 100644 --- a/flatline_server.cpp +++ b/flatline_server.cpp @@ -7,12 +7,15 @@ #include #include -#include #include #include #include #include +#define CPPHTTPLIB_THREAD_POOL_COUNT 1 +#include +#undef CPPHTTPLIB_THREAD_POOL_COUNT + namespace { std::shared_ptr logger() { static auto logger_ = spdlog::stdout_color_mt("flatline"); @@ -115,12 +118,12 @@ class llama_cpp_model { }; } // namespace -std::optional try_to_parse_json(cinatra::request const &req) { +std::optional try_to_parse_json(httplib::Request const &req) { Json::CharReaderBuilder builder; const std::unique_ptr reader(builder.newCharReader()); Json::Value root; JSONCPP_STRING err; - std::string_view body = req.body(); + std::string_view body = req.body; logger()->info("request {}", body); if (!reader->parse(body.data(), body.data() + body.size(), &root, &err)) { return std::nullopt; @@ -150,7 +153,7 @@ std::string make_response_json(std::vector const &next_token_logits) { #include struct app_options { - std::optional port = "57045"; + std::optional port = 57045; std::optional model_path; std::optional numa = true; std::optional n_threads = -1; @@ -174,60 +177,62 @@ int main(int argc, char **argv) { *options.model_path, infer_thread_num, *options.n_gpu_layers); logger()->info("model loading finished"); - cinatra::http_server server(server_thread_num); - server.listen("0.0.0.0", *options.port); - server.set_http_handler( - "/", [](cinatra::request &req, cinatra::response &res) { - res.set_status_and_content(cinatra::status_type::ok, - "Flatline backend server is available"); - }); - server.set_http_handler( - "/config", [&options](cinatra::request &req, cinatra::response &res) { - Json::Value config; - config["port"] = *options.port; - config["model_path"] = *options.model_path; - config["numa"] = *options.numa; - config["n_threads"] = *options.n_threads; - config["n_gpu_layers"] = *options.n_gpu_layers; - Json::FastWriter json_fast_writer; - res.set_status_and_content(cinatra::status_type::ok, - json_fast_writer.write(config)); - }); - auto calc_next_token_logits_func = [&model](cinatra::request &req, - cinatra::response &res) { + httplib::Server server; + server.Get("/", [](httplib::Request const &req, httplib::Response &res) { + res.set_content("Flatline backend server is available", "text/plain"); + }); + server.Get("/config", [&options](httplib::Request const &req, + httplib::Response &res) { + Json::Value config; + config["port"] = *options.port; + config["model_path"] = *options.model_path; + config["numa"] = *options.numa; + config["n_threads"] = *options.n_threads; + config["n_gpu_layers"] = *options.n_gpu_layers; + Json::FastWriter json_fast_writer; + res.set_content(json_fast_writer.write(config), "application/json"); + }); + constexpr int status_bad_request = 400; + std::mutex computing_resource_mutex; + auto calc_next_token_logits_func = [&model, &computing_resource_mutex]( + httplib::Request const &req, + httplib::Response &res) { // Header check if (req.get_header_value("Content-type") != "application/json") { - res.set_status_and_content( - cinatra::status_type::bad_request, - "\"Content-type\" must be \"application/json\""); + res.status = status_bad_request; + res.set_content("\"Content-type\" must be \"application/json\"", + "text/plain"); logger()->info("Content-type is not application/json"); return; } // Data check & parse std::optional root_opt = try_to_parse_json(req); if (!root_opt) { - res.set_status_and_content(cinatra::status_type::bad_request, - "JSON data is broken"); + res.status = status_bad_request; + res.set_content("JSON data is broken", "text/plain"); logger()->info("JSON data is broken"); return; } Json::Value const &root = *root_opt; std::vector input_tokens = get_request_data(root); - // Calc next token logits - std::vector next_token_logits = - model.calc_next_token_logits(input_tokens); + std::vector next_token_logits; + { + // lock + std::unique_lock lock(computing_resource_mutex); + + // Calc next token logits + next_token_logits = model.calc_next_token_logits(input_tokens); + } // Send response - res.add_header("Content-type", "application/json"); std::string response_json = make_response_json(next_token_logits); - res.set_status_and_content(cinatra::status_type::ok, response_json.c_str()); + res.set_content(response_json.c_str(), "application/json"); logger()->info("sent response {}", std::string(response_json.c_str()).substr(0, 128) + "..."); }; - server.set_http_handler("/v1/calc_next_token_logits", - calc_next_token_logits_func); - server.run(); + server.Post("/v1/calc_next_token_logits", calc_next_token_logits_func); + server.listen("0.0.0.0", *options.port); llama_backend_free();