From c4bd681a77a552ef03ce2bb5de8ecc2d4cded564 Mon Sep 17 00:00:00 2001
From: okada <kokuzen@gmail.com>
Date: Sat, 9 Dec 2023 14:04:26 +0900
Subject: [PATCH] replace cinatra with cpp-httplib

---
 CMakeLists.txt      | 32 ++++++------------
 flatline_server.cpp | 81 ++++++++++++++++++++++++---------------------
 2 files changed, 54 insertions(+), 59 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8b199b7..640d00e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,12 +15,6 @@ FetchContent_Declare(
   GIT_TAG add_pfnet_plamo_13b
 )
 
-FetchContent_Declare(
-  asio
-  GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git
-  GIT_TAG asio-1-28-1
-)
-
 set(JSONCPP_WITH_TESTS OFF)
 set(JSONCPP_WITH_POST_BUILD_UNITTEST OFF)
 set(JSONCPP_WITH_TESTS OFF)
@@ -38,33 +32,29 @@ set(SPDLOG_BUILD_SHARED OFF)
 FetchContent_Declare(
   spdlog URL "https://github.com/gabime/spdlog/archive/refs/tags/v1.12.0.zip")
 
-FetchContent_MakeAvailable(llama_cpp asio jsoncpp structopt spdlog)
-
-add_library(asio INTERFACE)
-target_include_directories(asio INTERFACE "${asio_SOURCE_DIR}/asio/include")
+FetchContent_MakeAvailable(llama_cpp jsoncpp structopt spdlog)
 
 FetchContent_Declare(
-  cinatra
-  GIT_REPOSITORY https://github.com/okdshin/cinatra.git
-  GIT_TAG v0.8.0_g++10
+  httplib
+  GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git
+  GIT_TAG v0.14.2
 )
-FetchContent_Populate(cinatra)
-add_library(cinatra INTERFACE)
-target_include_directories(cinatra INTERFACE "${cinatra_SOURCE_DIR}/include")
+
+FetchContent_Populate(httplib)
+add_library(httplib INTERFACE)
+target_include_directories(httplib INTERFACE "${httplib_SOURCE_DIR}")
 find_package(Threads REQUIRED)
-target_link_libraries(cinatra INTERFACE Threads::Threads asio)
-target_compile_definitions(cinatra INTERFACE -DASIO_STANDALONE)
+target_link_libraries(httplib INTERFACE Threads::Threads)
 
 add_executable(${PROJECT_NAME}-server
                ${CMAKE_CURRENT_SOURCE_DIR}/flatline_server.cpp)
 set_target_properties(${PROJECT_NAME}-server PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
-target_link_libraries(${PROJECT_NAME}-server PRIVATE llama ggml cinatra jsoncpp_static structopt spdlog::spdlog "stdc++fs")
+target_link_libraries(${PROJECT_NAME}-server PRIVATE llama ggml httplib jsoncpp_static structopt spdlog::spdlog "stdc++fs")
 add_custom_command(
     TARGET ${PROJECT_NAME}-server
     POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/flatline.LICENSE.txt
-    COMMAND ${CMAKE_COMMAND} -E copy ${asio_SOURCE_DIR}/asio/LICENSE_1_0.txt $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/asio.LICENSE.txt
-    COMMAND ${CMAKE_COMMAND} -E copy ${cinatra_SOURCE_DIR}/LICENSE $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/cinatra.LICENSE.txt
+    COMMAND ${CMAKE_COMMAND} -E copy ${httplib_SOURCE_DIR}/LICENSE $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/httplib.LICENSE.txt
     COMMAND ${CMAKE_COMMAND} -E copy ${jsoncpp_SOURCE_DIR}/LICENSE $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/jsoncpp.LICENSE.txt
     COMMAND ${CMAKE_COMMAND} -E copy ${llama_cpp_SOURCE_DIR}/LICENSE $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/llama_cpp.LICENSE.txt
     COMMAND ${CMAKE_COMMAND} -E copy ${spdlog_SOURCE_DIR}/LICENSE $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/spdlog.LICENSE.txt
diff --git a/flatline_server.cpp b/flatline_server.cpp
index d7da50b..650e096 100644
--- a/flatline_server.cpp
+++ b/flatline_server.cpp
@@ -7,12 +7,15 @@
 #include <tuple>
 
 #include <array>
-#include <cinatra.hpp>
 #include <iostream>
 #include <memory>
 #include <optional>
 #include <stdexcept>
 
+#define CPPHTTPLIB_THREAD_POOL_COUNT 1
+#include <httplib.h>
+#undef CPPHTTPLIB_THREAD_POOL_COUNT
+
 namespace {
 std::shared_ptr<spdlog::logger> logger() {
   static auto logger_ = spdlog::stdout_color_mt("flatline");
@@ -115,12 +118,12 @@ class llama_cpp_model {
 };
 } // namespace
 
-std::optional<Json::Value> try_to_parse_json(cinatra::request const &req) {
+std::optional<Json::Value> try_to_parse_json(httplib::Request const &req) {
   Json::CharReaderBuilder builder;
   const std::unique_ptr<Json::CharReader> reader(builder.newCharReader());
   Json::Value root;
   JSONCPP_STRING err;
-  std::string_view body = req.body();
+  std::string_view body = req.body;
   logger()->info("request {}", body);
   if (!reader->parse(body.data(), body.data() + body.size(), &root, &err)) {
     return std::nullopt;
@@ -150,7 +153,7 @@ std::string make_response_json(std::vector<float> const &next_token_logits) {
 
 #include <structopt/app.hpp>
 struct app_options {
-  std::optional<std::string> port = "57045";
+  std::optional<int> port = 57045;
   std::optional<std::string> model_path;
   std::optional<bool> numa = true;
   std::optional<int> n_threads = -1;
@@ -174,60 +177,62 @@ int main(int argc, char **argv) {
       *options.model_path, infer_thread_num, *options.n_gpu_layers);
   logger()->info("model loading finished");
 
-  cinatra::http_server server(server_thread_num);
-  server.listen("0.0.0.0", *options.port);
-  server.set_http_handler<cinatra::GET, cinatra::POST>(
-      "/", [](cinatra::request &req, cinatra::response &res) {
-        res.set_status_and_content(cinatra::status_type::ok,
-                                   "Flatline backend server is available");
-      });
-  server.set_http_handler<cinatra::GET>(
-      "/config", [&options](cinatra::request &req, cinatra::response &res) {
-        Json::Value config;
-        config["port"] = *options.port;
-        config["model_path"] = *options.model_path;
-        config["numa"] = *options.numa;
-        config["n_threads"] = *options.n_threads;
-        config["n_gpu_layers"] = *options.n_gpu_layers;
-        Json::FastWriter json_fast_writer;
-        res.set_status_and_content(cinatra::status_type::ok,
-                                   json_fast_writer.write(config));
-      });
-  auto calc_next_token_logits_func = [&model](cinatra::request &req,
-                                              cinatra::response &res) {
+  httplib::Server server;
+  server.Get("/", [](httplib::Request const &req, httplib::Response &res) {
+    res.set_content("Flatline backend server is available", "text/plain");
+  });
+  server.Get("/config", [&options](httplib::Request const &req,
+                                   httplib::Response &res) {
+    Json::Value config;
+    config["port"] = *options.port;
+    config["model_path"] = *options.model_path;
+    config["numa"] = *options.numa;
+    config["n_threads"] = *options.n_threads;
+    config["n_gpu_layers"] = *options.n_gpu_layers;
+    Json::FastWriter json_fast_writer;
+    res.set_content(json_fast_writer.write(config), "application/json");
+  });
+  constexpr int status_bad_request = 400;
+  std::mutex computing_resource_mutex;
+  auto calc_next_token_logits_func = [&model, &computing_resource_mutex](
+                                         httplib::Request const &req,
+                                         httplib::Response &res) {
     // Header check
     if (req.get_header_value("Content-type") != "application/json") {
-      res.set_status_and_content(
-          cinatra::status_type::bad_request,
-          "\"Content-type\" must be \"application/json\"");
+      res.status = status_bad_request;
+      res.set_content("\"Content-type\" must be \"application/json\"",
+                      "text/plain");
       logger()->info("Content-type is not application/json");
       return;
     }
     // Data check & parse
     std::optional<Json::Value> root_opt = try_to_parse_json(req);
     if (!root_opt) {
-      res.set_status_and_content(cinatra::status_type::bad_request,
-                                 "JSON data is broken");
+      res.status = status_bad_request;
+      res.set_content("JSON data is broken", "text/plain");
       logger()->info("JSON data is broken");
       return;
     }
     Json::Value const &root = *root_opt;
     std::vector<int> input_tokens = get_request_data(root);
 
-    // Calc next token logits
-    std::vector<float> next_token_logits =
-        model.calc_next_token_logits(input_tokens);
+    std::vector<float> next_token_logits;
+    {
+      // Hold the mutex so only one request runs the model at a time
+      std::unique_lock lock(computing_resource_mutex);
+
+      // Calc next token logits
+      next_token_logits = model.calc_next_token_logits(input_tokens);
+    }
 
     // Send response
-    res.add_header("Content-type", "application/json");
     std::string response_json = make_response_json(next_token_logits);
-    res.set_status_and_content(cinatra::status_type::ok, response_json.c_str());
+    res.set_content(response_json, "application/json");
     logger()->info("sent response {}",
                    std::string(response_json.c_str()).substr(0, 128) + "...");
   };
-  server.set_http_handler<cinatra::POST>("/v1/calc_next_token_logits",
-                                         calc_next_token_logits_func);
-  server.run();
+  server.Post("/v1/calc_next_token_logits", calc_next_token_logits_func);
+  server.listen("0.0.0.0", *options.port);
 
   llama_backend_free();