From dfe31e048417d6dc099ac288b2ba3dd98cad1bc9 Mon Sep 17 00:00:00 2001 From: VJHack Date: Mon, 9 Sep 2024 22:06:56 -0500 Subject: [PATCH 01/18] Adding loading page for '/' server requests --- Makefile | 1 + examples/server/CMakeLists.txt | 1 + examples/server/public/loading.html | 12 ++++++++++++ examples/server/server.cpp | 19 ++++++++++++++++--- 4 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 examples/server/public/loading.html diff --git a/Makefile b/Makefile index 97ef37c0e6054..ae2e17c15ad5e 100644 --- a/Makefile +++ b/Makefile @@ -1430,6 +1430,7 @@ llama-server: \ examples/server/theme-snowstorm.css.hpp \ examples/server/index.html.hpp \ examples/server/index-new.html.hpp \ + examples/server/loading.html.hpp \ examples/server/index.js.hpp \ examples/server/completion.js.hpp \ examples/server/system-prompts.js.hpp \ diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index dbe41f1fd1120..b62f813c869b9 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -25,6 +25,7 @@ set(PUBLIC_ASSETS theme-snowstorm.css index.html index-new.html + loading.html index.js completion.js system-prompts.js diff --git a/examples/server/public/loading.html b/examples/server/public/loading.html new file mode 100644 index 0000000000000..5e0a837a67146 --- /dev/null +++ b/examples/server/public/loading.html @@ -0,0 +1,12 @@ + + + + + + +
+ The model is loading. Please wait.
+ The user interface will appear soon. +
+ + \ No newline at end of file diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 7495821f99c32..e10f8fed5ab2a 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -23,6 +23,7 @@ #include "theme-snowstorm.css.hpp" #include "index.html.hpp" #include "index-new.html.hpp" +#include "loading.html.hpp" #include "index.js.hpp" #include "completion.js.hpp" #include "system-prompts.js.hpp" @@ -2591,11 +2592,22 @@ int main(int argc, char ** argv) { return false; }; - auto middleware_server_state = [&res_error, &state](const httplib::Request &, httplib::Response & res) { + auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) { server_state current_state = state.load(); if (current_state == SERVER_STATE_LOADING_MODEL) { - res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE)); - return false; + httplib::Request & modified_req = (httplib::Request &) req; + const char* path_c = modified_req.path.c_str(); + int path_c_len = strlen(path_c); + char last_five[6]; + strcpy(last_five, path_c + (path_c_len -5)); + + if ((strcmp(path_c, "/") == 0) || (strcmp(last_five, ".html") == 0)) { + modified_req.path = "/loading.html"; + } + else { + res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE)); + return false; + } } return true; }; @@ -3162,6 +3174,7 @@ int main(int argc, char ** argv) { svr->Get("/theme-polarnight.css", handle_static_file(theme_polarnight_css, theme_polarnight_css_len, "text/css; charset=utf-8")); svr->Get("/theme-snowstorm.css", handle_static_file(theme_snowstorm_css, theme_snowstorm_css_len, "text/css; charset=utf-8")); svr->Get("/index-new.html", handle_static_file(index_new_html, index_new_html_len, "text/html; charset=utf-8")); + svr->Get("/loading.html", handle_static_file(loading_html, loading_html_len, "text/html; charset=utf-8")); svr->Get("/system-prompts.js", handle_static_file(system_prompts_js, system_prompts_js_len, "text/javascript; charset=utf-8")); svr->Get("/prompt-formats.js", handle_static_file(prompt_formats_js, prompt_formats_js_len, "text/javascript; charset=utf-8")); From dab4b49f04d1a176fe4510592455fb5424166b56 Mon Sep 17 00:00:00 2001 From: VJHack Date: Tue, 10 Sep 2024 22:20:11 -0500 Subject: [PATCH 02/18] set content when model is loading --- examples/server/server.cpp | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index e10f8fed5ab2a..0b17dd71578e4 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2476,7 +2476,6 @@ int main(int argc, char ** argv) { #endif std::atomic state{SERVER_STATE_LOADING_MODEL}; - svr->set_default_headers({{"Server", "llama.cpp"}}); // CORS preflight @@ -2592,22 +2591,11 @@ int main(int argc, char ** argv) { return false; }; - auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) { + auto middleware_server_state = [&res_error, &state](const httplib::Request &, httplib::Response & res) { server_state current_state = state.load(); if (current_state == SERVER_STATE_LOADING_MODEL) { - httplib::Request & modified_req = (httplib::Request &) req; - const char* path_c = modified_req.path.c_str(); - int path_c_len = strlen(path_c); - char last_five[6]; - strcpy(last_five, path_c + (path_c_len -5)); - - if ((strcmp(path_c, "/") == 0) || (strcmp(last_five, ".html") == 0)) { - modified_req.path = "/loading.html"; - } - else { - res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE)); - return false; - } + res.set_content("The model is loading. Please wait.
The user interface will appear soon.", "text/html; charset=utf-8"); + return false; } return true; }; From 19bc86307fc69d5f0892e9e197361b61d1c835ef Mon Sep 17 00:00:00 2001 From: VJHack Date: Tue, 10 Sep 2024 22:23:09 -0500 Subject: [PATCH 03/18] removed loading html file --- examples/server/public/loading.html | 12 ------------ examples/server/server.cpp | 2 -- 2 files changed, 14 deletions(-) delete mode 100644 examples/server/public/loading.html diff --git a/examples/server/public/loading.html b/examples/server/public/loading.html deleted file mode 100644 index 5e0a837a67146..0000000000000 --- a/examples/server/public/loading.html +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - -
- The model is loading. Please wait.
- The user interface will appear soon. -
- - \ No newline at end of file diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 0b17dd71578e4..39f8111f851e4 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -23,7 +23,6 @@ #include "theme-snowstorm.css.hpp" #include "index.html.hpp" #include "index-new.html.hpp" -#include "loading.html.hpp" #include "index.js.hpp" #include "completion.js.hpp" #include "system-prompts.js.hpp" @@ -3162,7 +3161,6 @@ int main(int argc, char ** argv) { svr->Get("/theme-polarnight.css", handle_static_file(theme_polarnight_css, theme_polarnight_css_len, "text/css; charset=utf-8")); svr->Get("/theme-snowstorm.css", handle_static_file(theme_snowstorm_css, theme_snowstorm_css_len, "text/css; charset=utf-8")); svr->Get("/index-new.html", handle_static_file(index_new_html, index_new_html_len, "text/html; charset=utf-8")); - svr->Get("/loading.html", handle_static_file(loading_html, loading_html_len, "text/html; charset=utf-8")); svr->Get("/system-prompts.js", handle_static_file(system_prompts_js, system_prompts_js_len, "text/javascript; charset=utf-8")); svr->Get("/prompt-formats.js", handle_static_file(prompt_formats_js, prompt_formats_js_len, "text/javascript; charset=utf-8")); From 125737a255c582cb41f449e3b4908913f314990e Mon Sep 17 00:00:00 2001 From: VJHack Date: Tue, 10 Sep 2024 22:23:57 -0500 Subject: [PATCH 04/18] updated cmakelist --- examples/server/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index b62f813c869b9..dbe41f1fd1120 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -25,7 +25,6 @@ set(PUBLIC_ASSETS theme-snowstorm.css index.html index-new.html - loading.html index.js completion.js system-prompts.js From 3dd73ca6626f6878699a78c40986d0222588d3de Mon Sep 17 00:00:00 2001 From: VJHack Date: Tue, 10 Sep 2024 22:24:39 -0500 Subject: [PATCH 05/18] updated makefile --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index ae2e17c15ad5e..97ef37c0e6054 100644 --- a/Makefile +++ b/Makefile @@ -1430,7 +1430,6 @@ llama-server: \ examples/server/theme-snowstorm.css.hpp \ examples/server/index.html.hpp \ examples/server/index-new.html.hpp \ - examples/server/loading.html.hpp \ examples/server/index.js.hpp \ examples/server/completion.js.hpp \ examples/server/system-prompts.js.hpp \ From 1ff1aa722aa41edeefc5caf6a7ea2a3917ad5c7e Mon Sep 17 00:00:00 2001 From: VJHack Date: Tue, 10 Sep 2024 22:25:32 -0500 Subject: [PATCH 06/18] cleaned up whitespace --- examples/server/server.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 39f8111f851e4..43f061aecf006 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2475,6 +2475,7 @@ int main(int argc, char ** argv) { #endif std::atomic state{SERVER_STATE_LOADING_MODEL}; + svr->set_default_headers({{"Server", "llama.cpp"}}); // CORS preflight From 9d3424a3c15f9b03bdc409e64ad9cb735ae5cf67 Mon Sep 17 00:00:00 2001 From: VJHack Date: Tue, 10 Sep 2024 22:26:58 -0500 Subject: [PATCH 07/18] cleanup for PR removed error --- examples/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 43f061aecf006..e03cddf77266f 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2591,7 +2591,7 @@ int main(int argc, char ** argv) { return false; }; - auto middleware_server_state = [&res_error, &state](const httplib::Request &, httplib::Response & res) { + auto middleware_server_state = [&state](const httplib::Request &, httplib::Response & res) { server_state current_state = state.load(); if (current_state == SERVER_STATE_LOADING_MODEL) { res.set_content("The model is loading. Please wait.
The user interface will appear soon.", "text/html; charset=utf-8"); From 5ca179c8999e94617d84c85a1a68d32148394658 Mon Sep 17 00:00:00 2001 From: VJHack Date: Wed, 11 Sep 2024 22:54:52 -0500 Subject: [PATCH 08/18] updated server test to handle 503 HTML --- examples/server/tests/features/steps/steps.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 65b71a8e85db1..822fe443775f1 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -1208,7 +1208,10 @@ async def wait_for_slots_status(context, while True: async with await session.get(f'{base_url}/slots', params=params) as slots_response: status_code = slots_response.status - slots = await slots_response.json() + try: + slots = await slots_response.json() + except: + slots = await slots_response.text() if context.debug: print(f"slots responses {slots}\n") if status_code == 503 and status_code == expected_http_status_code: @@ -1372,4 +1375,4 @@ def server_log(in_stream, out_stream): thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr)) thread_stderr.start() - print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}") + print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}") \ No newline at end of file From 161bf2205d98d7655c47276921f69441d0d3c4da Mon Sep 17 00:00:00 2001 From: VJHack Date: Wed, 11 Sep 2024 23:11:03 -0500 Subject: [PATCH 09/18] updated server test to handle 503 HTML --- examples/server/server.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index e03cddf77266f..733b89ccfe84d 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2595,6 +2595,7 @@ int main(int argc, char ** argv) { server_state current_state = state.load(); if (current_state == SERVER_STATE_LOADING_MODEL) { res.set_content("The model is loading. Please wait.
The user interface will appear soon.", "text/html; charset=utf-8"); + res.status = 503; return false; } return true; From 8b7daaaef280a3df39f21c741516b834530224ed Mon Sep 17 00:00:00 2001 From: VJHack Date: Thu, 12 Sep 2024 20:44:45 -0500 Subject: [PATCH 10/18] =?UTF-8?q?ca=E2=80=A0ch=20503=20before=20parsing=20?= =?UTF-8?q?json?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/server/tests/features/steps/steps.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 822fe443775f1..0d994af29c3ca 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -1208,14 +1208,12 @@ async def wait_for_slots_status(context, while True: async with await session.get(f'{base_url}/slots', params=params) as slots_response: status_code = slots_response.status - try: - slots = await slots_response.json() - except: - slots = await slots_response.text() - if context.debug: - print(f"slots responses {slots}\n") if status_code == 503 and status_code == expected_http_status_code: return + slots = await slots_response.json() + if context.debug: + print(f"slots responses {slots}\n") + if status_code == 200 and status_code == expected_http_status_code: n_slots_idle = sum(1 if slot["state"] == 0 else 0 for slot in slots) n_slots_processing = sum(1 if slot["state"] != 0 else 0 for slot in slots) From daf64fc4a9c45c7db748c89d5ba0cdb128bf384a Mon Sep 17 00:00:00 2001 From: VJHack Date: Thu, 12 Sep 2024 20:57:51 -0500 Subject: [PATCH 11/18] revert test --- examples/server/tests/features/steps/steps.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 0d994af29c3ca..65b71a8e85db1 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -1208,12 +1208,11 @@ async def wait_for_slots_status(context, while True: async with await session.get(f'{base_url}/slots', params=params) as slots_response: status_code = slots_response.status - if status_code == 503 and status_code == expected_http_status_code: - return slots = await slots_response.json() if context.debug: print(f"slots responses {slots}\n") - + if status_code == 503 and status_code == expected_http_status_code: + return if status_code == 200 and status_code == expected_http_status_code: n_slots_idle = sum(1 if slot["state"] == 0 else 0 for slot in slots) n_slots_processing = sum(1 if slot["state"] != 0 else 0 for slot in slots) @@ -1373,4 +1372,4 @@ def server_log(in_stream, out_stream): thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr)) thread_stderr.start() - print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}") \ No newline at end of file + print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}") From cb13382136119b2c8533c721d10798082cde878f Mon Sep 17 00:00:00 2001 From: VJHack Date: Thu, 12 Sep 2024 21:44:52 -0500 Subject: [PATCH 12/18] account for both api and web browser requests --- examples/server/server.cpp | 10 +++++++--- examples/server/tests/features/server.feature | 10 +++++++++- examples/server/tests/features/steps/steps.py | 3 ++- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 733b89ccfe84d..22343cea55a92 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2591,11 +2591,15 @@ int main(int argc, char ** argv) { return false; }; - auto middleware_server_state = [&state](const httplib::Request &, httplib::Response & res) { + auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) { server_state current_state = state.load(); if (current_state == SERVER_STATE_LOADING_MODEL) { - res.set_content("The model is loading. Please wait.
The user interface will appear soon.", "text/html; charset=utf-8"); - res.status = 503; + if(req.path == "/"){ + res.set_content("The model is loading. Please wait.
The user interface will appear soon.", "text/html; charset=utf-8"); + res.status = 503; + } else { + res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE)); + } return false; } return true; diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature index b55971454afc3..6a3ffe3175123 100644 --- a/examples/server/tests/features/server.feature +++ b/examples/server/tests/features/server.feature @@ -105,8 +105,16 @@ Feature: llama.cpp server Given first token is removed Then tokens can be detokenized + Scenario: Tokenize with pieces + When tokenizing with pieces: + """ + What is the capital of Germany? + 媽 + """ + Then tokens are given with pieces + Scenario: Models available Given available models Then 1 models are supported Then model 0 is identified by tinyllama-2 - Then model 0 is trained on 128 tokens context + Then model 0 is trained on 128 tokens context \ No newline at end of file diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 65b71a8e85db1..c463decf02081 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -1208,6 +1208,7 @@ async def wait_for_slots_status(context, while True: async with await session.get(f'{base_url}/slots', params=params) as slots_response: status_code = slots_response.status + print(await slots_response.text()) slots = await slots_response.json() if context.debug: print(f"slots responses {slots}\n") @@ -1372,4 +1373,4 @@ def server_log(in_stream, out_stream): thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr)) thread_stderr.start() - print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}") + print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}") \ No newline at end of file From 42abdd020743a63ee80a74fc261dc8c8f554d1c7 Mon Sep 17 00:00:00 2001 From: VJHack Date: Thu, 12 Sep 2024 22:04:08 -0500 Subject: [PATCH 13/18] precommit corrections --- examples/server/tests/features/steps/steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index c463decf02081..22b889e120364 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -1373,4 +1373,4 @@ def server_log(in_stream, out_stream): thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr)) thread_stderr.start() - print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}") \ No newline at end of file + print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}") From cd80fce5e8eb7b03c857907f6f9ac577fe574501 Mon Sep 17 00:00:00 2001 From: VJHack Date: Thu, 12 Sep 2024 22:16:45 -0500 Subject: [PATCH 14/18] eol fix --- .pre-commit-config.yaml | 10 +++++----- examples/server/tests/features/server.feature | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 91d7916285081..84a81bb56d087 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,8 +9,8 @@ repos: - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files -- repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 - hooks: - - id: flake8 - additional_dependencies: [flake8-no-print] +# - repo: https://github.com/PyCQA/flake8 +# rev: 7.0.0 +# hooks: +# - id: flake8 +# additional_dependencies: [flake8-no-print] diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature index 6a3ffe3175123..15e24c624af37 100644 --- a/examples/server/tests/features/server.feature +++ b/examples/server/tests/features/server.feature @@ -117,4 +117,4 @@ Feature: llama.cpp server Given available models Then 1 models are supported Then model 0 is identified by tinyllama-2 - Then model 0 is trained on 128 tokens context \ No newline at end of file + Then model 0 is trained on 128 tokens context From e51eb598618a50ca03e08bd19655012c72bc1ea3 Mon Sep 17 00:00:00 2001 From: VJHack Date: Thu, 12 Sep 2024 22:27:34 -0500 Subject: [PATCH 15/18] revert changes to pre-commit --- .pre-commit-config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 84a81bb56d087..91d7916285081 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,8 +9,8 @@ repos: - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files -# - repo: https://github.com/PyCQA/flake8 -# rev: 7.0.0 -# hooks: -# - id: flake8 -# additional_dependencies: [flake8-no-print] +- repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + additional_dependencies: [flake8-no-print] From df9f16747f26a0128365c230fe5f9852d700e1e8 Mon Sep 17 00:00:00 2001 From: VJHack Date: Thu, 12 Sep 2024 23:04:53 -0500 Subject: [PATCH 16/18] removed print statement --- examples/server/tests/features/steps/steps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index a66723eb1abef..11587dd64075a 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -1237,7 +1237,6 @@ async def wait_for_slots_status(context, while True: async with await session.get(f'{base_url}/slots', params=params) as slots_response: status_code = slots_response.status - print(await slots_response.text()) slots = await slots_response.json() if context.debug: print(f"slots responses {slots}\n") From 739ea75015719bdb341601f3516b027afc3e3e71 Mon Sep 17 00:00:00 2001 From: VJHack Date: Thu, 12 Sep 2024 23:14:29 -0500 Subject: [PATCH 17/18] made loading message more descriptive --- examples/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index a9964812dcfd6..2f41f45677c72 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2596,7 +2596,7 @@ int main(int argc, char ** argv) { server_state current_state = state.load(); if (current_state == SERVER_STATE_LOADING_MODEL) { if(req.path == "/"){ - res.set_content("The model is loading. Please wait.
The user interface will appear soon.", "text/html; charset=utf-8"); + res.set_content("The model is loading. Please wait.
The user interface will appear soon.
You may need to refresh the page.", "text/html; charset=utf-8"); res.status = 503; } else { res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE)); From a7feae74e7c0253e02b47938e93be56901f88483 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 13 Sep 2024 12:58:00 +0200 Subject: [PATCH 18/18] also support .html files --- Makefile | 1 + examples/server/CMakeLists.txt | 1 + examples/server/public/loading.html | 12 ++++++++++++ examples/server/server.cpp | 6 ++++-- 4 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 examples/server/public/loading.html diff --git a/Makefile b/Makefile index 8d3fd3ee83f61..f41887a4d3d8c 100644 --- a/Makefile +++ b/Makefile @@ -1440,6 +1440,7 @@ llama-server: \ examples/server/system-prompts.js.hpp \ examples/server/prompt-formats.js.hpp \ examples/server/json-schema-to-grammar.mjs.hpp \ + examples/server/loading.html.hpp \ common/json.hpp \ common/stb_image.h \ $(OBJ_ALL) diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index dbe41f1fd1120..580f3a8248cf5 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -30,6 +30,7 @@ set(PUBLIC_ASSETS system-prompts.js prompt-formats.js json-schema-to-grammar.mjs + loading.html ) foreach(asset ${PUBLIC_ASSETS}) diff --git a/examples/server/public/loading.html b/examples/server/public/loading.html new file mode 100644 index 0000000000000..c3fd19a0f5ae7 --- /dev/null +++ b/examples/server/public/loading.html @@ -0,0 +1,12 @@ + + + + + + +
+ The model is loading. Please wait.
+ The user interface will appear soon. +
+ + diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 2f41f45677c72..73cd6aae75e97 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -28,6 +28,7 @@ #include "system-prompts.js.hpp" #include "prompt-formats.js.hpp" #include "json-schema-to-grammar.mjs.hpp" +#include "loading.html.hpp" #include #include @@ -2595,8 +2596,9 @@ int main(int argc, char ** argv) { auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) { server_state current_state = state.load(); if (current_state == SERVER_STATE_LOADING_MODEL) { - if(req.path == "/"){ - res.set_content("The model is loading. Please wait.
The user interface will appear soon.
You may need to refresh the page.", "text/html; charset=utf-8"); + auto tmp = string_split(req.path, '.'); + if (req.path == "/" || tmp.back() == "html") { + res.set_content(reinterpret_cast(loading_html), loading_html_len, "text/html; charset=utf-8"); res.status = 503; } else { res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));