From dfe31e048417d6dc099ac288b2ba3dd98cad1bc9 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Mon, 9 Sep 2024 22:06:56 -0500
Subject: [PATCH 01/18] Adding loading page for '/' server requests

---
 Makefile                            |  1 +
 examples/server/CMakeLists.txt      |  1 +
 examples/server/public/loading.html | 12 ++++++++++++
 examples/server/server.cpp          | 19 ++++++++++++++++---
 4 files changed, 30 insertions(+), 3 deletions(-)
 create mode 100644 examples/server/public/loading.html
diff --git a/Makefile b/Makefile
index 97ef37c0e6054..ae2e17c15ad5e 100644
--- a/Makefile
+++ b/Makefile
@@ -1430,6 +1430,7 @@ llama-server: \
 	examples/server/theme-snowstorm.css.hpp \
 	examples/server/index.html.hpp \
 	examples/server/index-new.html.hpp \
+	examples/server/loading.html.hpp \
 	examples/server/index.js.hpp \
 	examples/server/completion.js.hpp \
 	examples/server/system-prompts.js.hpp \
diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
index dbe41f1fd1120..b62f813c869b9 100644
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@@ -25,6 +25,7 @@ set(PUBLIC_ASSETS
     theme-snowstorm.css
     index.html
     index-new.html
+    loading.html
     index.js
     completion.js
     system-prompts.js
diff --git a/examples/server/public/loading.html b/examples/server/public/loading.html
new file mode 100644
index 0000000000000..5e0a837a67146
--- /dev/null
+++ b/examples/server/public/loading.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv="refresh" content="5">
+    </head>
+    <body>
+        <div id="loading">
+            The model is loading. Please wait.<br/>
+            The user interface will appear soon.
+        </div>
+    </body>
+</html>
\ No newline at end of file
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 7495821f99c32..e10f8fed5ab2a 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -23,6 +23,7 @@
 #include "theme-snowstorm.css.hpp"
 #include "index.html.hpp"
 #include "index-new.html.hpp"
+#include "loading.html.hpp"
 #include "index.js.hpp"
 #include "completion.js.hpp"
 #include "system-prompts.js.hpp"
@@ -2591,11 +2592,22 @@ int main(int argc, char ** argv) {
         return false;
     };
 
-    auto middleware_server_state = [&res_error, &state](const httplib::Request &, httplib::Response & res) {
+    auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
-            res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
-            return false;
+            httplib::Request & modified_req = (httplib::Request &) req;
+            const char* path_c = modified_req.path.c_str();
+            int path_c_len = strlen(path_c);
+            char last_five[6];
+            strcpy(last_five, path_c + (path_c_len -5));
+
+            if ((strcmp(path_c, "/") == 0) || (strcmp(last_five, ".html") == 0)) {
+                modified_req.path = "/loading.html";
+            }
+            else {
+                res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
+                return false;
+            }
         }
         return true;
     };
@@ -3162,6 +3174,7 @@ int main(int argc, char ** argv) {
     svr->Get("/theme-polarnight.css",  handle_static_file(theme_polarnight_css, theme_polarnight_css_len, "text/css; charset=utf-8"));
     svr->Get("/theme-snowstorm.css",   handle_static_file(theme_snowstorm_css, theme_snowstorm_css_len, "text/css; charset=utf-8"));
     svr->Get("/index-new.html",        handle_static_file(index_new_html, index_new_html_len, "text/html; charset=utf-8"));
+    svr->Get("/loading.html",          handle_static_file(loading_html, loading_html_len, "text/html; charset=utf-8"));
     svr->Get("/system-prompts.js",     handle_static_file(system_prompts_js, system_prompts_js_len, "text/javascript; charset=utf-8"));
     svr->Get("/prompt-formats.js",     handle_static_file(prompt_formats_js, prompt_formats_js_len, "text/javascript; charset=utf-8"));
 

From dab4b49f04d1a176fe4510592455fb5424166b56 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Tue, 10 Sep 2024 22:20:11 -0500
Subject: [PATCH 02/18] set content when model is loading

---
 examples/server/server.cpp | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index e10f8fed5ab2a..0b17dd71578e4 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2476,7 +2476,6 @@ int main(int argc, char ** argv) {
 #endif
 
     std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};
-
     svr->set_default_headers({{"Server", "llama.cpp"}});
 
     // CORS preflight
@@ -2592,22 +2591,11 @@ int main(int argc, char ** argv) {
         return false;
     };
 
-    auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
+    auto middleware_server_state = [&res_error, &state](const httplib::Request &, httplib::Response & res) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
-            httplib::Request & modified_req = (httplib::Request &) req;
-            const char* path_c = modified_req.path.c_str();
-            int path_c_len = strlen(path_c);
-            char last_five[6];
-            strcpy(last_five, path_c + (path_c_len -5));
-
-            if ((strcmp(path_c, "/") == 0) || (strcmp(last_five, ".html") == 0)) {
-                modified_req.path = "/loading.html";
-            }
-            else {
-                res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
-                return false;
-            }
+            res.set_content("<html><body>The model is loading. Please wait.<br/>The user interface will appear soon.</body></html>", "text/html; charset=utf-8");
+            return false;
         }
         return true;
     };

From 19bc86307fc69d5f0892e9e197361b61d1c835ef Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Tue, 10 Sep 2024 22:23:09 -0500
Subject: [PATCH 03/18] removed loading html file

---
 examples/server/public/loading.html | 12 ------------
 examples/server/server.cpp          |  2 --
 2 files changed, 14 deletions(-)
 delete mode 100644 examples/server/public/loading.html

diff --git a/examples/server/public/loading.html b/examples/server/public/loading.html
deleted file mode 100644
index 5e0a837a67146..0000000000000
--- a/examples/server/public/loading.html
+++ /dev/null
@@ -1,12 +0,0 @@
-<!DOCTYPE html>
-<html>
-    <head>
-        <meta http-equiv="refresh" content="5">
-    </head>
-    <body>
-        <div id="loading">
-            The model is loading. Please wait.<br/>
-            The user interface will appear soon.
-        </div>
-    </body>
-</html>
\ No newline at end of file
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 0b17dd71578e4..39f8111f851e4 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -23,7 +23,6 @@
 #include "theme-snowstorm.css.hpp"
 #include "index.html.hpp"
 #include "index-new.html.hpp"
-#include "loading.html.hpp"
 #include "index.js.hpp"
 #include "completion.js.hpp"
 #include "system-prompts.js.hpp"
@@ -3162,7 +3161,6 @@ int main(int argc, char ** argv) {
     svr->Get("/theme-polarnight.css",  handle_static_file(theme_polarnight_css, theme_polarnight_css_len, "text/css; charset=utf-8"));
     svr->Get("/theme-snowstorm.css",   handle_static_file(theme_snowstorm_css, theme_snowstorm_css_len, "text/css; charset=utf-8"));
     svr->Get("/index-new.html",        handle_static_file(index_new_html, index_new_html_len, "text/html; charset=utf-8"));
-    svr->Get("/loading.html",          handle_static_file(loading_html, loading_html_len, "text/html; charset=utf-8"));
     svr->Get("/system-prompts.js",     handle_static_file(system_prompts_js, system_prompts_js_len, "text/javascript; charset=utf-8"));
     svr->Get("/prompt-formats.js",     handle_static_file(prompt_formats_js, prompt_formats_js_len, "text/javascript; charset=utf-8"));
 

From 125737a255c582cb41f449e3b4908913f314990e Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Tue, 10 Sep 2024 22:23:57 -0500
Subject: [PATCH 04/18] updated cmakelist

---
 examples/server/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
index b62f813c869b9..dbe41f1fd1120 100644
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@@ -25,7 +25,6 @@ set(PUBLIC_ASSETS
     theme-snowstorm.css
     index.html
     index-new.html
-    loading.html
     index.js
     completion.js
     system-prompts.js

From 3dd73ca6626f6878699a78c40986d0222588d3de Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Tue, 10 Sep 2024 22:24:39 -0500
Subject: [PATCH 05/18] updated makefile

---
 Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Makefile b/Makefile
index ae2e17c15ad5e..97ef37c0e6054 100644
--- a/Makefile
+++ b/Makefile
@@ -1430,7 +1430,6 @@ llama-server: \
 	examples/server/theme-snowstorm.css.hpp \
 	examples/server/index.html.hpp \
 	examples/server/index-new.html.hpp \
-	examples/server/loading.html.hpp \
 	examples/server/index.js.hpp \
 	examples/server/completion.js.hpp \
 	examples/server/system-prompts.js.hpp \

From 1ff1aa722aa41edeefc5caf6a7ea2a3917ad5c7e Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Tue, 10 Sep 2024 22:25:32 -0500
Subject: [PATCH 06/18] cleaned up whitespace

---
 examples/server/server.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 39f8111f851e4..43f061aecf006 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2475,6 +2475,7 @@ int main(int argc, char ** argv) {
 #endif
 
     std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};
+
     svr->set_default_headers({{"Server", "llama.cpp"}});
 
     // CORS preflight

From 9d3424a3c15f9b03bdc409e64ad9cb735ae5cf67 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Tue, 10 Sep 2024 22:26:58 -0500
Subject: [PATCH 07/18] cleanup for PR removed error

---
 examples/server/server.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 43f061aecf006..e03cddf77266f 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2591,7 +2591,7 @@ int main(int argc, char ** argv) {
         return false;
     };
 
-    auto middleware_server_state = [&res_error, &state](const httplib::Request &, httplib::Response & res) {
+    auto middleware_server_state = [&state](const httplib::Request &, httplib::Response & res) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
             res.set_content("<html><body>The model is loading. Please wait.<br/>The user interface will appear soon.</body></html>", "text/html; charset=utf-8");

From 5ca179c8999e94617d84c85a1a68d32148394658 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Wed, 11 Sep 2024 22:54:52 -0500
Subject: [PATCH 08/18] updated server test to handle 503 HTML

---
 examples/server/tests/features/steps/steps.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 65b71a8e85db1..822fe443775f1 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -1208,7 +1208,10 @@ async def wait_for_slots_status(context,
         while True:
             async with await session.get(f'{base_url}/slots', params=params) as slots_response:
                 status_code = slots_response.status
-                slots = await slots_response.json()
+                try:
+                    slots = await slots_response.json()
+                except:
+                    slots = await slots_response.text()
                 if context.debug:
                     print(f"slots responses {slots}\n")
                 if status_code == 503 and status_code == expected_http_status_code:
@@ -1372,4 +1375,4 @@ def server_log(in_stream, out_stream):
     thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr))
     thread_stderr.start()
 
-    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
+    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
\ No newline at end of file

From 161bf2205d98d7655c47276921f69441d0d3c4da Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Wed, 11 Sep 2024 23:11:03 -0500
Subject: [PATCH 09/18] return HTTP 503 status with the loading page

---
 examples/server/server.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index e03cddf77266f..733b89ccfe84d 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2595,6 +2595,7 @@ int main(int argc, char ** argv) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
             res.set_content("<html><body>The model is loading. Please wait.<br/>The user interface will appear soon.</body></html>", "text/html; charset=utf-8");
+            res.status = 503;
             return false;
         }
         return true;

From 8b7daaaef280a3df39f21c741516b834530224ed Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 12 Sep 2024 20:44:45 -0500
Subject: [PATCH 10/18] catch 503 before parsing json
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/server/tests/features/steps/steps.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 822fe443775f1..0d994af29c3ca 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -1208,14 +1208,12 @@ async def wait_for_slots_status(context,
         while True:
             async with await session.get(f'{base_url}/slots', params=params) as slots_response:
                 status_code = slots_response.status
-                try:
-                    slots = await slots_response.json()
-                except:
-                    slots = await slots_response.text()
-                if context.debug:
-                    print(f"slots responses {slots}\n")
                 if status_code == 503 and status_code == expected_http_status_code:
                     return
+                slots = await slots_response.json()
+                if context.debug:
+                    print(f"slots responses {slots}\n")
+
                 if status_code == 200 and status_code == expected_http_status_code:
                     n_slots_idle = sum(1 if slot["state"] == 0 else 0 for slot in slots)
                     n_slots_processing = sum(1 if slot["state"] != 0 else 0 for slot in slots)

From daf64fc4a9c45c7db748c89d5ba0cdb128bf384a Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 12 Sep 2024 20:57:51 -0500
Subject: [PATCH 11/18] revert test

---
 examples/server/tests/features/steps/steps.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 0d994af29c3ca..65b71a8e85db1 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -1208,12 +1208,11 @@ async def wait_for_slots_status(context,
         while True:
             async with await session.get(f'{base_url}/slots', params=params) as slots_response:
                 status_code = slots_response.status
-                if status_code == 503 and status_code == expected_http_status_code:
-                    return
                 slots = await slots_response.json()
                 if context.debug:
                     print(f"slots responses {slots}\n")
-
+                if status_code == 503 and status_code == expected_http_status_code:
+                    return
                 if status_code == 200 and status_code == expected_http_status_code:
                     n_slots_idle = sum(1 if slot["state"] == 0 else 0 for slot in slots)
                     n_slots_processing = sum(1 if slot["state"] != 0 else 0 for slot in slots)
@@ -1373,4 +1372,4 @@ def server_log(in_stream, out_stream):
     thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr))
     thread_stderr.start()
 
-    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
\ No newline at end of file
+    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")

From cb13382136119b2c8533c721d10798082cde878f Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 12 Sep 2024 21:44:52 -0500
Subject: [PATCH 12/18] account for both api and web browser requests

---
 examples/server/server.cpp                    | 10 +++++++---
 examples/server/tests/features/server.feature | 10 +++++++++-
 examples/server/tests/features/steps/steps.py |  3 ++-
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 733b89ccfe84d..22343cea55a92 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2591,11 +2591,15 @@ int main(int argc, char ** argv) {
         return false;
     };
 
-    auto middleware_server_state = [&state](const httplib::Request &, httplib::Response & res) {
+    auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
-            res.set_content("<html><body>The model is loading. Please wait.<br/>The user interface will appear soon.</body></html>", "text/html; charset=utf-8");
-            res.status = 503;
+            if(req.path == "/"){
+                res.set_content("<html><body>The model is loading. Please wait.<br/>The user interface will appear soon.</body></html>", "text/html; charset=utf-8");
+                res.status = 503;
+            } else {
+                res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
+            }
             return false;
         }
         return true;
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index b55971454afc3..6a3ffe3175123 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -105,8 +105,16 @@ Feature: llama.cpp server
     Given first token is removed
     Then  tokens can be detokenized
 
+  Scenario: Tokenize with pieces
+    When  tokenizing with pieces:
+    """
+    What is the capital of Germany?
+    媽
+    """
+    Then  tokens are given with pieces
+
   Scenario: Models available
     Given available models
     Then  1 models are supported
     Then  model 0 is identified by tinyllama-2
-    Then  model 0 is trained on 128 tokens context
+    Then  model 0 is trained on 128 tokens context
\ No newline at end of file
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 65b71a8e85db1..c463decf02081 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -1208,6 +1208,7 @@ async def wait_for_slots_status(context,
         while True:
             async with await session.get(f'{base_url}/slots', params=params) as slots_response:
                 status_code = slots_response.status
+                print(await slots_response.text())
                 slots = await slots_response.json()
                 if context.debug:
                     print(f"slots responses {slots}\n")
@@ -1372,4 +1373,4 @@ def server_log(in_stream, out_stream):
     thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr))
     thread_stderr.start()
 
-    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
+    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
\ No newline at end of file

From 42abdd020743a63ee80a74fc261dc8c8f554d1c7 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 12 Sep 2024 22:04:08 -0500
Subject: [PATCH 13/18] precommit corrections

---
 examples/server/tests/features/steps/steps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index c463decf02081..22b889e120364 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -1373,4 +1373,4 @@ def server_log(in_stream, out_stream):
     thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr))
     thread_stderr.start()
 
-    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
\ No newline at end of file
+    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")

From cd80fce5e8eb7b03c857907f6f9ac577fe574501 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 12 Sep 2024 22:16:45 -0500
Subject: [PATCH 14/18] eol fix

---
 .pre-commit-config.yaml                       | 10 +++++-----
 examples/server/tests/features/server.feature |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 91d7916285081..84a81bb56d087 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,8 +9,8 @@ repos:
   - id: end-of-file-fixer
   - id: check-yaml
   - id: check-added-large-files
-- repo: https://github.com/PyCQA/flake8
-  rev: 7.0.0
-  hooks:
-  -   id: flake8
-      additional_dependencies: [flake8-no-print]
+# - repo: https://github.com/PyCQA/flake8
+#   rev: 7.0.0
+#   hooks:
+#   -   id: flake8
+#       additional_dependencies: [flake8-no-print]
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index 6a3ffe3175123..15e24c624af37 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -117,4 +117,4 @@ Feature: llama.cpp server
     Given available models
     Then  1 models are supported
     Then  model 0 is identified by tinyllama-2
-    Then  model 0 is trained on 128 tokens context
\ No newline at end of file
+    Then  model 0 is trained on 128 tokens context

From e51eb598618a50ca03e08bd19655012c72bc1ea3 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 12 Sep 2024 22:27:34 -0500
Subject: [PATCH 15/18] revert changes to pre-commit

---
 .pre-commit-config.yaml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 84a81bb56d087..91d7916285081 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,8 +9,8 @@ repos:
   - id: end-of-file-fixer
   - id: check-yaml
   - id: check-added-large-files
-# - repo: https://github.com/PyCQA/flake8
-#   rev: 7.0.0
-#   hooks:
-#   -   id: flake8
-#       additional_dependencies: [flake8-no-print]
+- repo: https://github.com/PyCQA/flake8
+  rev: 7.0.0
+  hooks:
+  -   id: flake8
+      additional_dependencies: [flake8-no-print]

From df9f16747f26a0128365c230fe5f9852d700e1e8 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 12 Sep 2024 23:04:53 -0500
Subject: [PATCH 16/18] removed print statement

---
 examples/server/tests/features/steps/steps.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index a66723eb1abef..11587dd64075a 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -1237,7 +1237,6 @@ async def wait_for_slots_status(context,
         while True:
             async with await session.get(f'{base_url}/slots', params=params) as slots_response:
                 status_code = slots_response.status
-                print(await slots_response.text())
                 slots = await slots_response.json()
                 if context.debug:
                     print(f"slots responses {slots}\n")

From 739ea75015719bdb341601f3516b027afc3e3e71 Mon Sep 17 00:00:00 2001
From: VJHack <flymyplane21@gmail.com>
Date: Thu, 12 Sep 2024 23:14:29 -0500
Subject: [PATCH 17/18] made loading message more descriptive

---
 examples/server/server.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index a9964812dcfd6..2f41f45677c72 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2596,7 +2596,7 @@ int main(int argc, char ** argv) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
             if(req.path == "/"){
-                res.set_content("<html><body>The model is loading. Please wait.<br/>The user interface will appear soon.</body></html>", "text/html; charset=utf-8");
+                res.set_content("<html><body>The model is loading. Please wait.<br/>The user interface will appear soon.<br/>You may need to refresh the page.</body></html>", "text/html; charset=utf-8");
                 res.status = 503;
             } else {
                 res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));

From a7feae74e7c0253e02b47938e93be56901f88483 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Fri, 13 Sep 2024 12:58:00 +0200
Subject: [PATCH 18/18] also support .html files

---
 Makefile                            |  1 +
 examples/server/CMakeLists.txt      |  1 +
 examples/server/public/loading.html | 12 ++++++++++++
 examples/server/server.cpp          |  6 ++++--
 4 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 examples/server/public/loading.html

diff --git a/Makefile b/Makefile
index 8d3fd3ee83f61..f41887a4d3d8c 100644
--- a/Makefile
+++ b/Makefile
@@ -1440,6 +1440,7 @@ llama-server: \
 	examples/server/system-prompts.js.hpp \
 	examples/server/prompt-formats.js.hpp \
 	examples/server/json-schema-to-grammar.mjs.hpp \
+	examples/server/loading.html.hpp \
 	common/json.hpp \
 	common/stb_image.h \
 	$(OBJ_ALL)
diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
index dbe41f1fd1120..580f3a8248cf5 100644
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@@ -30,6 +30,7 @@ set(PUBLIC_ASSETS
     system-prompts.js
     prompt-formats.js
     json-schema-to-grammar.mjs
+    loading.html
 )
 
 foreach(asset ${PUBLIC_ASSETS})
diff --git a/examples/server/public/loading.html b/examples/server/public/loading.html
new file mode 100644
index 0000000000000..c3fd19a0f5ae7
--- /dev/null
+++ b/examples/server/public/loading.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv="refresh" content="5">
+    </head>
+    <body>
+        <div id="loading">
+            The model is loading. Please wait.<br/>
+            The user interface will appear soon.
+        </div>
+    </body>
+</html>
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2f41f45677c72..73cd6aae75e97 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -28,6 +28,7 @@
 #include "system-prompts.js.hpp"
 #include "prompt-formats.js.hpp"
 #include "json-schema-to-grammar.mjs.hpp"
+#include "loading.html.hpp"
 
 #include <atomic>
 #include <chrono>
@@ -2595,8 +2596,9 @@ int main(int argc, char ** argv) {
     auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
-            if(req.path == "/"){
-                res.set_content("<html><body>The model is loading. Please wait.<br/>The user interface will appear soon.<br/>You may need to refresh the page.</body></html>", "text/html; charset=utf-8");
+            auto tmp = string_split(req.path, '.');
+            if (req.path == "/" || tmp.back() == "html") {
+                res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
                 res.status = 503;
             } else {
                 res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));