From 6f0dbf6ab087bcd286fb78560099ca0458316735 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 8 Jul 2024 09:34:35 +0300
Subject: [PATCH] infill : assert prefix/suffix tokens + remove old space logic
 (#8351)

---
 common/log.h               |  2 +-
 examples/infill/infill.cpp | 25 ++++++++-----------------
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/common/log.h b/common/log.h
index 09fa63c2677a7..1bc5328ce3e11 100644
--- a/common/log.h
+++ b/common/log.h
@@ -630,7 +630,7 @@ inline std::string LOG_TOKENS_TOSTR_PRETTY(const C & ctx, const T & tokens)
     buf << "[ ";
 
     bool first = true;
-    for (const auto &token : tokens)
+    for (const auto & token : tokens)
     {
         if (!first) {
             buf << ", ";
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index 0e682154d5f6b..dc93d2301391c 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -204,21 +204,17 @@ int main(int argc, char ** argv) {
     GGML_ASSERT(llama_add_eos_token(model) != 1);
     LOG("add_bos: %d\n", add_bos);
 
-    bool suff_rm_leading_spc = params.escape;
-    if (suff_rm_leading_spc && params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
-        params.input_suffix.erase(0, 1);
-        suff_rm_leading_spc = false;
-    }
     std::vector<llama_token> embd_inp;
     std::vector<llama_token> embd_end;
     std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false);
     std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false);
-    const int space_token = 29871;
-    if (suff_rm_leading_spc && inp_sfx[0] == space_token) {
-        inp_sfx.erase(inp_sfx.begin());
-    }
+
+    GGML_ASSERT(llama_token_prefix(model) >= 0);
+    GGML_ASSERT(llama_token_suffix(model) >= 0);
+
     inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
     inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
+
     embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
     embd_end = params.spm_infill ? inp_pfx : inp_sfx;
     if (add_bos) {
@@ -516,19 +512,14 @@ int main(int argc, char ** argv) {
                     string_process_escapes(params.input_prefix);
                     string_process_escapes(params.input_suffix);
                 }
-                suff_rm_leading_spc = params.escape;
-                if (suff_rm_leading_spc && params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
-                    params.input_suffix.erase(0, 1);
-                    suff_rm_leading_spc = false;
-                }
+
                 // tokenize new prefix and suffix
                 std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false);
                 std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false);
-                if (suff_rm_leading_spc && inp_sfx[0] == space_token) {
-                    inp_sfx.erase(inp_sfx.begin());
-                }
+
                 inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
                 inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
+
                 embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
                 embd_end = params.spm_infill ? inp_pfx : inp_sfx;
                 if (add_bos) {