cont : llama-cli + common [no ci]

ggml-org · Sep 11, 2024 · d206f87 · d206f87
1 parent c1845a9
commit d206f87
Show file tree

Hide file tree

Showing 10 changed files with 299 additions and 207 deletions.
diff --git a/common/arg.cpp b/common/arg.cpp
@@ -1950,8 +1950,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             else { std::invalid_argument("invalid value"); }
         }
     ).set_examples({LLAMA_EXAMPLE_BENCH}));
-#ifndef LOG_DISABLE_LOGS
-    // TODO: make this looks less weird
     add_opt(llama_arg(
         {"--log-disable"},
         "Log disable",
@@ -1966,7 +1964,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             gpt_log_set_file(gpt_log_main(), value.c_str());
         }
     ));
-#endif // LOG_DISABLE_LOGS
 
     return ctx_arg;
 }

diff --git a/common/common.cpp b/common/common.cpp
diff --git a/common/common.h b/common/common.h
@@ -374,6 +374,9 @@ static std::vector<T> string_split(const std::string & str, char delim) {
 bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
 void string_process_escapes(std::string & input);
 
+std::string string_from_tokens(const struct llama_context * ctx, const std::vector<llama_token> & tokens);
+std::string string_from_batch (const struct llama_context * ctx, const struct llama_batch & batch);
+
 //
 // Filesystem utils
 //

diff --git a/common/log.cpp b/common/log.cpp
@@ -1,9 +1,11 @@
 #include "log.h"
 
-#include <thread>
-#include <mutex>
-#include <cstdio>
 #include <condition_variable>
+#include <cstdio>
+#include <mutex>
+#include <thread>
+
+int gpt_log_verbosity_env = getenv("LLAMA_LOG") ? atoi(getenv("LLAMA_LOG")) : LOG_DEFAULT_LLAMA;
 
 #define LOG_COLORS // TMP
 
@@ -36,46 +38,56 @@ static int64_t t_us() {
 struct gpt_log_entry {
     enum ggml_log_level level;
 
-    int verbosity;
     int64_t timestamp;
 
     std::vector<char> msg;
 
     // signals the worker thread to stop
     bool is_end;
 
-    void print(FILE * file) {
+    void print(FILE * file = nullptr) const {
+        FILE * fcur = file;
+        if (!fcur) {
+            // stderr displays DBG messages only when the verbosity is high
+            // these messages can still be logged to a file
+            if (level == GGML_LOG_LEVEL_DEBUG && gpt_log_verbosity_env < LOG_DEFAULT_DEBUG) {
+                return;
+            }
+
+            fcur = stdout;
+
+            if (level != GGML_LOG_LEVEL_NONE) {
+                fcur = stderr;
+            }
+        }
+
         if (level != GGML_LOG_LEVEL_NONE) {
             if (timestamp) {
                 // [M.s.ms.us]
-                fprintf(file, "[%04d.%02d.%03d.%03d] ",
+                fprintf(fcur, "" LOG_COL_BLUE "%05d.%02d.%03d.%03d" LOG_COL_DEFAULT " ",
                         (int) (timestamp / 1000000 / 60),
                         (int) (timestamp / 1000000 % 60),
                         (int) (timestamp / 1000 % 1000),
                         (int) (timestamp % 1000));
             }
 
             switch (level) {
-                case GGML_LOG_LEVEL_INFO:
-                    fprintf(file, LOG_COL_GREEN "INF " LOG_COL_DEFAULT);
-                    break;
-                case GGML_LOG_LEVEL_WARN:
-                    fprintf(file, LOG_COL_MAGENTA "WRN " LOG_COL_DEFAULT);
-                    break;
-                case GGML_LOG_LEVEL_ERROR:
-                    fprintf(file, LOG_COL_RED "ERR " LOG_COL_DEFAULT);
-                    break;
-                case GGML_LOG_LEVEL_DEBUG:
-                    fprintf(file, LOG_COL_YELLOW "DBG " LOG_COL_DEFAULT);
-                    break;
+                case GGML_LOG_LEVEL_INFO:  fprintf(fcur, LOG_COL_GREEN   "I " LOG_COL_DEFAULT); break;
+                case GGML_LOG_LEVEL_WARN:  fprintf(fcur, LOG_COL_MAGENTA "W "                ); break;
+                case GGML_LOG_LEVEL_ERROR: fprintf(fcur, LOG_COL_RED     "E "                ); break;
+                case GGML_LOG_LEVEL_DEBUG: fprintf(fcur, LOG_COL_YELLOW  "D "                ); break;
                 default:
                     break;
             }
         }
 
-        fprintf(file, "%s", msg.data());
+        fprintf(fcur, "%s", msg.data());
 
-        fflush(file);
+        if (level == GGML_LOG_LEVEL_WARN || level == GGML_LOG_LEVEL_ERROR || level == GGML_LOG_LEVEL_DEBUG) {
+            fprintf(fcur, LOG_COL_DEFAULT);
+        }
+
+        fflush(fcur);
     }
 };
 
@@ -120,7 +132,7 @@ struct gpt_log {
     gpt_log_entry cur;
 
 public:
-    void add(enum ggml_log_level level, int verbosity, const char * fmt, va_list args) {
+    void add(enum ggml_log_level level, const char * fmt, va_list args) {
         std::lock_guard<std::mutex> lock(mtx);
 
         if (!running) {
@@ -130,15 +142,34 @@ struct gpt_log {
         auto & entry = entries[tail];
 
         {
+#if 1
             const size_t n = vsnprintf(entry.msg.data(), entry.msg.size(), fmt, args);
             if (n >= entry.msg.size()) {
                 entry.msg.resize(n + 1);
                 vsnprintf(entry.msg.data(), entry.msg.size(), fmt, args);
             }
+#else
+            // hack for bolding arguments
+
+            std::stringstream ss;
+            for (int i = 0; fmt[i] != 0; i++) {
+                if (fmt[i] == '%') {
+                    ss << LOG_COL_BOLD;
+                    while (fmt[i] != ' ' && fmt[i] != ')' && fmt[i] != ']' && fmt[i] != 0) ss << fmt[i++];
+                    ss << LOG_COL_DEFAULT;
+                    if (fmt[i] == 0) break;
+                }
+                ss << fmt[i];
+            }
+            const size_t n = vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args);
+            if (n >= entry.msg.size()) {
+                entry.msg.resize(n + 1);
+                vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args);
+            }
+#endif
         }
 
         entry.level = level;
-        entry.verbosity = verbosity;
         entry.timestamp = 0;
         if (timestamps) {
             entry.timestamp = t_us() - t_start;
@@ -192,7 +223,7 @@ struct gpt_log {
                     break;
                 }
 
-                cur.print(stdout);
+                cur.print(); // stdout and stderr
 
                 if (file) {
                     cur.print(file);
@@ -267,10 +298,10 @@ void gpt_log_free(struct gpt_log * log) {
     delete log;
 }
 
-void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, int verbosity, const char * fmt, ...) {
+void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * fmt, ...) {
     va_list args;
     va_start(args, fmt);
-    log->add(level, verbosity, fmt, args);
+    log->add(level, fmt, args);
     va_end(args);
 }
 

diff --git a/common/log.h b/common/log.h
@@ -2,8 +2,6 @@
 
 #include "ggml.h"
 
-#include <cstdarg>
-
 #ifndef __GNUC__
 #    define LOG_ATTRIBUTE_FORMAT(...)
 #elif defined(__MINGW32__)
@@ -12,9 +10,11 @@
 #    define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
 #endif
 
-#ifndef LOG_VERBOSITY
-#define LOG_VERBOSITY 10
-#endif
+#define LOG_DEFAULT_DEBUG 10
+#define LOG_DEFAULT_LLAMA 5
+
+// initialized in log.cpp from environment variable LLAMA_LOG
+extern int gpt_log_verbosity_env;
 
 struct gpt_log;
 
@@ -24,30 +24,28 @@ void             gpt_log_pause (struct gpt_log * log);
 void             gpt_log_resume(struct gpt_log * log);
 void             gpt_log_free  (struct gpt_log * log);
 
-LOG_ATTRIBUTE_FORMAT(4, 5)
-void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, int verbosity, const char * fmt, ...);
+LOG_ATTRIBUTE_FORMAT(3, 4)
+void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * fmt, ...);
 
 void gpt_log_set_file      (struct gpt_log * log, const char * file); // not thread-safe
 void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps);
 
 #define LOG_TMPL(level, verbosity, ...) \
     do { \
-        if ((verbosity) <= LOG_VERBOSITY) { \
-            gpt_log_add(gpt_log_main(), (level), (verbosity), __VA_ARGS__); \
+        if ((verbosity) <= gpt_log_verbosity_env) { \
+            gpt_log_add(gpt_log_main(), (level), __VA_ARGS__); \
         } \
     } while (0)
 
 #define LOG(...)             LOG_TMPL(GGML_LOG_LEVEL_NONE, 0,         __VA_ARGS__)
 #define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
 
-#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  0, __VA_ARGS__)
-#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  0, __VA_ARGS__)
-#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0, __VA_ARGS__)
-#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, 0, __VA_ARGS__)
+#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  0,                 __VA_ARGS__)
+#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  0,                 __VA_ARGS__)
+#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0,                 __VA_ARGS__)
+#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
 
 #define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  verbosity, __VA_ARGS__)
 #define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  verbosity, __VA_ARGS__)
 #define LOG_ERRV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, verbosity, __VA_ARGS__)
 #define LOG_DBGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, verbosity, __VA_ARGS__)
-
-#define LOG_TOKENS_TOSTR_PRETTY(...) std::string("dummy")
diff --git a/common/sampling.cpp b/common/sampling.cpp
@@ -325,7 +325,7 @@ llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl) {
 }
 
 std::string gpt_sampler_print(const struct gpt_sampler * gsmpl) {
-    std::string result = "\tlogits ";
+    std::string result = "logits ";
 
     for (int i = 0; i < llama_sampler_chain_n(gsmpl->chain); i++) {
         const auto * smpl = llama_sampler_chain_get(gsmpl->chain, i);