From 2dff6f4d215cf2852e99b324aa0092d0155616a9 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 12 Sep 2024 15:05:40 +0300
Subject: [PATCH] log : simplify init

---
 common/common.cpp                        | 11 +++++++++++
 common/common.h                          |  3 +++
 common/log.cpp                           |  2 +-
 examples/batched-bench/batched-bench.cpp |  6 +-----
 examples/batched/batched.cpp             |  6 +-----
 .../convert-llama2c-to-ggml.cpp          |  7 ++-----
 examples/embedding/embedding.cpp         |  8 +-------
 examples/eval-callback/eval-callback.cpp |  8 +-------
 examples/imatrix/imatrix.cpp             |  8 ++------
 examples/infill/infill.cpp               |  8 +-------
 examples/llava/llava-cli.cpp             |  6 +-----
 examples/llava/minicpmv-cli.cpp          |  6 +-----
 examples/lookahead/lookahead.cpp         |  6 +-----
 examples/lookup/lookup-stats.cpp         |  6 +-----
 examples/lookup/lookup.cpp               |  6 +-----
 examples/main/main.cpp                   |  6 +++---
 examples/parallel/parallel.cpp           |  6 +-----
 examples/passkey/passkey.cpp             |  6 +-----
 examples/perplexity/perplexity.cpp       |  8 +-------
 examples/retrieval/retrieval.cpp         |  8 +-------
 examples/server/server.cpp               | 10 ++--------
 examples/server/utils.hpp                |  5 +++--
 examples/simple/simple.cpp               |  6 +-----
 examples/speculative/speculative.cpp     |  6 +-----
 24 files changed, 43 insertions(+), 115 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 1a1f7299c724e..b8db4088043f0 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -362,6 +362,17 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD
     return true;
 }
 
+void gpt_init() {
+    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
+        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
+            gpt_log_add(gpt_log_main(), level, "%s", text);
+        }
+    }, NULL);
+
+
+    LOG_INF("build: %d (%s) with %s for %s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET);
+}
+
 std::string gpt_params_get_system_info(const gpt_params & params) {
     std::ostringstream os;
 
diff --git a/common/common.h b/common/common.h
index 06ea7abad4c74..077572eff0493 100644
--- a/common/common.h
+++ b/common/common.h
@@ -339,6 +339,9 @@ struct gpt_params {
     bool batched_bench_output_jsonl = false;
 };
 
+// call once at the start of a program using common
+void gpt_init();
+
 std::string gpt_params_get_system_info(const gpt_params & params);
 
 bool parse_cpu_range(const std::string& range, bool(&boolmask)[GGML_MAX_N_THREADS]);
diff --git a/common/log.cpp b/common/log.cpp
index 528e4cda6fc68..062b10be99b08 100644
--- a/common/log.cpp
+++ b/common/log.cpp
@@ -67,7 +67,7 @@ struct gpt_log_entry {
         if (level != GGML_LOG_LEVEL_NONE) {
             if (timestamp) {
                 // [M.s.ms.us]
-                fprintf(fcur, "" LOG_COL_BLUE "%05d.%02d.%03d.%03d" LOG_COL_DEFAULT " ",
+                fprintf(fcur, "" LOG_COL_BLUE "%d.%02d.%03d.%03d" LOG_COL_DEFAULT " ",
                     (int) (timestamp / 1000000 / 60),
                     (int) (timestamp / 1000000 % 60),
                     (int) (timestamp / 1000 % 1000),
diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index 61562a387754e..023dbe483dd6e 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -15,11 +15,7 @@ static void print_usage(int, char ** argv) {
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index 413a6e571a822..64a5a58f5fc6e 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -15,11 +15,7 @@ static void print_usage(int, char ** argv) {
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 3d4df24dcb242..ecff95f9a69de 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -872,16 +872,13 @@ static std::string basename(const std::string &path) {
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     struct train_params params = get_default_train_params();
     if (!params_parse(argc, argv, &params)) {
         return 1;
     }
+
     Config config;
     TransformerWeights weights = {};
     {
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 716fb71272280..349bab452631d 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -79,11 +79,7 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
@@ -95,8 +91,6 @@ int main(int argc, char ** argv) {
     // For non-causal models, batch size must be equal to ubatch size
     params.n_ubatch = params.n_batch;
 
-    print_build_info();
-
     llama_backend_init();
     llama_numa_init(params.numa);
 
diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp
index 24ef2a6149344..2f55a6ddbb471 100644
--- a/examples/eval-callback/eval-callback.cpp
+++ b/examples/eval-callback/eval-callback.cpp
@@ -140,11 +140,7 @@ static bool run(llama_context * ctx, const gpt_params & params) {
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     callback_data cb_data;
 
@@ -154,8 +150,6 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    print_build_info();
-
     llama_backend_init();
     llama_numa_init(params.numa);
 
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index cdd34059e8c23..cb1985891f056 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -543,7 +543,7 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
 
         if (params.compute_ppl) {
             const int first = n_ctx/2;
-            const auto all_logits = num_batches > 1 ? logits.data() : llama_get_logits(ctx);
+            const auto * all_logits = num_batches > 1 ? logits.data() : llama_get_logits(ctx);
             process_logits(n_vocab, all_logits + first*n_vocab, tokens.data() + start + first, n_ctx - 1 - first,
                     workers, nll, nll2, logit_history.data() + start + first, prob_history.data() + start + first);
             count += n_ctx - first - 1;
@@ -573,11 +573,7 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index 213618f09ca74..7c7c292765f98 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -104,11 +104,7 @@ static void sigint_handler(int signo) {
 #endif
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
     g_params = &params;
@@ -159,8 +155,6 @@ int main(int argc, char ** argv) {
         LOG_WRN("%s: scaling RoPE frequency by %g.\n", __func__, params.rope_freq_scale);
     }
 
-    print_build_info();
-
     LOG_INF("%s: llama backend init\n", __func__);
     llama_backend_init();
     llama_numa_init(params.numa);
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index 01ee757c27f6c..d4383cfa076a2 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -270,11 +270,7 @@ static void llava_free(struct llava_context * ctx_llava) {
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     ggml_time_init();
 
diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
index 676410d8a41c1..d763703f8d760 100644
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@@ -248,11 +248,7 @@ static const char * llama_loop(struct llava_context * ctx_llava,struct gpt_sampl
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     ggml_time_init();
 
diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp
index 6e936a7d008b4..05a5d67c3c5d9 100644
--- a/examples/lookahead/lookahead.cpp
+++ b/examples/lookahead/lookahead.cpp
@@ -37,11 +37,7 @@ struct ngram_container {
 };
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
diff --git a/examples/lookup/lookup-stats.cpp b/examples/lookup/lookup-stats.cpp
index 1c486903e7890..752b3406867c0 100644
--- a/examples/lookup/lookup-stats.cpp
+++ b/examples/lookup/lookup-stats.cpp
@@ -13,11 +13,7 @@
 #include
 
 int main(int argc, char ** argv){
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp
index 11ddb37ccf919..420880c15d13d 100644
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@@ -13,11 +13,7 @@
 #include
 
 int main(int argc, char ** argv){
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 07814c4f63fea..5fb0797422b03 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -126,11 +126,13 @@ static std::string chat_add_and_format(struct llama_model * model, std::vector<
     auto formatted = llama_chat_format_single(model, g_params->chat_template, chat_msgs, new_msg, role == "user");
     chat_msgs.push_back({role, content});
-    LOG_DBG("formatted: %s\n", formatted.c_str());
+    LOG_DBG("formatted: '%s'\n", formatted.c_str());
     return formatted;
 }
 
 int main(int argc, char ** argv) {
+    gpt_init();
+
     llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
         if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
             gpt_log_add(gpt_log_main(), level, "%s", text);
         }
     }, NULL);
@@ -179,8 +181,6 @@ int main(int argc, char ** argv) {
         LOG_WRN("%s: warning: scaling RoPE frequency by %g.\n", __func__, params.rope_freq_scale);
     }
 
-    print_build_info();
-
     LOG_INF("%s: llama backend init\n", __func__);
     llama_backend_init();
 
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
index 519e97a560313..5b4c0362ce13d 100644
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -103,11 +103,7 @@ static std::vector<std::string> split_string(const std::string& input, char deli
 int main(int argc, char ** argv) {
     srand(1234);
 
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
diff --git a/examples/passkey/passkey.cpp b/examples/passkey/passkey.cpp
index 5df8ad7a4fd39..8e367f350336f 100644
--- a/examples/passkey/passkey.cpp
+++ b/examples/passkey/passkey.cpp
@@ -15,11 +15,7 @@ static void print_usage(int, char ** argv) {
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 11dbb4480227a..87423965e95c0 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -1957,11 +1957,7 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
@@ -2005,8 +2001,6 @@ int main(int argc, char ** argv) {
         params.n_ctx += params.ppl_stride/2;
     }
 
-    print_build_info();
-
     llama_backend_init();
     llama_numa_init(params.numa);
 
diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp
index ed5af43070a80..fd236fb61d06e 100644
--- a/examples/retrieval/retrieval.cpp
+++ b/examples/retrieval/retrieval.cpp
@@ -112,11 +112,7 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     gpt_params params;
 
@@ -137,8 +133,6 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    print_build_info();
-
     LOG_INF("processing files:\n");
     for (auto & context_file : params.context_files) {
         LOG_INF("%s\n", context_file.c_str());
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index a54365f3053c6..8890054743079 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2372,11 +2372,7 @@ inline void signal_handler(int signal) {
 }
 
 int main(int argc, char ** argv) {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_env) {
-            gpt_log_add(gpt_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    gpt_init();
 
     // own arguments required by this example
     gpt_params params;
@@ -2401,8 +2397,6 @@ int main(int argc, char ** argv) {
     llama_backend_init();
     llama_numa_init(params.numa);
 
-    LOG_INF("build: %d %s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
-
     LOG_INF("system info: n_threads = %d, n_threads_batch = %d, total_threads = %d\n", params.cpuparams.n_threads, params.cpuparams_batch.n_threads, std::thread::hardware_concurrency());
     LOG_INF("\n");
     LOG_INF("%s\n", gpt_params_get_system_info(params).c_str());
@@ -3193,7 +3187,7 @@ int main(int argc, char ** argv) {
     }
 
     // print sample chat example to make it clear which template is used
-    LOG_INF("%s: chat template, built_in: %d, chat_example: %s\n", __func__, params.chat_template.empty(), llama_chat_format_example(ctx_server.model, params.chat_template).c_str());
+    LOG_INF("%s: chat template, built_in: %d, chat_example: '%s'\n", __func__, params.chat_template.empty(), llama_chat_format_example(ctx_server.model, params.chat_template).c_str());
 
     ctx_server.queue_tasks.on_new_task(std::bind(
         &server_context::process_single_task, &ctx_server, std::placeholders::_1));
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index e7527845d799a..7b6a261a7e066 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -85,7 +85,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
     }
 
     const auto formatted_chat = llama_chat_apply_template(model, tmpl, chat, true);
-    LOG_DBG("formatted_chat: %s\n", formatted_chat.c_str());
+    LOG_DBG("formatted_chat: '%s'\n", formatted_chat.c_str());
 
     return formatted_chat;
 }
@@ -295,7 +295,8 @@ static json probs_vector_to_json(const llama_context * ctx, const std::vector
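Per the diffstat, examples/simple/simple.cpp and examples/speculative/speculative.cpp receive the same one-line replacement. The resulting startup sequence for a program built on common is sketched below; the file name and the header names are illustrative assumptions rather than something this patch adds, and only gpt_init(), gpt_params, gpt_params_get_system_info() and LOG_INF are taken from the patch itself.

    // hypothetical_example.cpp - minimal sketch of the startup pattern this patch standardizes on,
    // assuming common's "common.h" and "log.h" headers declare gpt_init(), gpt_params and LOG_INF
    #include "common.h"
    #include "log.h"

    int main(int argc, char ** argv) {
        gpt_init(); // installs the llama.cpp log callback and logs the build info once

        gpt_params params;
        // ... argument parsing, llama_backend_init(), and the rest of the example as before ...

        LOG_INF("%s\n", gpt_params_get_system_info(params).c_str());
        return 0;
    }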