From 07a38d096caf1805ecc4d4c05c376843267b44c8 Mon Sep 17 00:00:00 2001 From: Eric Curtin Date: Fri, 13 Dec 2024 22:46:13 +0000 Subject: [PATCH] Improve progress bar Set default width to whatever the terminal is. Also fixed a small bug around default n_gpu_layers value. Signed-off-by: Eric Curtin --- README.md | 4 +- examples/run/run.cpp | 275 ++++++++++++++++++++++++++++--------------- 2 files changed, 183 insertions(+), 96 deletions(-) diff --git a/README.md b/README.md index 54466c2501c081..42979f73c42c53 100644 --- a/README.md +++ b/README.md @@ -409,7 +409,7 @@ To learn more about model quantization, [read this documentation](examples/quant -[^1]: [examples/perplexity/README.md](examples/perplexity/README.md) +[^1]: [examples/perplexity/README.md](https://github.com/ggerganov/llama.cpp/blob/master/examples/perplexity/README.md) [^2]: [https://huggingface.co/docs/transformers/perplexity](https://huggingface.co/docs/transformers/perplexity) ## [`llama-bench`](example/bench) @@ -446,7 +446,7 @@ To learn more about model quantization, [read this documentation](examples/quant -[^3]: [https://github.com/containers/ramalama](RamaLama) +[^3]: [RamaLama](https://github.com/containers/ramalama) ## [`llama-simple`](examples/simple) diff --git a/examples/run/run.cpp b/examples/run/run.cpp index 834ea8f7b4aeb4..2c8c535341fda1 100644 --- a/examples/run/run.cpp +++ b/examples/run/run.cpp @@ -1,6 +1,7 @@ #if defined(_WIN32) # include #else +# include # include #endif @@ -29,7 +30,6 @@ class Opt { public: int init(int argc, const char ** argv) { - construct_help_str_(); // Parse arguments if (parse(argc, argv)) { printe("Error: Failed to parse arguments.\n"); @@ -48,14 +48,53 @@ class Opt { std::string model_; std::string user_; - int context_size_ = 2048, ngl_ = -1; + int context_size_ = -1, ngl_ = -1; + bool verbose_ = false; private: - std::string help_str_; bool help_ = false; - void construct_help_str_() { - help_str_ = + int parse(int argc, const char ** argv) { + int positional_args_i = 0; + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0) { + if (i + 1 >= argc) { + return 1; + } + + context_size_ = std::atoi(argv[++i]); + } else if (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--ngl") == 0) { + if (i + 1 >= argc) { + return 1; + } + + ngl_ = std::atoi(argv[++i]); + } else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0 || + strcmp(argv[i], "--log-verbose") == 0) { + verbose_ = true; + } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { + help_ = true; + return 0; + } else if (!positional_args_i) { + if (!argv[i][0] || argv[i][0] == '-') { + return 1; + } + + ++positional_args_i; + model_ = argv[i]; + } else if (positional_args_i == 1) { + ++positional_args_i; + user_ = argv[i]; + } else { + user_ += " " + std::string(argv[i]); + } + } + + return model_.empty(); // model_ is the only required value + } + + void help() const { + printf( "Description:\n" " Runs a llm\n" "\n" @@ -64,15 +103,11 @@ class Opt { "\n" "Options:\n" " -c, --context-size \n" - " Context size (default: " + - std::to_string(context_size_); - help_str_ += - ")\n" + " Context size (default: %d)\n" " -n, --ngl \n" - " Number of GPU layers (default: " + - std::to_string(ngl_); - help_str_ += - ")\n" + " Number of GPU layers (default: %d)\n" + " -v, --verbose, --log-verbose\n" + " Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n" " -h, --help\n" " Show help message\n" "\n" @@ -96,43 +131,10 @@ class Opt { " llama-run https://example.com/some-file1.gguf\n" " llama-run some-file2.gguf\n" " llama-run file://some-file3.gguf\n" - " llama-run --ngl 99 some-file4.gguf\n" - " llama-run --ngl 99 some-file5.gguf Hello World\n"; - } - - int parse(int argc, const char ** argv) { - int positional_args_i = 0; - for (int i = 1; i < argc; ++i) { - if (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0) { - if (i + 1 >= argc) { - return 1; - } - - context_size_ = std::atoi(argv[++i]); - } else if (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--ngl") == 0) { - if (i + 1 >= argc) { - return 1; - } - - ngl_ = std::atoi(argv[++i]); - } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { - help_ = true; - return 0; - } else if (!positional_args_i) { - ++positional_args_i; - model_ = argv[i]; - } else if (positional_args_i == 1) { - ++positional_args_i; - user_ = argv[i]; - } else { - user_ += " " + std::string(argv[i]); - } - } - - return model_.empty(); // model_ is the only required value + " llama-run --ngl 999 some-file4.gguf\n" + " llama-run --ngl 999 some-file5.gguf Hello World\n", + llama_context_default_params().n_batch, llama_model_default_params().n_gpu_layers); } - - void help() const { printf("%s", help_str_.c_str()); } }; struct progress_data { @@ -151,8 +153,20 @@ struct FileDeleter { typedef std::unique_ptr FILE_ptr; +static int get_terminal_width() { +#if defined(_WIN32) + CONSOLE_SCREEN_BUFFER_INFO csbi; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi); + return csbi.srWindow.Right - csbi.srWindow.Left + 1; +#else + struct winsize w; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + return w.ws_col; +#endif +} + #ifdef LLAMA_USE_CURL -class CurlWrapper { +class HttpClient { public: int init(const std::string & url, const std::vector & headers, const std::string & output_file, const bool progress, std::string * response_str = nullptr) { @@ -181,7 +195,7 @@ class CurlWrapper { return 0; } - ~CurlWrapper() { + ~HttpClient() { if (chunk) { curl_slist_free_all(chunk); } @@ -219,7 +233,7 @@ class CurlWrapper { if (progress) { curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); curl_easy_setopt(curl, CURLOPT_XFERINFODATA, &data); - curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_callback); + curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, update_progress); } } @@ -270,9 +284,9 @@ class CurlWrapper { static std::string human_readable_size(curl_off_t size) { static const char * suffix[] = { "B", "KB", "MB", "GB", "TB" }; - char length = sizeof(suffix) / sizeof(suffix[0]); - int i = 0; - double dbl_size = size; + char length = sizeof(suffix) / sizeof(suffix[0]); + int i = 0; + double dbl_size = size; if (size > 1024) { for (i = 0; (size / 1024) > 0 && i < length - 1; i++, size /= 1024) { dbl_size = size / 1024.0; @@ -284,8 +298,8 @@ class CurlWrapper { return out.str(); } - static int progress_callback(void * ptr, curl_off_t total_to_download, curl_off_t now_downloaded, curl_off_t, - curl_off_t) { + static int update_progress(void * ptr, curl_off_t total_to_download, curl_off_t now_downloaded, curl_off_t, + curl_off_t) { progress_data * data = static_cast(ptr); if (total_to_download <= 0) { return 0; @@ -293,27 +307,80 @@ class CurlWrapper { total_to_download += data->file_size; const curl_off_t now_downloaded_plus_file_size = now_downloaded + data->file_size; - const curl_off_t percentage = (now_downloaded_plus_file_size * 100) / total_to_download; - const curl_off_t pos = (percentage / 5); + const curl_off_t percentage = calculate_percentage(now_downloaded_plus_file_size, total_to_download); + std::string progress_prefix = generate_progress_prefix(percentage); + + const double speed = calculate_speed(now_downloaded, data->start_time); + const double tim = (total_to_download - now_downloaded) / speed; + std::string progress_suffix = + generate_progress_suffix(now_downloaded_plus_file_size, total_to_download, speed, tim); + + int progress_bar_width = calculate_progress_bar_width(progress_prefix, progress_suffix); std::string progress_bar; - for (int i = 0; i < 20; ++i) { - progress_bar.append((i < pos) ? "█" : " "); - } + generate_progress_bar(progress_bar_width, percentage, progress_bar); - // Calculate download speed and estimated time to completion - const auto now = std::chrono::steady_clock::now(); - const std::chrono::duration elapsed_seconds = now - data->start_time; - const double speed = now_downloaded / elapsed_seconds.count(); - const double estimated_time = (total_to_download - now_downloaded) / speed; - printe("\r%ld%% |%s| %s/%s %.2f MB/s %s ", percentage, progress_bar.c_str(), - human_readable_size(now_downloaded).c_str(), human_readable_size(total_to_download).c_str(), - speed / (1024 * 1024), human_readable_time(estimated_time).c_str()); - fflush(stderr); + print_progress(progress_prefix, progress_bar, progress_suffix); data->printed = true; return 0; } + static curl_off_t calculate_percentage(curl_off_t now_downloaded_plus_file_size, curl_off_t total_to_download) { + return (now_downloaded_plus_file_size * 100) / total_to_download; + } + + static std::string generate_progress_prefix(curl_off_t percentage) { + std::ostringstream progress_output; + progress_output << std::setw(3) << percentage << "% |"; + return progress_output.str(); + } + + static double calculate_speed(curl_off_t now_downloaded, const std::chrono::steady_clock::time_point & start_time) { + const auto now = std::chrono::steady_clock::now(); + const std::chrono::duration elapsed_seconds = now - start_time; + return now_downloaded / elapsed_seconds.count(); + } + + static std::string generate_progress_suffix(curl_off_t now_downloaded_plus_file_size, curl_off_t total_to_download, + double speed, double estimated_time) { + const int width = 10; + std::ostringstream progress_output; + progress_output << std::setw(width) << human_readable_size(now_downloaded_plus_file_size) << "/" + << std::setw(width) << human_readable_size(total_to_download) << std::setw(width) + << human_readable_size(speed) << "/s" << std::setw(width) + << human_readable_time(estimated_time); + return progress_output.str(); + } + + static int calculate_progress_bar_width(const std::string & progress_prefix, const std::string & progress_suffix) { + int progress_bar_width = get_terminal_width() - progress_prefix.size() - progress_suffix.size() - 5; + if (progress_bar_width < 1) { + progress_bar_width = 1; + } + + return progress_bar_width; + } + + static std::string generate_progress_bar(int progress_bar_width, curl_off_t percentage, + std::string & progress_bar) { + const curl_off_t pos = (percentage * progress_bar_width) / 100; + for (int i = 0; i < progress_bar_width; ++i) { + progress_bar.append((i < pos) ? "█" : " "); + } + + return progress_bar; + } + + static void print_progress(const std::string & progress_prefix, const std::string & progress_bar, + const std::string & progress_suffix) { + std::ostringstream progress_output; + progress_output << progress_prefix << progress_bar << "| " << progress_suffix; + printe( + "\r%*s" + "\r%s", + get_terminal_width(), " ", progress_output.str().c_str()); + } + // Function to write data to a file static size_t write_data(void * ptr, size_t size, size_t nmemb, void * stream) { FILE * out = static_cast(stream); @@ -357,8 +424,8 @@ class LlamaData { #ifdef LLAMA_USE_CURL int download(const std::string & url, const std::vector & headers, const std::string & output_file, const bool progress, std::string * response_str = nullptr) { - CurlWrapper curl; - if (curl.init(url, headers, output_file, progress, response_str)) { + HttpClient http; + if (http.init(url, headers, output_file, progress, response_str)) { return 1; } @@ -467,6 +534,10 @@ class LlamaData { llama_model_params model_params = llama_model_default_params(); model_params.n_gpu_layers = opt.ngl_ >= 0 ? opt.ngl_ : model_params.n_gpu_layers; resolve_model(opt.model_); + printe( + "\r%*s" + "\rLoading model\r", + get_terminal_width(), " "); llama_model_ptr model(llama_load_model_from_file(opt.model_.c_str(), model_params)); if (!model) { printe("%s: error: unable to load model from file: %s\n", __func__, opt.model_.c_str()); @@ -478,8 +549,7 @@ class LlamaData { // Initializes the context with the specified parameters llama_context_ptr initialize_context(const llama_model_ptr & model, const int n_ctx) { llama_context_params ctx_params = llama_context_default_params(); - ctx_params.n_ctx = n_ctx; - ctx_params.n_batch = n_ctx; + ctx_params.n_ctx = ctx_params.n_batch = n_ctx >= 0 ? n_ctx : ctx_params.n_batch; llama_context_ptr context(llama_new_context_with_model(model.get(), ctx_params)); if (!context) { printe("%s: error: failed to create the llama_context\n", __func__); @@ -609,16 +679,20 @@ static int read_user_input(std::string & user) { } // Function to generate a response based on the prompt -static int generate_response(LlamaData & llama_data, const std::string & prompt, std::string & response) { +static int generate_response(LlamaData & llama_data, const std::string & prompt, std::string & response, + const bool stdout_a_terminal) { // Set response color - printf("\033[33m"); + if (stdout_a_terminal) { + printf("\033[33m"); + } + if (generate(llama_data, prompt, response)) { printe("failed to generate response\n"); return 1; } // End response with color reset and newline - printf("\n\033[0m"); + printf("\n%s", stdout_a_terminal ? "\033[0m" : ""); return 0; } @@ -642,15 +716,37 @@ static int handle_user_input(std::string & user_input, const std::string & user_ } printf( - "\r " - "\r\033[32m> \033[0m"); + "\r%*s" + "\r\033[32m> \033[0m", + get_terminal_width(), " "); return read_user_input(user_input); // Returns true if input ends the loop } +static bool is_stdin_a_terminal() { +#if defined(_WIN32) + HANDLE hStdin = GetStdHandle(STD_INPUT_HANDLE); + DWORD mode; + return GetConsoleMode(hStdin, &mode); +#else + return isatty(STDIN_FILENO); +#endif +} + +static bool is_stdout_a_terminal() { +#if defined(_WIN32) + HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD mode; + return GetConsoleMode(hStdout, &mode); +#else + return isatty(STDOUT_FILENO); +#endif +} + // Function to tokenize the prompt static int chat_loop(LlamaData & llama_data, const std::string & user_) { int prev_len = 0; llama_data.fmtted.resize(llama_n_ctx(llama_data.context.get())); + static const bool stdout_a_terminal = is_stdout_a_terminal(); while (true) { // Get user input std::string user_input; @@ -665,7 +761,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user_) { std::string prompt(llama_data.fmtted.begin() + prev_len, llama_data.fmtted.begin() + new_len); std::string response; - if (generate_response(llama_data, prompt, response)) { + if (generate_response(llama_data, prompt, response, stdout_a_terminal)) { return 1; } @@ -682,22 +778,13 @@ static int chat_loop(LlamaData & llama_data, const std::string & user_) { return 0; } -static void log_callback(const enum ggml_log_level level, const char * text, void *) { - if (level == GGML_LOG_LEVEL_ERROR) { +static void log_callback(const enum ggml_log_level level, const char * text, void * p) { + const Opt * opt = static_cast(p); + if (opt->verbose_ || level == GGML_LOG_LEVEL_ERROR) { printe("%s", text); } } -static bool is_stdin_a_terminal() { -#if defined(_WIN32) - HANDLE hStdin = GetStdHandle(STD_INPUT_HANDLE); - DWORD mode; - return GetConsoleMode(hStdin, &mode); -#else - return isatty(STDIN_FILENO); -#endif -} - static std::string read_pipe_data() { std::ostringstream result; result << std::cin.rdbuf(); // Read all data from std::cin @@ -721,7 +808,7 @@ int main(int argc, const char ** argv) { opt.user_ += read_pipe_data(); } - llama_log_set(log_callback, nullptr); + llama_log_set(log_callback, &opt); LlamaData llama_data; if (llama_data.init(opt)) { return 1;