diff --git a/common/common.cpp b/common/common.cpp
index 9fa18472512ab..34d0eff78f312 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1714,6 +1714,79 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
 #define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
 #endif
 
+LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
+static std::string format(const char * fmt, ...) { // printf-style formatting that returns a std::string
+    va_list args_measure;
+    va_list args_render;
+    va_start(args_measure, fmt);
+    va_copy(args_render, args_measure); // a va_list can only be walked once, so the second pass needs its own copy
+    const int n_chars = vsnprintf(NULL, 0, fmt, args_measure); // dry run: length without the terminating NUL
+    GGML_ASSERT(n_chars >= 0 && n_chars < INT_MAX); // NOLINT
+    std::vector<char> storage(n_chars + 1); // +1 for the terminating NUL
+    const int n_written = vsnprintf(storage.data(), n_chars + 1, fmt, args_render);
+    GGML_ASSERT(n_written == n_chars);
+    va_end(args_render);
+    va_end(args_measure);
+    return std::string(storage.data(), n_chars);
+}
+
+// Print one usage entry per option: flags (plus value placeholder) in a 40-column gutter, help text wrapped at 50 chars to its right.
+void gpt_params_print_usage(std::vector<llama_arg> & options) {
+    constexpr static int n_leading_spaces = 40;
+    std::string leading_spaces(n_leading_spaces, ' ');
+    for (const auto & opt : options) {
+        std::ostringstream ss;
+        for (const auto & arg : opt.args) {
+            const bool is_first = &arg == &opt.args.front();
+            const bool is_last  = &arg == &opt.args.back();
+            // no separator after the last flag (a lone flag gets no dangling ","); a first flag followed by others is padded to 7 columns
+            const std::string text = is_last ? arg : arg + (is_first ? "," : ", ");
+            ss << (is_first && !is_last ? format("%-7s", text.c_str()) : text);
+        }
+        if (!opt.value_ex.empty()) ss << " " << opt.value_ex;
+        if (ss.tellp() > n_leading_spaces - 3) {
+            ss << "\n" << leading_spaces; // flags column is too long: start the help text on a fresh, indented line
+        } else {
+            ss << std::string(leading_spaces.size() - ss.tellp(), ' '); // pad up to the help column, same line
+        }
+        const auto help_lines = llama_arg::break_str_into_lines(opt.help, 50);
+        for (const auto & line : help_lines) {
+            ss << (&line == &help_lines.front() ? "" : leading_spaces) << line << "\n";
+        }
+        if (help_lines.empty()) ss << "\n"; // still terminate the entry when the option has no help text
+        printf("%s", ss.str().c_str());
+    }
+}
+
+// Build the list of supported options; the --model handler keeps a reference to `params`, which must outlive the returned vector.
+std::vector<llama_arg> gpt_params_parser_register(gpt_params & params) {
+    std::vector<llama_arg> options;
+    options.push_back(llama_arg(
+        {"-h", "--help", "--usage"},
+        "print usage and exit",
+        []() { return true; } // placeholder; the real handler is installed below, once the list is complete
+    ));
+    options.push_back(llama_arg(
+        {"-m", "--model"},
+        format("model path (default: models/$filename with filename from --hf-file or --model-url if set, otherwise %s)", params.model.c_str()),
+        [&params](std::string value) {
+            params.model = value;
+            return true;
+        }
+    ).set_value_ex("FNAME"));
+    // the help handler owns a copy of the list: a reference to the local `options` would dangle once it is returned by value
+    std::vector<llama_arg> snapshot = options;
+    options.front().handler_void = [snapshot]() mutable { gpt_params_print_usage(snapshot); exit(0); return true; };
+    return options;
+}
+
+bool gpt_params_parser_run(int argc, char ** argv, std::vector<llama_arg> & options) { // NOTE(review): argc/argv are not consulted yet
+    for (size_t i = 0; i < options.size(); ++i) {
+        if (options[i].handler_void) options[i].handler_void(); // fires every void handler in order; its result is ignored
+    }
+    return true;
+}
+
 void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     const llama_sampling_params & sparams = params.sparams;
 
diff --git a/common/common.h b/common/common.h
index cb5e7f6df10c5..04f4476f039de 100644
--- a/common/common.h
+++ b/common/common.h
@@ -14,6 +14,7 @@
 #include <vector>
 #include <random>
 #include <thread>
+#include <set>
 #include <unordered_map>
 #include <tuple>
 
@@ -123,7 +124,7 @@ struct gpt_params {
     // // sampling parameters
     struct llama_sampling_params sparams;
 
-    std::string model                = ""; // model path
+    std::string model                = "model.gguf"; // model path
     std::string model_draft          = ""; // draft model for speculative decoding
     std::string model_alias          = "unknown"; // model alias
     std::string model_url            = ""; // model url to download
@@ -277,6 +278,66 @@ struct gpt_params {
     std::string lora_outfile = "ggml-lora-merged-f16.gguf";
 };
 
+enum llama_example { // tags stored in llama_arg::examples
+    LLAMA_EXAMPLE_ALL, // the default tag of every llama_arg
+    LLAMA_EXAMPLE_SERVER, // presumably the server example program — TODO confirm
+    LLAMA_EXAMPLE_MAIN, // presumably the main example program — TODO confirm
+};
+
+struct llama_arg { // one command-line option: its flags, help text and the callback that consumes its value
+    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_ALL}; // llama_example tags; replaced via set_examples()
+    std::vector<std::string> args; // every spelling of the flag, e.g. {"-m", "--model"}
+    std::string value_ex; // value placeholder printed after the flags in the usage text, e.g. "FNAME"
+    std::string env; // set via set_env(); presumably an environment variable name — TODO confirm
+    std::string help; // description printed in the usage text
+    std::function<bool(void)>        handler_void   = nullptr; // each constructor fills in exactly one handler
+    std::function<bool(std::string)> handler_string = nullptr;
+    std::function<bool(bool)>        handler_bool   = nullptr;
+    std::function<bool(int)>         handler_int    = nullptr; // no constructor sets this one yet
+    std::function<bool(float)>       handler_float  = nullptr; // no constructor sets this one yet
+
+    llama_arg(std::vector<std::string> args, std::string help, std::function<bool(std::string)> handler) : args(std::move(args)), help(std::move(help)), handler_string(std::move(handler)) {}
+
+    llama_arg(std::vector<std::string> args, std::string help, std::function<bool(bool)> handler) : args(std::move(args)), help(std::move(help)), handler_bool(std::move(handler)) {}
+
+    llama_arg(std::vector<std::string> args, std::string help, std::function<bool(void)> handler) : args(std::move(args)), help(std::move(help)), handler_void(std::move(handler)) {}
+
+    llama_arg & set_examples(std::set<enum llama_example> _examples) {
+        examples = std::move(_examples);
+        return *this; // returning *this lets the setters be chained on a freshly constructed llama_arg
+    }
+
+    llama_arg & set_value_ex(std::string _value_ex) {
+        value_ex = std::move(_value_ex);
+        return *this;
+    }
+
+    llama_arg & set_env(std::string _env) {
+        env = std::move(_env); // sink parameter: move it like the sibling setters do instead of copying
+        return *this;
+    }
+
+    // greedy word-wrap: splits `input` on whitespace and packs the words into lines of at most max_char_per_line chars (an over-long word gets its own line)
+    static std::vector<std::string> break_str_into_lines(std::string input, size_t max_char_per_line) {
+        std::vector<std::string> result;
+        std::istringstream iss(input);
+        std::string word, line;
+        while (iss >> word) {
+            if (line.length() + !line.empty() + word.length() > max_char_per_line) { // !line.empty() counts the joining space
+                if (!line.empty()) result.push_back(line);
+                line = word;
+            } else {
+                line += (!line.empty() ? " " : "") + word;
+            }
+        }
+        if (!line.empty()) result.push_back(line); // flush the last, partially filled line
+        return result;
+    }
+};
+
+std::vector<llama_arg> gpt_params_parser_register(gpt_params & params); // builds the option list; handlers keep a reference to `params`, so it must outlive the result
+bool gpt_params_parser_run(int argc, char ** argv, std::vector<llama_arg> & options); // currently invokes every void handler and returns true; argc/argv are not consulted
+
 void gpt_params_parse_from_env(gpt_params & params);
 void gpt_params_handle_model_default(gpt_params & params);
 
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index c55efbb66d7c1..6a025ed512217 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -131,6 +131,9 @@ static std::string chat_add_and_format(struct llama_model * model, std::vector<l
 int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;
+    auto options = gpt_params_parser_register(params);
+    gpt_params_parser_run(argc, argv, options);
+    return 0;
 
     if (!gpt_params_parse(argc, argv, params)) {
         gpt_params_print_usage(argc, argv, params);