Merge pull request #23 from Nexesenex/master_experimental
Master experimental
Nexesenex authored Dec 4, 2023
2 parents 8602f5a + 085a542 commit b80eea3
Showing 4 changed files with 210 additions and 229 deletions.
5 changes: 5 additions & 0 deletions common/common.cpp
@@ -499,6 +499,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             params.infill = true;
         } else if (arg == "-dkvc" || arg == "--dump-kv-cache") {
             params.dump_kv_cache = true;
+        } else if (arg == "-nkvo" || arg == "--no-kv-offload") {
+            params.no_kv_offload = true;
         } else if (arg == "--multiline-input") {
             params.multiline_input = true;
         } else if (arg == "--simple-io") {
@@ -841,6 +843,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" --verbose-prompt print prompt before generation\n");
     printf(" -dkvc, --dump-kv-cache\n");
     printf(" verbose print of the KV cache\n");
+    printf(" -nkvo, --no-kv-offload\n");
+    printf(" disable KV offload\n");
     printf(" --simple-io use basic IO for better compatibility in subprocesses and limited consoles\n");
     printf(" --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
     printf(" --lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)\n");
@@ -925,6 +929,7 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
     cparams.yarn_beta_fast = params.yarn_beta_fast;
     cparams.yarn_beta_slow = params.yarn_beta_slow;
     cparams.yarn_orig_ctx = params.yarn_orig_ctx;
+    cparams.offload_kqv = !params.no_kv_offload;
 
     return cparams;
 }
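Taken together, the common.cpp changes thread one boolean from the command line through to the context parameters, inverting its sense at the boundary: the user flag disables what `offload_kqv` enables. Below is a minimal standalone sketch of that flow; the struct names are simplified stand-ins for illustration, not the real `gpt_params` / `llama_context_params` from llama.cpp.

    #include <cstdio>
    #include <string>

    // Simplified stand-in for the gpt_params field added in this commit.
    struct gpt_params_sketch {
        bool no_kv_offload = false; // set by -nkvo / --no-kv-offload
    };

    // Simplified stand-in for the context-parameter side.
    struct context_params_sketch {
        bool offload_kqv = true; // KV offloading on unless disabled
    };

    int main(int argc, char ** argv) {
        gpt_params_sketch params;

        // Mirror of the new parsing branch: the flag only flips a bool.
        for (int i = 1; i < argc; i++) {
            const std::string arg = argv[i];
            if (arg == "-nkvo" || arg == "--no-kv-offload") {
                params.no_kv_offload = true;
            }
        }

        // Mirror of the mapping in llama_context_params_from_gpt_params:
        // the sense is inverted because the flag disables offloading.
        context_params_sketch cparams;
        cparams.offload_kqv = !params.no_kv_offload;

        std::printf("offload_kqv = %s\n", cparams.offload_kqv ? "true" : "false");
        return 0;
    }

Since `no_kv_offload` defaults to false, `offload_kqv` stays true and behavior is unchanged unless the new flag is passed.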
1 change: 1 addition & 0 deletions common/common.h
@@ -131,6 +131,7 @@ struct gpt_params {
     bool verbose_prompt = false; // print prompt tokens before generation
     bool infill = false; // use infill mode
     bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
+    bool no_kv_offload = false; // disable KV offloading
 
     // multimodal models (see examples/llava)
     std::string mmproj = ""; // path to multimodal projector
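With the struct field in place, the flag is available to anything that uses the common argument parser: passing `-nkvo` (or `--no-kv-offload`) sets `no_kv_offload`, which leaves `offload_kqv` false so the KV cache is not offloaded. An invocation would presumably look something like `./main -m model.gguf -nkvo` (binary name and model path shown purely for illustration).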
