diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp index 149fbb23093d3..6ac70ba69e281 100644 --- a/examples/llava/llava-cli.cpp +++ b/examples/llava/llava-cli.cpp @@ -148,45 +148,35 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_ const int max_tgt_len = params->n_predict < 0 ? 256 : params->n_predict; const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx_llava->ctx_llama)); - #if 0 - // llava chat format is "\nUSER:\n\nASSISTANT:" - eval_string(ctx_llava->ctx_llama, "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:", params->n_batch, &n_past, add_bos); - llava_eval_image_embed(ctx_llava->ctx_llama, image_embed, params->n_batch, &n_past); - eval_string(ctx_llava->ctx_llama, (prompt + "\nASSISTANT:").c_str(), params->n_batch, &n_past, false); - #else - std::string system_prompt, user_prompt; - size_t image_pos = prompt.find(""); - if (image_pos != std::string::npos) { - // new templating mode: Provide the full prompt including system message and use as a placeholder for the image - - system_prompt = prompt.substr(0, image_pos); - user_prompt = prompt.substr(image_pos + std::string("").length()); - // We replace \n with actual newlines in user_prompt, just in case -e was not used in templating string - size_t pos = 0; - while ((pos = user_prompt.find("\\n", pos)) != std::string::npos) { - user_prompt.replace(pos, 2, "\n"); - pos += 1; // Advance past the replaced newline - } - while ((pos = system_prompt.find("\\n", pos)) != std::string::npos) { - system_prompt.replace(pos, 2, "\n"); - pos += 1; // Advance past the replaced newline - } - - printf("system_prompt: %s\n", system_prompt.c_str()); - printf("user_prompt: %s\n", user_prompt.c_str()); - } else { - // llava-1.5 native mode - system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:"; - user_prompt = prompt + "\nASSISTANT:"; + std::string system_prompt, user_prompt; + size_t image_pos = prompt.find(""); + if (image_pos != std::string::npos) { + // new templating mode: Provide the full prompt including system message and use as a placeholder for the image + + system_prompt = prompt.substr(0, image_pos); + user_prompt = prompt.substr(image_pos + std::string("").length()); + // We replace \n with actual newlines in user_prompt, just in case -e was not used in templating string + size_t pos = 0; + while ((pos = user_prompt.find("\\n", pos)) != std::string::npos) { + user_prompt.replace(pos, 2, "\n"); + pos += 1; // Advance past the replaced newline + } + while ((pos = system_prompt.find("\\n", pos)) != std::string::npos) { + system_prompt.replace(pos, 2, "\n"); + pos += 1; // Advance past the replaced newline } + printf("system_prompt: %s\n", system_prompt.c_str()); + printf("user_prompt: %s\n", user_prompt.c_str()); + } else { + // llava-1.5 native mode + system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:"; + user_prompt = prompt + "\nASSISTANT:"; + } - - eval_string(ctx_llava->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, add_bos); - llava_eval_image_embed(ctx_llava->ctx_llama, image_embed, params->n_batch, &n_past); - eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false); - - #endif + eval_string(ctx_llava->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, add_bos); + llava_eval_image_embed(ctx_llava->ctx_llama, image_embed, params->n_batch, &n_past); + eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false); // generate the response