Skip to content

Commit

Permalink
Add mistral-v1, mistral-v3, mistral-v3-tekken and mistral-v7
Browse files Browse the repository at this point in the history
…chat template types (#10572)

* Templates: `mistral-v1`, `mistral-v2`, `mistral-v3`, `mistral-v3-tekken`

* Changed system message logic and added tests for all 4

* Invalid `system_message` instead of `content` fixed

* Removed tab-indented lines

* Added template code and test for `mistral-v7`

* Added all tests. Fixed bug with `tmpl == "llama2"` test.

* Replaced tabs with spaces.

* Removed `'mistral-v2'` option as no (open) models ever used it

* Removed all references to 'v2' template from comments

* Update llama.cpp

Fixed `trim_assistant_message` bug
  • Loading branch information
jukofyork authored Dec 1, 2024
1 parent 5e1ed95 commit 917786f
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 33 deletions.
104 changes: 74 additions & 30 deletions src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21862,41 +21862,85 @@ static int32_t llama_chat_apply_template_internal(
if (add_ass) {
ss << "<|im_start|>assistant\n";
}
} else if (tmpl == "llama2" || tmpl == "mistral" || tmpl_contains("[INST]")) {
// llama2 template and its variants
// [variant] support system message
bool support_system_message = tmpl_contains("<<SYS>>") || tmpl == "mistral";
// [variant] space before + after response
bool space_around_response = tmpl_contains("' ' + eos_token");
// [variant] add BOS inside history
bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
// [variant] trim spaces from the input message
bool strip_message = tmpl_contains("content.strip()");
// construct the prompt
bool is_inside_turn = true; // skip BOS at the beginning
ss << "[INST] ";
for (auto message : chat) {
std::string content = strip_message ? trim(message->content) : message->content;
std::string role(message->role);
if (!is_inside_turn) {
is_inside_turn = true;
ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
} else if (tmpl == "llama2" || tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
if (tmpl == "mistral-v7" || tmpl_contains("[SYSTEM_PROMPT]")) {
// Official mistral 'v7' template
// See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
for (auto message : chat) {
std::string role(message->role);
std::string content(message->content);
if (role == "system") {
ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
} else if (role == "user") {
ss << "[INST] " << content << "[/INST]";
}
else {
ss << " " << content << "</s>";
}
}
if (role == "system") {
if (support_system_message) {
ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
} else if (tmpl == "mistral-v1" || tmpl == "mistral-v3" || tmpl == "mistral-v3-tekken"
|| tmpl_contains("' [INST] ' + system_message") // catches official 'v1' template
|| tmpl_contains("[AVAILABLE_TOOLS]")) { // catches official 'v3' and 'v3-tekken' templates
// Official mistral 'v1', 'v3' and 'v3-tekken' templates
// See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
// See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
std::string leading_space = (tmpl == "mistral-v1" || tmpl_contains(" [INST]") ? " " : "");
std::string trailing_space = (tmpl == "mistral-v3-tekken" || tmpl_contains("\"[INST]\"") ? "" : " ");
bool trim_assistant_message = tmpl_contains("|trim + eos_token");
bool is_inside_turn = false;
for (auto message : chat) {
if (!is_inside_turn) {
ss << leading_space << "[INST]" << trailing_space;
is_inside_turn = true;
}
std::string role(message->role);
std::string content(message->content);
if (role == "system") {
ss << content << "\n\n";
} else if (role == "user") {
ss << content << leading_space << "[/INST]";
} else {
// if the model does not support system message, we still include it in the first message, but without <<SYS>>
ss << content << "\n";
ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
is_inside_turn = false;
}
}
} else {
// llama2 template and its variants
// [variant] support system message
// See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
bool support_system_message = tmpl_contains("<<SYS>>") || tmpl == "llama2";
// [variant] space before + after response
bool space_around_response = tmpl_contains("' ' + eos_token");
// [variant] add BOS inside history
bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
// [variant] trim spaces from the input message
bool strip_message = tmpl_contains("content.strip()");
// construct the prompt
bool is_inside_turn = true; // skip BOS at the beginning
ss << "[INST] ";
for (auto message : chat) {
std::string content = strip_message ? trim(message->content) : message->content;
std::string role(message->role);
if (!is_inside_turn) {
is_inside_turn = true;
ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
}
if (role == "system") {
if (support_system_message) {
ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
} else {
// if the model does not support system message, we still include it in the first message, but without <<SYS>>
ss << content << "\n";
}
} else if (role == "user") {
ss << content << " [/INST]";
} else {
ss << (space_around_response ? " " : "") << content << (space_around_response ? " " : "") << "</s>";
is_inside_turn = false;
}
} else if (role == "user") {
ss << content << " [/INST]";
} else {
ss << (space_around_response ? " " : "") << content << (space_around_response ? " " : "") << "</s>";
is_inside_turn = false;
}
// llama2 templates seem to not care about "add_generation_prompt
}
// llama2 templates seem to not care about "add_generation_prompt"
} else if (tmpl == "phi3" || (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>"))) {
// Phi 3
for (auto message : chat) {
Expand Down
Loading

0 comments on commit 917786f

Please sign in to comment.