Skip to content

Commit

Permalink
Correction for embedding tensors too small to quantize
Browse files Browse the repository at this point in the history
IQ2_XS doesn't seem to work as such, so this reverts to IQ2_S.
  • Loading branch information
Nexesenex committed Aug 21, 2024
1 parent 32f6ead commit 644aa9f
Showing 1 changed file with 2 additions and 5 deletions.
7 changes: 2 additions & 5 deletions src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15964,11 +15964,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
new_type = GGML_TYPE_Q4_K;
}
}
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ2_XS;
else new_type = GGML_TYPE_IQ2_S;
}
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
new_type = GGML_TYPE_IQ2_S;
}
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
Expand Down

0 comments on commit 644aa9f

Please sign in to comment.