From 9b3d83318931aa98c487baaa977626931d059e6a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 22 May 2024 12:36:37 +0300 Subject: [PATCH 1/2] cuda : fix compile warning (#7454) --- ggml-cuda/fattn-tile-f32.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ggml-cuda/fattn-tile-f32.cu b/ggml-cuda/fattn-tile-f32.cu index 54db765e2f8ee..b8b2f69e19edb 100644 --- a/ggml-cuda/fattn-tile-f32.cu +++ b/ggml-cuda/fattn-tile-f32.cu @@ -283,8 +283,7 @@ void launch_fattn_tile_f32_64_128(ggml_backend_cuda_context & ctx, ggml_tensor * } void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * KQV = dst; - const ggml_tensor * Q = dst->src[0]; + const ggml_tensor * Q = dst->src[0]; if (Q->ne[1] <= 16) { constexpr int cols_per_block = 16; From 03d8900ebe062355e26a562379daee5f17ea099f Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Wed, 22 May 2024 07:08:18 -0400 Subject: [PATCH 2/2] llama : add missing model type names (#7445) --- llama.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llama.cpp b/llama.cpp index abff8c1c03e7a..d8c6f29a536aa 100644 --- a/llama.cpp +++ b/llama.cpp @@ -3771,14 +3771,17 @@ static std::string llama_model_ftype_name(llama_ftype ftype) { static const char * llama_model_type_name(e_model type) { switch (type) { + case MODEL_17M: return "17M"; case MODEL_22M: return "22M"; case MODEL_33M: return "33M"; case MODEL_109M: return "109M"; case MODEL_137M: return "137M"; + case MODEL_335M: return "335M"; case MODEL_0_5B: return "0.5B"; case MODEL_1B: return "1B"; case MODEL_2B: return "2B"; case MODEL_3B: return "3B"; + case MODEL_4B: return "4B"; case MODEL_7B: return "7B"; case MODEL_8B: return "8B"; case MODEL_12B: return "12B";