From c31ed2abfce05c38a2a5189586bfae45a139a547 Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Wed, 27 Nov 2024 01:32:54 -0600 Subject: [PATCH] vulkan: define all quant data structures in types.comp (#10440) --- .../src/ggml-vulkan/vulkan-shaders/types.comp | 147 +++++++++--------- 1 file changed, 76 insertions(+), 71 deletions(-) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/types.comp b/ggml/src/ggml-vulkan/vulkan-shaders/types.comp index bc28e0ab857aa..eecc47f3a9764 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/types.comp @@ -30,10 +30,8 @@ #endif #endif -#if defined(DATA_A_Q4_0) -#extension GL_EXT_shader_16bit_storage : require -#define QUANT_K 32 -#define QUANT_R 2 +#define QUANT_K_Q4_0 32 +#define QUANT_R_Q4_0 2 struct block_q4_0 { @@ -46,14 +44,15 @@ struct block_q4_0_packed16 uint16_t qs[16/2]; }; +#if defined(DATA_A_Q4_0) +#define QUANT_K QUANT_K_Q4_0 +#define QUANT_R QUANT_R_Q4_0 #define A_TYPE block_q4_0 #define A_TYPE_PACKED16 block_q4_0_packed16 #endif -#if defined(DATA_A_Q4_1) -#extension GL_EXT_shader_16bit_storage : require -#define QUANT_K 32 -#define QUANT_R 2 +#define QUANT_K_Q4_1 32 +#define QUANT_R_Q4_1 2 struct block_q4_1 { @@ -69,15 +68,15 @@ struct block_q4_1_packed16 uint16_t qs[16/2]; }; +#if defined(DATA_A_Q4_1) +#define QUANT_K QUANT_K_Q4_1 +#define QUANT_R QUANT_R_Q4_1 #define A_TYPE block_q4_1 #define A_TYPE_PACKED16 block_q4_1_packed16 #endif -#if defined(DATA_A_Q5_0) -#extension GL_EXT_shader_16bit_storage : require -#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require -#define QUANT_K 32 -#define QUANT_R 2 +#define QUANT_K_Q5_0 32 +#define QUANT_R_Q5_0 2 struct block_q5_0 { @@ -93,15 +92,15 @@ struct block_q5_0_packed16 uint16_t qs[16/2]; }; +#if defined(DATA_A_Q5_0) +#define QUANT_K QUANT_K_Q5_0 +#define QUANT_R QUANT_R_Q5_0 #define A_TYPE block_q5_0 #define A_TYPE_PACKED16 block_q5_0_packed16 #endif -#if defined(DATA_A_Q5_1) -#extension GL_EXT_shader_16bit_storage : require -#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require -#define QUANT_K 32 -#define QUANT_R 2 +#define QUANT_K_Q5_1 32 +#define QUANT_R_Q5_1 2 struct block_q5_1 { @@ -119,14 +118,15 @@ struct block_q5_1_packed16 uint16_t qs[16/2]; }; +#if defined(DATA_A_Q5_1) +#define QUANT_K QUANT_K_Q5_1 +#define QUANT_R QUANT_R_Q5_1 #define A_TYPE block_q5_1 #define A_TYPE_PACKED16 block_q5_1_packed16 #endif -#if defined(DATA_A_Q8_0) -#extension GL_EXT_shader_16bit_storage : require -#define QUANT_K 32 -#define QUANT_R 1 +#define QUANT_K_Q8_0 32 +#define QUANT_R_Q8_0 1 struct block_q8_0 { @@ -139,164 +139,164 @@ struct block_q8_0_packed16 uint16_t qs[32/2]; }; +#if defined(DATA_A_Q8_0) +#define QUANT_K QUANT_K_Q8_0 +#define QUANT_R QUANT_R_Q8_0 #define A_TYPE block_q8_0 #define A_TYPE_PACKED16 block_q8_0_packed16 #endif // K-quants -#if defined(DATA_A_Q2_K) -#extension GL_EXT_shader_16bit_storage : require -#define QUANT_K 256 +#define QUANT_K_Q2_K 256 struct block_q2_K { - uint8_t scales[QUANT_K/16]; - uint8_t qs[QUANT_K/4]; + uint8_t scales[QUANT_K_Q2_K/16]; + uint8_t qs[QUANT_K_Q2_K/4]; f16vec2 d; }; struct block_q2_K_packed16 { - uint16_t scales[QUANT_K/16/2]; - uint16_t qs[QUANT_K/4/2]; + uint16_t scales[QUANT_K_Q2_K/16/2]; + uint16_t qs[QUANT_K_Q2_K/4/2]; f16vec2 d; }; struct block_q2_K_packed32 { - uint32_t scales[QUANT_K/16/4]; - uint32_t qs[QUANT_K/4/4]; + uint32_t scales[QUANT_K_Q2_K/16/4]; + uint32_t qs[QUANT_K_Q2_K/4/4]; f16vec2 d; }; +#if defined(DATA_A_Q2_K) +#define QUANT_K QUANT_K_Q2_K #define A_TYPE block_q2_K #define A_TYPE_PACKED16 block_q2_K_packed16 #define A_TYPE_PACKED32 block_q2_K_packed32 #endif -#if defined(DATA_A_Q3_K) -#extension GL_EXT_shader_16bit_storage : require -#define QUANT_K 256 +#define QUANT_K_Q3_K 256 struct block_q3_K { - uint8_t hmask[QUANT_K/8]; - uint8_t qs[QUANT_K/4]; + uint8_t hmask[QUANT_K_Q3_K/8]; + uint8_t qs[QUANT_K_Q3_K/4]; uint8_t scales[12]; float16_t d; }; struct block_q3_K_packed16 { - uint16_t hmask[QUANT_K/8/2]; - uint16_t qs[QUANT_K/4/2]; + uint16_t hmask[QUANT_K_Q3_K/8/2]; + uint16_t qs[QUANT_K_Q3_K/4/2]; uint16_t scales[12/2]; float16_t d; }; +#if defined(DATA_A_Q3_K) +#define QUANT_K QUANT_K_Q3_K #define A_TYPE block_q3_K #define A_TYPE_PACKED16 block_q3_K_packed16 #endif -#if defined(DATA_A_Q4_K) -#extension GL_EXT_shader_16bit_storage : require -#define QUANT_K 256 +#define QUANT_K_Q4_K 256 struct block_q4_K { f16vec2 d; - uint8_t scales[3*QUANT_K/64]; - uint8_t qs[QUANT_K/2]; + uint8_t scales[3*QUANT_K_Q4_K/64]; + uint8_t qs[QUANT_K_Q4_K/2]; }; struct block_q4_K_packed16 { f16vec2 d; - uint16_t scales[3*QUANT_K/64/2]; - uint16_t qs[QUANT_K/2/2]; + uint16_t scales[3*QUANT_K_Q4_K/64/2]; + uint16_t qs[QUANT_K_Q4_K/2/2]; }; struct block_q4_K_packed32 { f16vec2 d; - uint32_t scales[3*QUANT_K/64/4]; - uint32_t qs[QUANT_K/2/4]; + uint32_t scales[3*QUANT_K_Q4_K/64/4]; + uint32_t qs[QUANT_K_Q4_K/2/4]; }; +#if defined(DATA_A_Q4_K) +#define QUANT_K QUANT_K_Q4_K #define A_TYPE block_q4_K #define A_TYPE_PACKED16 block_q4_K_packed16 #define A_TYPE_PACKED32 block_q4_K_packed32 #endif -#if defined(DATA_A_Q5_K) -#extension GL_EXT_shader_16bit_storage : require -#define QUANT_K 256 +#define QUANT_K_Q5_K 256 struct block_q5_K { f16vec2 d; uint8_t scales[12]; - uint8_t qh[QUANT_K/8]; - uint8_t qs[QUANT_K/2]; + uint8_t qh[QUANT_K_Q5_K/8]; + uint8_t qs[QUANT_K_Q5_K/2]; }; struct block_q5_K_packed16 { f16vec2 d; uint16_t scales[12/2]; - uint16_t qh[QUANT_K/8/2]; - uint16_t qs[QUANT_K/2/2]; + uint16_t qh[QUANT_K_Q5_K/8/2]; + uint16_t qs[QUANT_K_Q5_K/2/2]; }; +#if defined(DATA_A_Q5_K) +#define QUANT_K QUANT_K_Q5_K #define A_TYPE block_q5_K #define A_TYPE_PACKED16 block_q5_K_packed16 #endif -#if defined(DATA_A_Q6_K) -#extension GL_EXT_shader_16bit_storage : require -#define QUANT_K 256 +#define QUANT_K_Q6_K 256 struct block_q6_K { - uint8_t ql[QUANT_K/2]; - uint8_t qh[QUANT_K/4]; - int8_t scales[QUANT_K/16]; + uint8_t ql[QUANT_K_Q6_K/2]; + uint8_t qh[QUANT_K_Q6_K/4]; + int8_t scales[QUANT_K_Q6_K/16]; float16_t d; }; struct block_q6_K_packed16 { - uint16_t ql[QUANT_K/2/2]; - uint16_t qh[QUANT_K/4/2]; - int8_t scales[QUANT_K/16]; + uint16_t ql[QUANT_K_Q6_K/2/2]; + uint16_t qh[QUANT_K_Q6_K/4/2]; + int8_t scales[QUANT_K_Q6_K/16]; float16_t d; }; +#if defined(DATA_A_Q6_K) +#define QUANT_K QUANT_K_Q6_K #define A_TYPE block_q6_K #define A_TYPE_PACKED16 block_q6_K_packed16 #endif // IQuants -#if defined(DATA_A_IQ4_NL) -#extension GL_EXT_shader_16bit_storage : require -#define QUANT_K 32 -#define QUANT_R 2 +#define QUANT_K_IQ4_NL 32 +#define QUANT_R_IQ4_NL 2 struct block_iq4_nl { float16_t d; - uint8_t qs[QUANT_K/2]; + uint8_t qs[QUANT_K_IQ4_NL/2]; }; struct block_iq4_nl_packed16 { float16_t d; - uint16_t qs[QUANT_K/2/2]; + uint16_t qs[QUANT_K_IQ4_NL/2/2]; }; -#define A_TYPE block_iq4_nl -#define A_TYPE_PACKED16 block_iq4_nl_packed16 +#if defined(DATA_A_IQ4_NL) const int8_t kvalues_iq4nl_const[16] = { int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10), @@ -313,6 +313,11 @@ void init_iq4nl_shmem() } barrier(); } + +#define QUANT_K QUANT_K_IQ4_NL +#define QUANT_R QUANT_R_IQ4_NL +#define A_TYPE block_iq4_nl +#define A_TYPE_PACKED16 block_iq4_nl_packed16 #endif #endif // !defined(GGML_TYPES_COMP)