Skip to content

Commit

Permalink
Vulkan IQ4_NL Support (#8613)
Browse files (browse the repository at this point in the history)
* Fix Vulkan matmul tests compile errors

* Add Vulkan IQ4_NL support

* Fix Vulkan DeepSeek-Coder-V2-Lite MoE support
  • Loading branch information
0cc4m authored Jul 23, 2024
1 parent 46e4741 commit 751fcfc
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 190 deletions.
322 changes: 140 additions & 182 deletions ggml/src/ggml-vulkan.cpp

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions ggml/src/vulkan-shaders/dequant_funcs.comp
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,11 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1])) * d;
}
#endif

#if defined(DATA_A_IQ4_NL)
// Decode one packed byte of an IQ4_NL block into two scaled values.
//   ib       - block index, relative to a_offset
//   iqs      - index of the byte inside the block's qs[] array
//   a_offset - base offset added to ib when indexing data_a
// Returns (value for the low nibble, value for the high nibble); each nibble
// is looked up in kvalues_iq4nl and multiplied by the block's scale d.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const uint qbyte = uint(data_a[blk].qs[iqs]);
    const float scale = float(data_a[blk].d);

    // Both 4-bit halves of the byte are indices into kvalues_iq4nl.
    const vec2 codes = vec2(kvalues_iq4nl[qbyte & 0xF], kvalues_iq4nl[qbyte >> 4]);
    return codes * scale;
}
#endif
30 changes: 30 additions & 0 deletions ggml/src/vulkan-shaders/dequant_iq4_nl.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#version 450

#include "dequant_head.comp"

// 256 invocations per workgroup along x; y and z are a single invocation each.
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

// Binding 0: quantized input, an array of block_iq4_nl (read-only).
layout (binding = 0) readonly buffer A {block_iq4_nl data_a[];};
// Binding 1: dequantized output, an array of D_TYPE elements (write-only).
layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};

// Dequantize IQ4_NL blocks from data_a into data_b.
// Each invocation handles 8 packed bytes of one block and writes 16 outputs:
// the low nibbles go to [dst], the high nibbles to [dst + 16].
void main() {
    // local_size_x is 256, so a workgroup is split into 4 groups of 64 invocations.
    const uint lane = gl_LocalInvocationID.x % 64;
    const uint grp  = gl_WorkGroupID.x * 4 + gl_LocalInvocationID.x / 64;

    // Inside a group: 32 blocks, with two invocations (part 0 / part 1) per block.
    const uint part = lane / 32;
    const uint slot = lane % 32;
    const uint blk  = 32 * grp + slot;

    // Bail out once the block index reaches p.nel / 32.
    if (blk >= p.nel / 32) {
        return;
    }

    const uint q_base   = 8 * part;
    // 32 * blk equals 1024 * grp + 32 * slot: 32 output elements per block.
    const uint out_base = 32 * blk + q_base;
    const float scale   = float(data_a[blk].d);

    [[unroll]] for (uint k = 0; k < 8; ++k) {
        // Read the packed byte once; both nibbles index kvalues_iq4nl.
        const uint qbyte = uint(data_a[blk].qs[q_base + k]);
        const uint dst   = out_base + k;

        data_b[dst]      = D_TYPE(scale * kvalues_iq4nl[qbyte & 0xF]);
        data_b[dst + 16] = D_TYPE(scale * kvalues_iq4nl[qbyte >> 4]);
    }
}
10 changes: 4 additions & 6 deletions ggml/src/vulkan-shaders/dequant_q4_0.comp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,13 @@ void main() {
return;
}

const uint b_idx = 1024*i + 32*ir + 8*il;
const uint q_idx = 8*il;
const uint b_idx = 1024*i + 32*ir + q_idx;

const float d = float(data_a[ib].d);
const float dm = -8.0f * d;

const uint q_idx = 8*il;

[[unroll]] for (uint l = 0; l < 8; ++l) {
data_b[b_idx + l + 0] = D_TYPE(d * (data_a[ib].qs[q_idx + l] & 0xF) + dm);
data_b[b_idx + l + 16] = D_TYPE(d * (data_a[ib].qs[q_idx + l] >> 4) + dm);
data_b[b_idx + l + 0] = D_TYPE(d * ((data_a[ib].qs[q_idx + l] & 0xF) - 8.0f));
data_b[b_idx + l + 16] = D_TYPE(d * ((data_a[ib].qs[q_idx + l] >> 4) - 8.0f));
}
}
15 changes: 14 additions & 1 deletion ggml/src/vulkan-shaders/mul_mm.comp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ shared FLOAT_TYPE buf_a[BM * (BK+1)];
shared FLOAT_TYPE buf_b[BN * (BK+1)];

#ifdef MUL_MAT_ID
shared u16vec2 row_ids[2048];
shared u16vec2 row_ids[3072];
#endif

void main() {
Expand Down Expand Up @@ -380,6 +380,19 @@ void main() {

buf_a[buf_idx ] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi ] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi ] >> qhshift) & 3) << 4)) - 32));
buf_a[buf_idx + 1] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi + 1] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi + 1] >> qhshift) & 3) << 4)) - 32));
#elif defined(DATA_A_IQ4_NL)
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a;

const uint ib = idx / 16;
const uint iqs = idx & 0xF;

const float d = float(data_a[ib].d);
const uint vui = uint(data_a[ib].qs[iqs]);
const vec2 v = vec2(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[vui >> 4]) * d;

buf_a[buf_idx ] = FLOAT_TYPE(v.x);
buf_a[buf_idx + 16] = FLOAT_TYPE(v.y);
#endif
}
[[unroll]] for (uint l = 0; l < BN; l += loadstride_b) {
Expand Down
21 changes: 21 additions & 0 deletions ggml/src/vulkan-shaders/types.comp
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,24 @@ struct block_q6_K

#define A_TYPE block_q6_K
#endif

// IQuants

// IQ4_NL block definition; compiled only when DATA_A_IQ4_NL is defined.
#if defined(DATA_A_IQ4_NL)
// NOTE(review): presumably needed for the float16_t member stored in the buffer — confirm.
#extension GL_EXT_shader_16bit_storage : require
// QUANT_K sizes the qs[] array below (QUANT_K/2 bytes, two 4-bit codes per byte).
#define QUANT_K 32
// NOTE(review): QUANT_R is not used in the visible code; its meaning should be confirmed against the callers.
#define QUANT_R 2

// One quantized block: a half-precision scale followed by the packed 4-bit codes.
struct block_iq4_nl
{
float16_t d;
uint8_t qs[QUANT_K/2];
};

#define A_TYPE block_iq4_nl

// 16-entry lookup table indexed by a 4-bit code; the looked-up value is
// multiplied by the block scale d wherever the visible shaders dequantize.
const int8_t kvalues_iq4nl[16] = {
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
int8_t(1), int8_t(13), int8_t(25), int8_t(38), int8_t(53), int8_t(69), int8_t(89), int8_t(113)
};
#endif
3 changes: 2 additions & 1 deletion ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ const std::vector<std::string> type_names = {
"q3_k",
"q4_k",
"q5_k",
"q6_k"
"q6_k",
"iq4_nl"
};

void execute_command(const std::string& command, std::string& stdout_str, std::string& stderr_str) {
Expand Down

0 comments on commit 751fcfc

Please sign in to comment.