Skip to content

Commit

Permalink
metal : simplify f16 and f32 dequant kernels (#0)
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Nov 4, 2024
1 parent f8e5813 commit 05697f6
Showing 1 changed file with 2 additions and 8 deletions.
10 changes: 2 additions & 8 deletions ggml/src/ggml-metal.metal
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,12 @@ constexpr constant static float kvalues_iq4nl_f[16] = {
// NOTE: this is not dequantizing - we are simply fitting the template
//
// Copies the 4x4 float matrix pointed to by `src` into `reg`, converting it to
// the caller-selected matrix type `type4x4`.
//
//   src - source matrix in the `device` address space (read-only)
//   il  - not referenced in the body shown here
//   reg - destination matrix in the `thread` address space, overwritten
//
// NOTE(review): this listing appears to be a diff rendered without +/- markers.
// The `temp` load and the element-wise loop look like the removed
// implementation, and the final cast-assignment looks like its replacement -
// confirm against the commit before treating both as live code.
template <typename type4x4>
void dequantize_f32(device const float4x4 * src, short il, thread type4x4 & reg) {
// Element-wise form: load the matrix, then copy all 16 entries one at a time.
float4x4 temp = *(((device float4x4 *)src));
for (int i = 0; i < 16; i++){
reg[i/4][i%4] = temp[i/4][i%4];
}
// Whole-matrix form: a single cast-and-assign of the source matrix.
reg = (type4x4)(*src);
}

// Copies the 4x4 half matrix pointed to by `src` into `reg`, converting it to
// the caller-selected matrix type `type4x4`. No dequantization arithmetic is
// performed in the body shown; the values are only copied/converted.
//
//   src - source matrix in the `device` address space (read-only)
//   il  - not referenced in the body shown here
//   reg - destination matrix in the `thread` address space, overwritten
//
// NOTE(review): this listing appears to be a diff rendered without +/- markers.
// The `temp` load and the element-wise loop look like the removed
// implementation, and the final cast-assignment looks like its replacement -
// confirm against the commit before treating both as live code.
template <typename type4x4>
void dequantize_f16(device const half4x4 * src, short il, thread type4x4 & reg) {
// Element-wise form: load the matrix, then copy all 16 entries one at a time.
half4x4 temp = *(((device half4x4 *)src));
for (int i = 0; i < 16; i++){
reg[i/4][i%4] = temp[i/4][i%4];
}
// Whole-matrix form: a single cast-and-assign of the source matrix.
reg = (type4x4)(*src);
}

template <typename type4x4>
Expand Down

0 comments on commit 05697f6

Please sign in to comment.