From 83ed24a97b500ccdb32b90b94e6f9621ad8db79e Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Fri, 13 Dec 2024 12:12:15 +0530 Subject: [PATCH] SYCL: Reduce most of the compiler warnings (#10748) * Try to reduce some unused and typecast warnings * Reduce compiler warnings step 2 * add a newline at the end of the file * Initialize nreduce as size_t * [SYCL] Remove pragma directives from mmq.cpp * SYCL: mmq add condition to prevent blocks_per_tile_x_row variable from becoming 0 * SYCL softmax: Initialize nreduce as size_t * ggml-sycl.cpp: fix some trailing whitespaces * SYCL: remove the unused variables instead of commenting it out * SYCL poo2d kernel: set NAN for invalid pooling op * SYCL gemm.hpp: remove pragma directives * SYCL gemm.hpp: use const cast to properly support dnnl::memory * SYCL: wkv6 remove a comment * SYCL: clean comments step 2 * SYCL: clean comments and variables step 3 * SYCL: Use GGML_UNUSED for unused variables * SYCL: remove extra empty lines and a comment * Remove TODO * cleanup spaces * add a stdout for unsupported op * use sycl printf over fprintf * remove prints for CI * SYCL ggml-sycl: pool2D use sycl::nan and remove if-else block --------- Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> --- ggml/src/ggml-sycl/common.cpp | 13 +-- ggml/src/ggml-sycl/common.hpp | 1 + ggml/src/ggml-sycl/concat.cpp | 4 +- ggml/src/ggml-sycl/convert.cpp | 2 +- ggml/src/ggml-sycl/dmmv.cpp | 10 +- ggml/src/ggml-sycl/dpct/helper.hpp | 2 +- ggml/src/ggml-sycl/element_wise.cpp | 141 ++++++++++++++++------------ ggml/src/ggml-sycl/gemm.hpp | 8 +- ggml/src/ggml-sycl/ggml-sycl.cpp | 134 ++++++++++++++------------ ggml/src/ggml-sycl/im2col.cpp | 5 +- ggml/src/ggml-sycl/mmq.cpp | 12 +-- ggml/src/ggml-sycl/mmvq.cpp | 26 ++--- ggml/src/ggml-sycl/norm.cpp | 7 +- ggml/src/ggml-sycl/rope.cpp | 7 +- ggml/src/ggml-sycl/softmax.cpp | 14 +-- ggml/src/ggml-sycl/tsembd.cpp | 1 + ggml/src/ggml-sycl/wkv6.cpp | 5 +- 17 files changed, 205 insertions(+), 187 deletions(-) diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml/src/ggml-sycl/common.cpp index 97ab2003c7fa1..a9ee404911460 100644 --- a/ggml/src/ggml-sycl/common.cpp +++ b/ggml/src/ggml-sycl/common.cpp @@ -11,6 +11,7 @@ // #include "common.hpp" +#include "ggml-impl.h" int get_current_device_id() { return dpct::dev_mgr::instance().current_device_id(); @@ -28,11 +29,7 @@ void* ggml_sycl_host_malloc(size_t size) try { if (err != 0) { // clear the error - fprintf( - stderr, - "WARNING: failed to allocate %.2f MB of pinned memory: %s\n", - size / 1024.0 / 1024.0, - "syclGetErrorString is not supported"); + GGML_LOG_ERROR("WARNING: failed to allocate %.2f MB of pinned memory: %s\n", size / 1024.0 / 1024.0, "syclGetErrorString is not supported"); return nullptr; } @@ -66,18 +63,12 @@ int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const ggml_sycl_op_flatten_t op) try { - const int64_t nrows0 = ggml_nrows(src0); const bool use_src1 = src1 != nullptr; - const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1; GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT); GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT); - ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra; - ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr; - ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra; - // dd = data device float * src0_ddf = (float *) src0->data; float * src1_ddf = use_src1 ? (float *) src1->data : nullptr; diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index 4549fa5e95a09..c1582f610e5f4 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -626,6 +626,7 @@ struct bin_bcast_sycl { }); } } + GGML_UNUSED(ctx); } }; diff --git a/ggml/src/ggml-sycl/concat.cpp b/ggml/src/ggml-sycl/concat.cpp index c90c452d8783f..a240968ad2e48 100644 --- a/ggml/src/ggml-sycl/concat.cpp +++ b/ggml/src/ggml-sycl/concat.cpp @@ -47,7 +47,7 @@ static void concat_f32_dim1(const float *x, const float *y, float *dst, // operation int offset_dst = nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1); - if (item_ct1.get_group(1) < ne01) { // src0 + if (item_ct1.get_group(1) < (size_t) ne01) { // src0 int offset_src = nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * ne01; dst[offset_dst] = x[offset_src]; @@ -70,7 +70,7 @@ static void concat_f32_dim2(const float *x, const float *y, float *dst, // operation int offset_dst = nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1); - if (item_ct1.get_group(0) < ne02) { // src0 + if (item_ct1.get_group(0) < (size_t) ne02) { // src0 int offset_src = nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1); dst[offset_dst] = x[offset_src]; diff --git a/ggml/src/ggml-sycl/convert.cpp b/ggml/src/ggml-sycl/convert.cpp index 5fd15e6cdccab..05b01db2d8b05 100644 --- a/ggml/src/ggml-sycl/convert.cpp +++ b/ggml/src/ggml-sycl/convert.cpp @@ -424,7 +424,7 @@ static void convert_unary(const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t global_id = item_ct1.get_local_id(2) + work_group_size * item_ct1.get_group(2); // make each work-item deal with more elements since sycl global range can not exceed max int - const src_t * x = (src_t *) vx; + const src_t * x = (const src_t *) vx; for (int64_t i = global_id; i < k; i += work_group_size * item_ct1.get_group_range(2)) { y[i] = x[i]; } diff --git a/ggml/src/ggml-sycl/dmmv.cpp b/ggml/src/ggml-sycl/dmmv.cpp index 0c3dfaa37eb02..0d097357ce79b 100644 --- a/ggml/src/ggml-sycl/dmmv.cpp +++ b/ggml/src/ggml-sycl/dmmv.cpp @@ -1015,9 +1015,9 @@ void ggml_sycl_op_dequantize_mul_mat_vec( break; } - (void) src1; - (void) dst; - (void) src1_ddq_i; - (void) src1_ncols; - (void) src1_padded_row_size; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_ddq_i); + GGML_UNUSED(src1_ncols); + GGML_UNUSED(src1_padded_row_size); } diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index d1b5dd87c6922..e167948e7a3f0 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -1237,7 +1237,7 @@ namespace dpct std::map::iterator get_map_iterator(const void *ptr) { - auto it = m_map.upper_bound((byte_t *)ptr); + auto it = m_map.upper_bound(const_cast(reinterpret_cast(ptr))); if (it == m_map.end()) { // Not a virtual pointer. diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index e5cd736eba989..d05a51f807c20 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -237,7 +237,7 @@ void upscale_f32(const float *x, float *dst, const int nb00, const int nb01, int i02 = i12 / sf2; int i03 = i13 / sf3; - dst[index] = *(float *)((char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00); + dst[index] = *(const float *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00); } void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const int ne01, const int ne02, @@ -251,8 +251,7 @@ void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const i // operation int offset_dst = nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1); - if (nidx < ne00 && item_ct1.get_group(1) < ne01 && - item_ct1.get_group(0) < ne02) { + if (nidx < ne00 && item_ct1.get_group(1) < (size_t) ne01 && item_ct1.get_group(0) < (size_t) ne02) { int offset_src = nidx + item_ct1.get_group(1) * ne00 + item_ct1.get_group(0) * ne00 * ne01; dst[offset_dst] = x[offset_src]; @@ -520,9 +519,10 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -535,9 +535,10 @@ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, @@ -550,9 +551,10 @@ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_ gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -564,9 +566,10 @@ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor GGML_ASSERT( dst->type == GGML_TYPE_F32); tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -579,9 +582,10 @@ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -595,9 +599,10 @@ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -610,9 +615,10 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_t hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -625,9 +631,10 @@ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -640,9 +647,10 @@ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor log_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -655,9 +663,10 @@ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_ten sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -670,9 +679,10 @@ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -685,9 +695,10 @@ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -700,9 +711,10 @@ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -715,9 +727,10 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor step_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -730,9 +743,10 @@ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -749,9 +763,10 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_ leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), negative_slope, main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -764,9 +779,10 @@ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -787,9 +803,10 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_ten dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -805,9 +822,10 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor src0->ne[0], src0->ne[1], src0->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -827,7 +845,8 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream); - (void) dst; + GGML_UNUSED(dst); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, diff --git a/ggml/src/ggml-sycl/gemm.hpp b/ggml/src/ggml-sycl/gemm.hpp index 2ad9b36f419ce..3f0f34ad603f5 100644 --- a/ggml/src/ggml-sycl/gemm.hpp +++ b/ggml/src/ggml-sycl/gemm.hpp @@ -51,8 +51,8 @@ class DnnlGemmWrapper { const auto a_in_md = dnnl::memory::desc(a_dims, at, a_trans ? tag::ba : tag::ab); const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_trans ? tag::ba : tag::ab); const auto c_md = dnnl::memory::desc(c_dims, ct, tag::ab); - auto a_mem = dnnl::memory(a_in_md, eng, (void*)a); - auto b_mem = dnnl::memory(b_in_md, eng, (void*)b); + auto a_mem = dnnl::memory(a_in_md, eng, const_cast(a)); + auto b_mem = dnnl::memory(b_in_md, eng, const_cast(b)); auto matmul_pd = dnnl::matmul::primitive_desc(eng, a_in_md, b_in_md, c_md); auto c_mem = dnnl::memory(matmul_pd.dst_desc(), eng, c); @@ -79,8 +79,8 @@ class DnnlGemmWrapper { const auto a_in_md = dnnl::memory::desc(a_dims, at, a_trans ? tag::ba : tag::ab); const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_trans ? tag::ba : tag::ab); const auto c_md = dnnl::memory::desc(c_dims, ct, tag::ab); - auto a_mem = dnnl::memory(a_in_md, eng, (void*)a); - auto b_mem = dnnl::memory(b_in_md, eng, (void*)b); + auto a_mem = dnnl::memory(a_in_md, eng, const_cast(a)); + auto b_mem = dnnl::memory(b_in_md, eng, const_cast(b)); auto matmul_pd = dnnl::matmul::primitive_desc(eng, a_in_md, b_in_md, c_md); auto c_mem = dnnl::memory(matmul_pd.dst_desc(), eng, c); diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index ae3baedc7b6c3..6b9f0b0d9a1c8 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -47,7 +47,7 @@ static ggml_sycl_device_info ggml_sycl_init() { info.device_count = dpct::dev_mgr::instance().device_count(); if (info.device_count == 0) { - GGML_LOG_ERROR("%s: failed to initialize " GGML_SYCL_NAME ": %s\n", __func__); + GGML_LOG_ERROR("%s: failed to initialize: %s\n", GGML_SYCL_NAME, __func__); return info; } @@ -64,7 +64,7 @@ static ggml_sycl_device_info ggml_sycl_init() { #else GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__); #endif - GGML_LOG_INFO("%s: found %d " GGML_SYCL_NAME " devices:\n", __func__, info.device_count); + GGML_LOG_INFO("%s: found %d %s devices:\n", __func__, info.device_count, GGML_SYCL_NAME); for (int i = 0; i < info.device_count; ++i) { info.devices[i].vmm = 0; @@ -137,7 +137,6 @@ void ggml_backend_sycl_print_sycl_devices() { for (int id = 0; id < device_count; ++id) { sycl::device device = dpct::dev_mgr::instance().get_device(id); - sycl::backend backend = device.get_backend(); std::string backend_type = get_device_backend_and_type(device); int type_id = DeviceNums[backend_type]++; std::stringstream device_type; @@ -420,13 +419,11 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer, return true; } return false; + GGML_UNUSED(buffer); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -catch (sycl::exception const &exc) { - std::cerr << exc.what() << "Exception caught at file:" << __FILE__ - << ", line:" << __LINE__ << std::endl; - std::exit(1); -} - static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) try { @@ -1092,10 +1089,7 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool { ggml_sycl_buffer buffer_pool[MAX_SYCL_BUFFERS] = {}; size_t pool_size = 0; - explicit ggml_sycl_pool_leg(queue_ptr qptr_, int device_) : - qptr(qptr_), - device(device_) { - } + explicit ggml_sycl_pool_leg(queue_ptr qptr_, int device_) : device(device_), qptr(qptr_) {} ~ggml_sycl_pool_leg() { for (int i = 0; i < MAX_SYCL_BUFFERS; ++i) { @@ -1238,7 +1232,7 @@ static void quantize_q8_1(const float * __restrict__ x, void * __restrict__ vy, zeros[i] = 0.f; qzeros[i] = 0; } - const TC xi = ix < kx ? *(TC *)&x[iy * kx + ix] : zeros; + const TC xi = ix < kx ? *(const TC *)&x[iy * kx + ix] : zeros; float sum = xi[0]; float amax = sycl::fabs(xi[0]); #pragma unroll @@ -1799,6 +1793,9 @@ static void pool2d_nchw_kernel( switch (op) { case GGML_OP_POOL_AVG: res = 0; break; case GGML_OP_POOL_MAX: res = -FLT_MAX; break; + default: + res = (To) sycl::nan(uint32_t(0)); + break; } for (int i = bh; i < eh; i += 1) { @@ -1817,6 +1814,9 @@ static void pool2d_nchw_kernel( switch (op) { case GGML_OP_POOL_AVG: res += (cur / (kh * kw)); break; case GGML_OP_POOL_MAX: res = sycl::max(res, (To)cur); break; + default: + res = (To) sycl::nan(uint32_t(0)); + break; } } } @@ -1855,7 +1855,8 @@ static void get_rows_sycl(ggml_backend_sycl_context & ctx, const ggml_tensor *sr s3, nb01, nb02, nb03, s10, s11, s12, item_ct1); }); - (void) dst; + GGML_UNUSED(dst); + GGML_UNUSED(ctx); } template @@ -1893,10 +1894,10 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens }); } - (void) dst; + GGML_UNUSED(dst); + GGML_UNUSED(ctx); } - static void quantize_row_q8_1_sycl(const float *x, void *vy, const int kx, const int ky, const int kx_padded, queue_ptr stream) { @@ -2464,8 +2465,8 @@ static void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, const ggml_tens ggml_sycl_op_bin_bcast>(ctx, dst, src0, dst, nullptr, src0_d, dst_d, main_stream); - (void) src1; - (void) src1_d; + GGML_UNUSED(src1); + GGML_UNUSED(src1_d); } @@ -2484,17 +2485,18 @@ inline void ggml_sycl_op_mul_mat_sycl( const int64_t ne00 = src0->ne[0]; const int64_t ne10 = src1->ne[0]; - const int64_t ne0 = dst->ne[0]; const int64_t row_diff = row_high - row_low; int id; SYCL_CHECK( CHECK_TRY_ERROR(id = get_current_device_id())); - +#if !GGML_SYCL_DNNL + const int64_t ne0 = dst->ne[0]; // the main device has a larger memory buffer to hold the results from all GPUs // ldc == nrows of the matrix that cuBLAS writes into int ldc = id == ctx.device ? ne0 : row_diff; +#endif #ifdef GGML_SYCL_F16 bool use_fp16 = true; // TODO(Yu) SYCL capability check @@ -2531,9 +2533,9 @@ inline void ggml_sycl_op_mul_mat_sycl( : src1_as_f16.get(); ggml_sycl_pool_alloc dst_f16(ctx.pool(), row_diff * src1_ncols); - const sycl::half alpha_f16 = 1.0f; - const sycl::half beta_f16 = 0.0f; #if !GGML_SYCL_DNNL + const sycl::half alpha_f16 = 1.0f; + const sycl::half beta_f16 = 0.0f; SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm( *stream, oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, row_diff, src1_ncols, ne10, @@ -2570,9 +2572,9 @@ inline void ggml_sycl_op_mul_mat_sycl( const float * src0_ddf_i = src0->type == GGML_TYPE_F32 ? (const float *) src0_dd_i : src0_ddq_as_f32.get(); const float * src1_ddf1_i = src1->type == GGML_TYPE_F32 ? (const float *) src1_ddf_i : src1_ddq_as_f32.get(); - const float alpha = 1.0f; - const float beta = 0.0f; #if !GGML_SYCL_DNNL + const float alpha = 1.0f; + const float beta = 0.0f; # ifdef GGML_SYCL_NVIDIA SYCL_CHECK(CHECK_TRY_ERROR(oneapi::mkl::blas::column_major::gemm( oneapi::mkl::backend_selector{ *stream }, oneapi::mkl::transpose::trans, @@ -2590,9 +2592,9 @@ inline void ggml_sycl_op_mul_mat_sycl( src0_ddf_i, DnnlGemmWrapper::to_dt(), dst_dd_i, DnnlGemmWrapper::to_dt()); #endif } - (void) dst; - (void) src1_ddq_i; - (void) src1_padded_row_size; + GGML_UNUSED(dst); + GGML_UNUSED(src1_ddq_i); + GGML_UNUSED(src1_padded_row_size); } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__ @@ -2638,8 +2640,9 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens item_ct1); }); - (void) src1; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -2654,9 +2657,10 @@ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -2673,9 +2677,10 @@ inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_te sum_rows_f32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -2694,9 +2699,10 @@ inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_ten argsort_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, order, main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -2713,9 +2719,10 @@ inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tens argmax_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -2735,9 +2742,10 @@ inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const gg diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -2758,9 +2766,10 @@ inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tenso */ SYCL_CHECK(0); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -2783,9 +2792,10 @@ inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tenso */ SYCL_CHECK(0); - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) { @@ -2862,7 +2872,6 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra; ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra; - ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra; const bool src0_is_contiguous = ggml_is_contiguous(src0); const bool src1_is_contiguous = ggml_is_contiguous(src1); @@ -3289,7 +3298,6 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, GGML_TENSOR_BINARY_OP_LOCALS - const int64_t ne_dst = ggml_nelements(dst); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); queue_ptr main_stream = ctx.stream();; @@ -3397,6 +3405,7 @@ catch (sycl::exception const &exc) { inline bool ggml_sycl_supports_mmq(enum ggml_type type) { // TODO: accuracy issues in MMQ + GGML_UNUSED(type); return false; } @@ -3772,7 +3781,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr GGML_ABORT("fatal error"); } - (void) dst; + GGML_UNUSED(dst); } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__ @@ -3783,7 +3792,7 @@ catch (sycl::exception const &exc) { static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { // TODO: why do we pass dst as src1 here? ggml_sycl_cpy(ctx, src0, dst, nullptr); - (void) src1; + GGML_UNUSED(src1); } static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -3828,13 +3837,16 @@ static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor } static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - (void) src0; - (void) src1; - (void) dst; + GGML_UNUSED(src0); + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(ctx); } void ggml_sycl_set_main_device(const int main_device) try { - if (dpct::get_current_device_id() == main_device) return; + if (dpct::get_current_device_id() == static_cast (main_device)) { + return; + } check_allow_gpu_index(main_device); dpct::select_device(main_device); @@ -4202,6 +4214,7 @@ try { ggml_backend_sycl_context *sycl_ctx = (ggml_backend_sycl_context *)backend->context; + sycl::event *sycl_event = static_cast(event->context); const queue_ptr &stream = sycl_ctx->stream(sycl_ctx->device, 0); @@ -4216,7 +4229,7 @@ catch (sycl::exception const &exc) } static void ggml_backend_sycl_event_wait(ggml_backend_t backend, ggml_backend_event_t event) try { - ggml_backend_sycl_context* sycl_ctx = static_cast(backend->context); + sycl::event* sycl_event = static_cast(event->context); if (ggml_backend_is_sycl(backend)) { @@ -4624,6 +4637,7 @@ static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, cons // SYCL doesn't support registering host memory, left here for reference // "ggml_backend_register_host_buffer" // "ggml_backend_unregister_host_buffer" + GGML_UNUSED(name); return nullptr; } diff --git a/ggml/src/ggml-sycl/im2col.cpp b/ggml/src/ggml-sycl/im2col.cpp index 6a0a0fcd08c68..6146a99edbe77 100644 --- a/ggml/src/ggml-sycl/im2col.cpp +++ b/ggml/src/ggml-sycl/im2col.cpp @@ -120,6 +120,7 @@ void ggml_sycl_op_im2col( im2col_sycl(src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream); } - (void) src0; - (void) src0_dd; + GGML_UNUSED(src0); + GGML_UNUSED(src0_dd); + GGML_UNUSED(ctx); } diff --git a/ggml/src/ggml-sycl/mmq.cpp b/ggml/src/ggml-sycl/mmq.cpp index e952533d310ec..8ea82c940c788 100644 --- a/ggml/src/ggml-sycl/mmq.cpp +++ b/ggml/src/ggml-sycl/mmq.cpp @@ -813,7 +813,7 @@ load_tiles_q4_K(const void *__restrict__ vx, int *__restrict__ x_ql, x_ql[i * (WARP_SIZE + 1) + k] = get_int_from_uint8_aligned(bxi->qs, kqsx); } - const int blocks_per_tile_x_row = WARP_SIZE / QI4_K; // == 1 if QK_K == 256 + constexpr int blocks_per_tile_x_row = QI4_K > WARP_SIZE ? 1 : WARP_SIZE / QI4_K; // == 1 if QK_K == 256 const int kbxd = k % blocks_per_tile_x_row; // == 0 if QK_K == 256 #pragma unroll @@ -961,7 +961,7 @@ load_tiles_q5_K(const void *__restrict__ vx, int *__restrict__ x_ql, x_ql[i * (2*WARP_SIZE + 1) + kq1] = ql1 | qh1; } - const int blocks_per_tile_x_row = WARP_SIZE / QI5_K; // == 1 if QK_K == 256 + constexpr int blocks_per_tile_x_row = QI5_K > WARP_SIZE ? 1 : WARP_SIZE / QI5_K; // == 1 if QK_K == 256 const int kbxd = k % blocks_per_tile_x_row; // == 0 if QK_K == 256 #pragma unroll @@ -1109,7 +1109,7 @@ load_tiles_q6_K(const void *__restrict__ vx, int *__restrict__ x_ql, dpct::sub_sat()); } - const int blocks_per_tile_x_row = WARP_SIZE / QI6_K; // == 1 if QK_K == 256 + constexpr int blocks_per_tile_x_row = QI6_K > WARP_SIZE ? 1 : WARP_SIZE / QI6_K; // == 1 if QK_K == 256 const int kbxd = k % blocks_per_tile_x_row; // == 0 if QK_K == 256 float * x_dmf = (float *) x_dm; @@ -3020,9 +3020,9 @@ void ggml_sycl_op_mul_mat_q( break; } - (void) src1; - (void) dst; - (void) src1_ddf_i; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_ddf_i); } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__ diff --git a/ggml/src/ggml-sycl/mmvq.cpp b/ggml/src/ggml-sycl/mmvq.cpp index 7b10cf688148e..221f65c21ea36 100644 --- a/ggml/src/ggml-sycl/mmvq.cpp +++ b/ggml/src/ggml-sycl/mmvq.cpp @@ -753,11 +753,7 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, QK_WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0]; - auto ksigns64_ptr_ct1 = &ksigns64[0]; - + stream->submit([&](sycl::handler & cgh) { cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) @@ -780,9 +776,6 @@ static void mul_mat_vec_iq2_s_q8_1_sycl(const void *vx, const void *vy, { stream->submit([&](sycl::handler &cgh) { - auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0]; - auto ksigns64_ptr_ct1 = &ksigns64[0]; - cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) @@ -805,9 +798,6 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy, { stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0]; - auto ksigns64_ptr_ct1 = &ksigns64[0]; - cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) @@ -830,8 +820,6 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy, { stream->submit([&](sycl::handler &cgh) { - auto iq3s_grid_ptr_ct1 = &iq3s_grid[0]; - cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) @@ -854,9 +842,6 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy, { stream->submit([&](sycl::handler &cgh) { - auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0]; - auto ksigns64_ptr_ct1 = &ksigns64[0]; - cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) @@ -954,7 +939,7 @@ void ggml_sycl_op_mul_mat_vec_q( const size_t q8_1_bs = QK8_1; // the main device has a larger memory buffer to hold the results from all GPUs // nrows_dst == nrows of the matrix that the kernel writes into - const int64_t nrows_dst = id == ctx.device ? ne00 : row_diff; + for (int i = 0; i < src1_ncols; i++) { const size_t src1_ddq_i_offset = i * src1_padded_col_size * q8_1_ts / q8_1_bs; @@ -1023,7 +1008,8 @@ void ggml_sycl_op_mul_mat_vec_q( break; } } - (void) src1; - (void) dst; - (void) src1_ddf_i; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_ddf_i); + GGML_UNUSED(ctx); } diff --git a/ggml/src/ggml-sycl/norm.cpp b/ggml/src/ggml-sycl/norm.cpp index 72d8fdb878c8d..9cf2be15575d8 100644 --- a/ggml/src/ggml-sycl/norm.cpp +++ b/ggml/src/ggml-sycl/norm.cpp @@ -31,7 +31,7 @@ static void norm_f32(const float* x, float* dst, const int ncols, const float ep */ item_ct1.barrier(sycl::access::fence_space::local_space); mean_var = 0.f; - int nreduce = nwarps / WARP_SIZE; + size_t nreduce = nwarps / WARP_SIZE; for (size_t i = 0; i < nreduce; i += 1) { mean_var += s_sum[lane_id + i * WARP_SIZE]; @@ -55,7 +55,7 @@ static void group_norm_f32(const float* x, float* dst, const int group_size, con const int nthreads = item_ct1.get_local_range(2); const int nwarps = nthreads / WARP_SIZE; start += item_ct1.get_local_id(2); - int nreduce = nwarps / WARP_SIZE; + size_t nreduce = nwarps / WARP_SIZE; if (end >= ne_elements) { end = ne_elements; @@ -163,7 +163,7 @@ static void rms_norm_f32(const float* x, float* dst, const int ncols, const floa converged control flow. You may need to adjust the code. */ item_ct1.barrier(sycl::access::fence_space::local_space); - int nreduce = nwarps / WARP_SIZE; + size_t nreduce = nwarps / WARP_SIZE; tmp = 0.f; for (size_t i = 0; i < nreduce; i += 1) { @@ -352,6 +352,7 @@ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* (void)src1; (void)dst; (void)src1_dd; + GGML_UNUSED(ctx); } void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, diff --git a/ggml/src/ggml-sycl/rope.cpp b/ggml/src/ggml-sycl/rope.cpp index 1f06f78fa3d91..1244b231af738 100644 --- a/ggml/src/ggml-sycl/rope.cpp +++ b/ggml/src/ggml-sycl/rope.cpp @@ -269,7 +269,8 @@ void ggml_sycl_op_rope( } } - (void) src1; - (void) dst; - (void) src1_dd; + GGML_UNUSED(src1); + GGML_UNUSED(dst); + GGML_UNUSED(src1_dd); + GGML_UNUSED(ctx); } diff --git a/ggml/src/ggml-sycl/softmax.cpp b/ggml/src/ggml-sycl/softmax.cpp index 17a542e490362..a9b3fce0dc430 100644 --- a/ggml/src/ggml-sycl/softmax.cpp +++ b/ggml/src/ggml-sycl/softmax.cpp @@ -16,7 +16,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const const int lane_id = item_ct1.get_local_id(2) % WARP_SIZE; const int nthreads = block_size; const int nwarps = nthreads / WARP_SIZE; - int nreduce = nwarps / WARP_SIZE; + size_t nreduce = nwarps / WARP_SIZE; float slope = 1.0f; // ALiBi @@ -53,8 +53,9 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const if (block_size > WARP_SIZE) { if (warp_id == 0) { buf[lane_id] = -INFINITY; - for (size_t i = 1; i < nreduce; i += 1) + for (size_t i = 1; i < nreduce; i += 1) { buf[lane_id + i * WARP_SIZE] = -INFINITY; + } } item_ct1.barrier(sycl::access::fence_space::local_space); @@ -63,8 +64,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const } item_ct1.barrier(sycl::access::fence_space::local_space); max_val = buf[lane_id]; - for (size_t i = 1; i < nreduce; i += 1) - { + for (size_t i = 1; i < nreduce; i += 1) { max_val = std::max(max_val, buf[lane_id + i * WARP_SIZE]); } max_val = warp_reduce_max(max_val, item_ct1); @@ -89,8 +89,9 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const item_ct1.barrier(sycl::access::fence_space::local_space); if (warp_id == 0) { buf[lane_id] = 0.f; - for (size_t i = 1; i < nreduce; i += 1) + for (size_t i = 1; i < nreduce; i += 1) { buf[lane_id + i * WARP_SIZE] = 0.f; + } } item_ct1.barrier(sycl::access::fence_space::local_space); @@ -100,8 +101,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const item_ct1.barrier(sycl::access::fence_space::local_space); tmp = buf[lane_id]; - for (size_t i = 1; i < nreduce; i += 1) - { + for (size_t i = 1; i < nreduce; i += 1) { tmp += buf[lane_id + i * WARP_SIZE]; } tmp = warp_reduce_sum(tmp, item_ct1); diff --git a/ggml/src/ggml-sycl/tsembd.cpp b/ggml/src/ggml-sycl/tsembd.cpp index d5c227cd1abcd..2ffe3cca91725 100644 --- a/ggml/src/ggml-sycl/tsembd.cpp +++ b/ggml/src/ggml-sycl/tsembd.cpp @@ -68,4 +68,5 @@ void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml const int max_period = dst->op_params[1]; timestep_embedding_f32_sycl(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream); + GGML_UNUSED(src1); } diff --git a/ggml/src/ggml-sycl/wkv6.cpp b/ggml/src/ggml-sycl/wkv6.cpp index 4c737f4bfce2f..75ddfb86ac0f7 100644 --- a/ggml/src/ggml-sycl/wkv6.cpp +++ b/ggml/src/ggml-sycl/wkv6.cpp @@ -59,7 +59,7 @@ static void rwkv_wkv_f32_kernel( float y = 0; // Process in chunks of 4 for better vectorization - sycl::float4 k4, r4, tf4, td4, s4, kv4; + sycl::float4 k4, r4, tf4, td4, s4; #pragma unroll for (int j = 0; j < head_size; j += 4) { // Load data in vec4 chunks @@ -135,4 +135,7 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* s ); }); }); + + GGML_UNUSED(src0); + GGML_UNUSED(src1); }