Skip to content

Commit

Permalink
mmvq in cuda path
Browse files Browse the repository at this point in the history
  • Loading branch information
airMeng committed Aug 19, 2024
1 parent 8b96a69 commit 877d91b
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion ggml/src/ggml-sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3581,7 +3581,8 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor

bool use_mul_mat_vec_q = ggml_is_quantized(src0->type)
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32
&& src1->ne[1] <= MMVQ_MAX_BATCH_SIZE && src1->ne[1] > MMVQ_MIN_BATCH_SIZE;
&& src1->ne[1] <= MMVQ_MAX_BATCH_SIZE
&& ctx.stream()->get_backend() == sycl::backend::ext_oneapi_cuda ? true: src1->ne[1] > MMVQ_MIN_BATCH_SIZE;

// True when ggml_sycl_supports_mmq() accepts the src0 type and both src1
// and dst are F32. Unlike use_mul_mat_vec_q, no batch-size bound is checked here.
bool use_mul_mat_q = ggml_sycl_supports_mmq(src0->type)
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32;
Expand Down

0 comments on commit 877d91b

Please sign in to comment.