[SYCL] fix the mul_mat_id ut issues (#8427)
* fix part of mul_mat_id

* skip the bfloat 16 sycl ut

Signed-off-by: Chen Xi <[email protected]>

---------

Signed-off-by: Chen Xi <[email protected]>
Co-authored-by: Meng, Hengyu <[email protected]>
Co-authored-by: Chen Xi <[email protected]>
3 people authored Jul 12, 2024
1 parent 3686456 commit b549a1b
Showing 3 changed files with 14 additions and 44 deletions.
ggml/src/ggml-backend.c (2 changes: 1 addition & 1 deletion)

@@ -394,7 +394,7 @@ void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event)
 
 // backend registry
 
-#define GGML_REG_MAX_BACKENDS 16
+#define GGML_REG_MAX_BACKENDS 64
 
 struct ggml_backend_reg {
     char name[128];
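Why the registry bump matters: ggml keeps registered backends in a fixed-size table, and exceeding GGML_REG_MAX_BACKENDS fails an assertion at registration time; a SYCL build that enumerates many devices can run up against the old cap of 16. Below is a minimal sketch of that fixed-capacity pattern under invented names (backend_reg_sketch, register_backend_sketch), not the actual ggml-backend.c implementation:

```cpp
// Simplified sketch of a fixed-capacity backend registry (hypothetical names,
// mirroring the GGML_REG_MAX_BACKENDS pattern; not the real ggml-backend.c).
#include <cassert>
#include <cstddef>
#include <cstring>

#define REG_MAX_BACKENDS 64  // the commit raises ggml's cap from 16 to 64

struct backend_reg_sketch {
    char name[128];
};

static backend_reg_sketch g_registry[REG_MAX_BACKENDS];
static size_t g_registry_count = 0;

static void register_backend_sketch(const char * name) {
    // With a cap of 16, a platform exposing one backend entry per device
    // (as SYCL can) risks exhausting the table; overflow is a hard assert.
    assert(g_registry_count < REG_MAX_BACKENDS);
    std::strncpy(g_registry[g_registry_count].name, name,
                 sizeof(g_registry[0].name) - 1);
    g_registry[g_registry_count].name[sizeof(g_registry[0].name) - 1] = '\0';
    g_registry_count++;
}
```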
ggml/src/ggml-sycl.cpp (49 changes: 13 additions & 36 deletions)

@@ -3768,37 +3768,13 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
         stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
     SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
 
-    const ggml_tensor_extra_gpu *src0_extra =
-        (const ggml_tensor_extra_gpu *)src0->extra;
-    const ggml_tensor_extra_gpu *src1_extra =
-        (const ggml_tensor_extra_gpu *)src1->extra;
-    const ggml_tensor_extra_gpu *dst_extra =
-        (const ggml_tensor_extra_gpu *)dst->extra;
-
-    ggml_tensor_extra_gpu src0_row_extra;
-    ggml_tensor_extra_gpu src1_row_extra;
-    ggml_tensor_extra_gpu dst_row_extra;
-
     ggml_tensor src0_row = *src0;
     ggml_tensor src1_row = *src1;
     ggml_tensor dst_row = *dst;
 
-    src1_row.backend = GGML_BACKEND_TYPE_GPU;
-    dst_row.backend = GGML_BACKEND_TYPE_GPU;
-
-    src0_row.extra = &src0_row_extra;
-    src1_row.extra = &src1_row_extra;
-    dst_row.extra = &dst_row_extra;
-
-    char *src0_original = src1->backend == GGML_BACKEND_TYPE_CPU
-                              ? (char *)src0->data
-                              : (char *)src0_extra->data_device[ctx.device];
-    char *src1_original = src1->backend == GGML_BACKEND_TYPE_CPU
-                              ? (char *)src1->data
-                              : (char *)src1_extra->data_device[ctx.device];
-    char *dst_original = dst->backend == GGML_BACKEND_TYPE_CPU
-                             ? (char *)dst->data
-                             : (char *)dst_extra->data_device[ctx.device];
+    char *src0_original = (char *)src0->data;
+    char *src1_original = (char *)src1->data;
+    char *dst_original = (char *)dst->data;
 
     src0_row.ne[2] = 1;
     src0_row.ne[3] = 1;
@@ -3827,12 +3803,9 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
             const int64_t i1 = id;
             const int64_t i2 = i12;
 
-            src0_row_extra.data_device[ctx.device] =
-                src0_original + i02*nb02;
-            src1_row_extra.data_device[ctx.device] =
-                src1_original + + i11*nb11 + i12*nb12;
-            dst_row_extra.data_device[ctx.device] =
-                dst_original + i1*nb1 + i2*nb2;
+            src0_row.data = src0_original + i02*nb02;
+            src1_row.data = src1_original + + i11*nb11 + i12*nb12;
+            dst_row.data = dst_original + i1*nb1 + i2*nb2;
 
             ggml_sycl_mul_mat(ctx, &src0_row, &src1_row, &dst_row);
         }
@@ -3841,8 +3814,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
         ggml_sycl_pool_alloc<char> src1_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(src1));
         ggml_sycl_pool_alloc<char> dst_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(dst));
 
-        src1_row_extra.data_device[ctx.device] = src1_contiguous.get();
-        dst_row_extra.data_device[ctx.device] = dst_contiguous.get();
+        src1_row.data = src1_contiguous.get();
+        dst_row.data = dst_contiguous.get();
 
         for (int64_t i02 = 0; i02 < n_as; i02++) {
             int64_t num_src1_rows = 0;
@@ -3898,7 +3871,7 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
                 });
             }
 
-            src0_row_extra.data_device[ctx.device] = src0_original + i02*nb02;
+            src0_row.data = src0_original + i02*nb02;
 
             GGML_ASSERT(nb11 == sizeof(float)*ne10);
             GGML_ASSERT(nb1 == sizeof(float)*ne0);
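The net effect of the hunks above: the per-row views no longer publish device pointers through ggml_tensor_extra_gpu side structures; each iteration now writes the computed address directly into the row tensor's data field. A minimal standalone sketch of that strided addressing, assuming ggml's convention that nb[i] is the byte stride of dimension i (the tensor_view_sketch type and helper names are invented for illustration):

```cpp
// Sketch of ggml-style strided addressing used by mul_mat_id (hypothetical
// standalone types; mirrors src0_original + i02*nb02 etc. from the diff).
#include <cstddef>
#include <cstdint>

struct tensor_view_sketch {
    char *  data;   // base pointer of the whole tensor
    int64_t ne[4];  // number of elements in each dimension
    size_t  nb[4];  // byte stride of each dimension
};

// Weight slab of expert i02 inside the stacked src0 tensor.
static char * expert_slab(const tensor_view_sketch & src0, int64_t i02) {
    return src0.data + i02 * src0.nb[2];
}

// One activation row (i11) of batch i12 inside src1.
static char * src1_row_ptr(const tensor_view_sketch & src1,
                           int64_t i11, int64_t i12) {
    return src1.data + i11 * src1.nb[1] + i12 * src1.nb[2];
}

// One output row of dst, matching dst_original + i1*nb1 + i2*nb2.
static char * dst_row_ptr(const tensor_view_sketch & dst,
                          int64_t i1, int64_t i2) {
    return dst.data + i1 * dst.nb[1] + i2 * dst.nb[2];
}
```

Since ggml_sycl_mul_mat consumes its operands' data pointers directly, repointing src0_row.data, src1_row.data, and dst_row.data per expert is sufficient, and there is no extras structure left to fall out of sync.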
@@ -5221,6 +5194,10 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
                         return false;
                     }
                 }
+                ggml_type src0_type = op->src[0]->type;
+                if (src0_type == GGML_TYPE_BF16) {
+                    return false;
+                }
                 return true;
             } break;
         case GGML_OP_GET_ROWS:
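The supports_op hunk is what makes the BF16 unit tests skip on SYCL: when a backend reports an op as unsupported, the scheduler routes it elsewhere and the backend unit tests skip the case, instead of dispatching a kernel that does not exist. A hedged sketch of that gate shape, with invented names standing in for ggml's enums:

```cpp
// Sketch of a supports_op-style capability gate (hypothetical names; the real
// check lives in ggml_backend_sycl_supports_op and uses GGML_TYPE_BF16).
enum class dtype_sketch { F32, F16, BF16 };

struct op_sketch {
    dtype_sketch src0_type;
};

// Returning false routes the op to another backend (or skips the test case)
// rather than running a SYCL kernel that has no BF16 implementation.
static bool sycl_supports_op_sketch(const op_sketch & op) {
    if (op.src0_type == dtype_sketch::BF16) {
        return false;
    }
    return true;
}
```

With mul_mat_id working, the blanket MoE workaround in src/llama.cpp (forcing n_gpu_layers to 0 whenever hparams.n_expert > 0) is no longer needed, which is what the final diff below removes.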
src/llama.cpp (7 changes: 0 additions & 7 deletions)

@@ -5883,13 +5883,6 @@ static bool llm_load_tensors(
 
     auto & hparams = model.hparams;
 
-#ifdef GGML_USE_SYCL
-    // disable MoE with SYCL until mul_mat_id is updated
-    if (hparams.n_expert > 0) {
-        n_gpu_layers = 0;
-    }
-#endif
-
     model.split_mode = split_mode;
     model.main_gpu = main_gpu;
     model.n_gpu_layers = n_gpu_layers;