diff --git a/ggml-metal.m b/ggml-metal.m index 60fef1a1912b5..ab3c84f7fd9e9 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -26,15 +26,6 @@ #define GGML_METAL_MAX_KERNELS 256 -struct ggml_metal_buffer { - const char * name; - - void * data; - size_t size; - - id metal; -}; - struct ggml_metal_kernel { id function; id pipeline; @@ -172,9 +163,6 @@ dispatch_queue_t d_queue; - int n_buffers; - struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS]; - struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS]; bool support_simdgroup_reduction; @@ -242,24 +230,20 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){ // Show all the Metal device instances in the system NSArray * devices = MTLCopyAllDevices(); for (id device in devices) { - NSString * s = [device name]; - GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]); + GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]); } [devices release]; // since it was created by a *Copy* C method #endif // Pick and show default Metal device id device = MTLCreateSystemDefaultDevice(); - NSString * s = [device name]; - GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]); + GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]); // Configure context struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context)); ctx->device = device; ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS); ctx->queue = [ctx->device newCommandQueue]; - ctx->n_buffers = 0; - ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); // load library @@ -534,10 +518,6 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){ static void ggml_metal_free(struct ggml_metal_context * ctx) { GGML_METAL_LOG_INFO("%s: deallocating\n", __func__); - for (int i = 0; i < ctx->n_buffers; ++i) { - [ctx->buffers[i].metal release]; - } - for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) { if (ctx->kernels[i].pipeline) { [ctx->kernels[i].pipeline release]; @@ -580,51 +560,30 @@ static void ggml_metal_free(struct ggml_metal_context * ctx) { // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the // Metal buffer based on the host memory pointer // -static id ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) { +static id ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) { //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach); const int64_t tsize = ggml_nbytes(t); ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer; - // compatibility with ggml-backend - if (buffer && buffer->buft == ggml_backend_metal_buffer_type()) { - struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context; - - // find the view that contains the tensor fully - for (int i = 0; i < buf_ctx->n_buffers; ++i) { - const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data; - - //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size); - if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) { - *offs = (size_t) ioffs; - - //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs); - - return buf_ctx->buffers[i].metal; - } - } - - GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name); - - return nil; - } + struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context; // find the view that contains the tensor fully - for (int i = 0; i < ctx->n_buffers; ++i) { - const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data; + for (int i = 0; i < buf_ctx->n_buffers; ++i) { + const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data; - //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld, name = %s\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size, ctx->buffers[i].name); - if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) { + //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size); + if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) { *offs = (size_t) ioffs; - //GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs); + //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs); - return ctx->buffers[i].metal; + return buf_ctx->buffers[i].metal; } } - GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__); + GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name); return nil; } @@ -817,9 +776,9 @@ static bool ggml_metal_graph_compute( const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT; const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT; - id id_src0 = src0 ? ggml_metal_get_buffer(ctx, src0, &offs_src0) : nil; - id id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil; - id id_dst = dst ? ggml_metal_get_buffer(ctx, dst, &offs_dst) : nil; + id id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil; + id id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil; + id id_dst = dst ? ggml_metal_get_buffer(dst, &offs_dst) : nil; //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op)); //if (src0) { @@ -1601,7 +1560,7 @@ static bool ggml_metal_graph_compute( struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)]; size_t offs_src_cur = 0; - id id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur); + id id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur); [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j]; } @@ -1746,7 +1705,7 @@ static bool ggml_metal_graph_compute( struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)]; size_t offs_src_cur = 0; - id id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur); + id id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur); [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j]; }