Skip to content

Commit

Permalink
metal : remove unused n_buffers and buffers (ggerganov#5129)
Browse files Browse the repository at this point in the history
  • Loading branch information
ptsochantaris authored Jan 26, 2024
1 parent 38b431d commit 6dd3c28
Showing 1 changed file with 16 additions and 57 deletions.
73 changes: 16 additions & 57 deletions ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,6 @@

#define GGML_METAL_MAX_KERNELS 256

// Describes one memory region together with the Metal buffer object that backs it.
// The Metal context stores a fixed-size array of these (GGML_METAL_MAX_BUFFERS entries).
struct ggml_metal_buffer {
// label for the region; in this view it is only referenced from commented-out logging
const char * name;

// base address of the region; a tensor's data pointer is subtracted from it to obtain the tensor's offset
void * data;
// extent of the region, used as the upper bound when checking that a tensor fits entirely inside it
// (presumably in bytes, since it is compared against ggml_nbytes() results — confirm)
size_t size;

// Metal buffer handed back by the lookup once a tensor is found to lie within [data, data + size)
id<MTLBuffer> metal;
};

struct ggml_metal_kernel {
id<MTLFunction> function;
id<MTLComputePipelineState> pipeline;
Expand Down Expand Up @@ -172,9 +163,6 @@

dispatch_queue_t d_queue;

int n_buffers;
struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];

struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];

bool support_simdgroup_reduction;
Expand Down Expand Up @@ -242,24 +230,20 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
// Show all the Metal device instances in the system
NSArray * devices = MTLCopyAllDevices();
for (id<MTLDevice> device in devices) {
NSString * s = [device name];
GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
}
[devices release]; // since it was created by a *Copy* C method
#endif

// Pick and show default Metal device
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
NSString * s = [device name];
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);

// Configure context
struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
ctx->device = device;
ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
ctx->queue = [ctx->device newCommandQueue];
ctx->n_buffers = 0;

ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);

// load library
Expand Down Expand Up @@ -534,10 +518,6 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
static void ggml_metal_free(struct ggml_metal_context * ctx) {
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);

for (int i = 0; i < ctx->n_buffers; ++i) {
[ctx->buffers[i].metal release];
}

for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
if (ctx->kernels[i].pipeline) {
[ctx->kernels[i].pipeline release];
Expand Down Expand Up @@ -580,51 +560,30 @@ static void ggml_metal_free(struct ggml_metal_context * ctx) {
// the assumption is that there is a 1-to-1 mapping between the host and device memory buffers, so we can find the
// Metal buffer based on the host memory pointer
//
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) {
//GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);

const int64_t tsize = ggml_nbytes(t);

ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;

// compatibility with ggml-backend
if (buffer && buffer->buft == ggml_backend_metal_buffer_type()) {
struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;

// find the view that contains the tensor fully
for (int i = 0; i < buf_ctx->n_buffers; ++i) {
const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;

//GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
*offs = (size_t) ioffs;

//GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);

return buf_ctx->buffers[i].metal;
}
}

GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);

return nil;
}
struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;

// find the view that contains the tensor fully
for (int i = 0; i < ctx->n_buffers; ++i) {
const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
for (int i = 0; i < buf_ctx->n_buffers; ++i) {
const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;

//GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld, name = %s\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size, ctx->buffers[i].name);
if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
//GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
*offs = (size_t) ioffs;

//GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
//GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);

return ctx->buffers[i].metal;
return buf_ctx->buffers[i].metal;
}
}

GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);

return nil;
}
Expand Down Expand Up @@ -817,9 +776,9 @@ static bool ggml_metal_graph_compute(
const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;

id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(ctx, src0, &offs_src0) : nil;
id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(ctx, dst, &offs_dst) : nil;
id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil;
id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil;
id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(dst, &offs_dst) : nil;

//GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
//if (src0) {
Expand Down Expand Up @@ -1601,7 +1560,7 @@ static bool ggml_metal_graph_compute(
struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];

size_t offs_src_cur = 0;
id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);

[encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
}
Expand Down Expand Up @@ -1746,7 +1705,7 @@ static bool ggml_metal_graph_compute(
struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];

size_t offs_src_cur = 0;
id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);

[encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
}
Expand Down

0 comments on commit 6dd3c28

Please sign in to comment.