Skip to content

Commit

Permalink
threadpool: use relaxed order for chunk sync
Browse files Browse the repository at this point in the history
A full memory barrier is overkill here, since each thread works on a different chunk
  • Loading branch information
max-krasnyansky committed Aug 22, 2024
1 parent d90a9c3 commit 6b22b53
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ typedef enum {
// Windows shim for C11 atomic_store(): atomically writes `val` into `*ptr`.
// Built on InterlockedExchange; the previous value it hands back is not needed.
static void atomic_store(atomic_int * ptr, LONG val) {
    (void) InterlockedExchange(ptr, val);
}
// Windows shim for C11 atomic_store_explicit(): atomically writes `val` into `*ptr`.
// The requested ordering `mo` is accepted for source compatibility but is not
// honoured yet; every store goes through InterlockedExchange regardless of `mo`.
static void atomic_store_explicit(atomic_int * ptr, LONG val, memory_order mo) {
    // TODO: add support for explicit memory order
    (void) mo; // intentionally unused until the TODO above is addressed
    (void) InterlockedExchange(ptr, val);
}
// Windows shim for C11 atomic_load(): returns the current value of `*ptr`.
// Implemented as a compare-exchange whose comparand and replacement are both 0,
// so the stored value is never changed; only the observed value is used.
static LONG atomic_load(atomic_int * ptr) {
    const LONG observed = InterlockedCompareExchange(ptr, 0, 0);
    return observed;
}
Expand Down Expand Up @@ -12471,7 +12475,7 @@ UseGgmlGemm1:;

if (ith == 0) {
// Every thread starts at ith, so the first unprocessed chunk is nth. This saves a bit of coordination right at the start.
atomic_store(&params->threadpool->current_chunk, nth);
atomic_store_explicit(&params->threadpool->current_chunk, nth, memory_order_relaxed);
}

ggml_barrier(params->threadpool);
Expand Down Expand Up @@ -12582,7 +12586,7 @@ UseGgmlGemm2:;
break;
}

current_chunk = atomic_fetch_add(&params->threadpool->current_chunk, 1);
current_chunk = atomic_fetch_add_explicit(&params->threadpool->current_chunk, 1, memory_order_relaxed);
}
}

Expand Down

0 comments on commit 6b22b53

Please sign in to comment.