
Commit

update backends
ggml-ci
slaren committed Mar 17, 2024
1 parent 0661e6a commit cc9299c
Showing 6 changed files with 6 additions and 6 deletions.
6 changes: 1 addition & 5 deletions ggml-cuda.cu
@@ -7791,10 +7791,6 @@ struct cuda_pool_alloc {
 
 static bool g_cublas_loaded = false;
 
-static bool ggml_cublas_loaded(void) {
-    return g_cublas_loaded;
-}
-
 static void ggml_init_cublas() {
     static bool initialized = false;
 
@@ -11381,7 +11377,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
 GGML_CALL static bool ggml_backend_cuda_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
     const int min_batch_size = 32;
 
-    return op->ne[1] > min_batch_size && op->op != GGML_OP_GET_ROWS;
+    return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
 
     UNUSED(backend);
 }
1 change: 1 addition & 0 deletions ggml-kompute.cpp
@@ -1951,6 +1951,7 @@ static struct ggml_backend_i kompute_backend_i = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute = */ ggml_backend_kompute_graph_compute,
     /* .supports_op = */ ggml_backend_kompute_supports_op,
+    /* .offload_op = */ NULL,
     /* .event_new = */ NULL,
     /* .event_free = */ NULL,
     /* .event_record = */ NULL,
1 change: 1 addition & 0 deletions ggml-metal.m
@@ -2837,6 +2837,7 @@ GGML_CALL static bool ggml_backend_metal_supports_op(ggml_backend_t backend, con
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute = */ ggml_backend_metal_graph_compute,
     /* .supports_op = */ ggml_backend_metal_supports_op,
+    /* .offload_op = */ NULL,
     /* .event_new = */ NULL,
     /* .event_free = */ NULL,
     /* .event_record = */ NULL,
1 change: 1 addition & 0 deletions ggml-sycl.cpp
@@ -17390,6 +17390,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute = */ ggml_backend_sycl_graph_compute,
     /* .supports_op = */ ggml_backend_sycl_supports_op,
+    /* .offload_op = */ NULL,
     /* .event_new = */ NULL,
     /* .event_free = */ NULL,
     /* .event_record = */ NULL,
1 change: 1 addition & 0 deletions ggml-vulkan.cpp
@@ -5693,6 +5693,7 @@ static ggml_backend_i ggml_backend_vk_interface = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute = */ ggml_backend_vk_graph_compute,
     /* .supports_op = */ ggml_backend_vk_supports_op,
+    /* .offload_op = */ NULL,
     /* .event_new = */ NULL,
     /* .event_free = */ NULL,
     /* .event_record = */ NULL,
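The one-line additions to the Kompute, Metal, SYCL, and Vulkan tables above all populate the same new callback slot with NULL, while the CUDA backend supplies the predicate shown earlier in ggml_backend_cuda_offload_op. As a rough sketch only (the ggml_backend_i definition itself is not part of this diff, and the real struct has more members than shown here), the slot presumably looks something like this:

struct ggml_tensor;                            // opaque to this sketch
typedef struct ggml_backend * ggml_backend_t;  // assumed forward declaration

struct ggml_backend_i {
    // ... other callbacks (graph_compute, etc.) ...
    bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
    // New slot wired up in this commit: return true if the backend wants to run
    // the op even though the tensor's buffer lives elsewhere (e.g. CUDA offloading
    // large batches). NULL, as in the four tables above, means "never offload".
    bool (*offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
    // ... event_new, event_free, event_record, ...
};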
2 changes: 1 addition & 1 deletion llama.cpp
@@ -8614,7 +8614,7 @@ static struct ggml_cgraph * llama_build_graph(
         // norm may be automatically assigned to the backend of the previous layer, increasing data transfer between backends
         // FIXME: fix in ggml_backend_sched
         const bool full_offload = lctx.model.n_gpu_layers > (int)lctx.model.hparams.n_layer;
-        if (batch.n_tokens <= 32 || full_offload) {
+        if (batch.n_tokens < 32 || full_offload) {
             if (il != -1 && strcmp(name, "norm") == 0) {
                 for (auto * backend : lctx.backends) {
                     if (ggml_backend_buft_supports_backend(lctx.model.buft_layer[il].buft, backend)) {
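Taken together with the CUDA change above (> to >=), this tweak (<= 32 to < 32) makes the two thresholds agree on a batch of exactly 32 tokens. A throwaway boundary check, using hypothetical standalone helpers rather than the repository's actual functions:

#include <stdio.h>

// Hypothetical helpers mirroring the two comparisons after this commit;
// min_batch_size = 32 as in ggml_backend_cuda_offload_op.
static int cuda_wants_offload(int n_tokens)         { return n_tokens >= 32; }
static int pin_norm_to_layer_backend(int n_tokens)  { return n_tokens <  32; }

int main(void) {
    for (int n = 31; n <= 33; ++n) {
        printf("n_tokens=%2d  cuda_offload=%d  pin_norm=%d\n",
               n, cuda_wants_offload(n), pin_norm_to_layer_backend(n));
    }
    // Before this commit, a batch of exactly 32 tokens was not offloaded by the
    // CUDA backend yet the norm was still pinned; now 32 behaves like any larger batch.
    return 0;
}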
