
Commit

Change requirement of last backend being CPU to requiring its default buffer type be a host buffer, fix rebase errors
AutonomicPerfectionist committed Mar 15, 2024
1 parent e8a6156 commit 2217b02
Showing 3 changed files with 16 additions and 11 deletions.
2 changes: 1 addition & 1 deletion ggml-backend.c
@@ -1696,7 +1696,7 @@ ggml_backend_sched_t ggml_backend_sched_new(
         bool parallel) {
     GGML_ASSERT(n_backends > 0);
     GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
-    GGML_ASSERT(ggml_backend_is_cpu(backends[n_backends - 1])); // last backend must be CPU
+    GGML_ASSERT(ggml_backend_buft_is_host(ggml_backend_get_default_buffer_type(backends[n_backends - 1]))); // last backend must be host
 
     struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1);
 
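Note (not part of the commit): the relaxed assertion means the scheduler's last backend no longer has to be the CPU backend itself; any backend whose default buffer type reports host memory qualifies, which is what allows an MPI wrapper around the CPU backend to occupy that slot. A minimal sketch of the check using the existing ggml-backend API; the helper name is hypothetical:

#include "ggml-backend.h"
#include <stdbool.h>

// Hypothetical helper mirroring the new assertion in ggml_backend_sched_new.
// Old rule: ggml_backend_is_cpu(backend)
// New rule: the backend's default buffer type must be host-addressable.
static bool sched_accepts_as_last_backend(ggml_backend_t backend) {
    return ggml_backend_buft_is_host(ggml_backend_get_default_buffer_type(backend));
}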
7 changes: 4 additions & 3 deletions ggml-mpi.cpp
@@ -548,7 +548,7 @@ GGML_CALL static enum ggml_status ggml_backend_mpi_graph_compute(ggml_backend_t
 
     if (!ctx->remote) {
         ggml_backend_sched_t sched = ggml_backend_sched_new(ctx->backends.data(), backend_buft.data(),
-                                                             (int) ctx->backends.size(), cgraph->n_nodes);
+                                                             (int) ctx->backends.size(), cgraph->n_nodes, false);
 
         ggml_backend_sched_reserve(sched, cgraph);
         ggml_backend_sched_graph_compute(sched, cgraph);
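The trailing false added here tracks the extra parameter that ggml_backend_sched_new gained upstream (one of the rebase errors mentioned in the commit message); passing false presumably opts out of the scheduler's parallel mode. A sketch of the call shape, assuming the five-argument signature used above; the variable names are placeholders:

// Placeholder names (backends, bufts, n_backends, graph_size) are illustrative.
ggml_backend_sched_t sched = ggml_backend_sched_new(backends, bufts, n_backends, graph_size, /*parallel=*/false);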
@@ -850,7 +850,7 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_mpi_wrap_buffer(ggml_backend_buffer
     return buffer;
 }
 
-bool ggml_backend_mpi_cpy_tensor_async(ggml_backend_t backend, const struct ggml_tensor * src, struct ggml_tensor * dst) {
+bool ggml_backend_mpi_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst) {
 //    int src_rank = ggml_backend_mpi_buffer_rank(src->buffer);
 //    int dst_rank = ggml_backend_mpi_buffer_rank(dst->buffer);
 //
@@ -870,7 +870,8 @@ bool ggml_backend_mpi_cpy_tensor_async(ggml_backend_t backend, const struct ggml
 //    } else if (dst_rank == ggml_backend_mpi_local_rank(backend)){
 //        ggml_mpi_tensor_recv(dst, src_rank, ctx->comm);
 //    }
-    return true;
+//    fprintf(stderr, "ATTEMPTING ASYNC COPY FOR SRC TENSOR %s TO DST TENSOR %s WITH SRC BACKEND %s AND DST BACKEND %s\n", src->name, dst->name, ggml_backend_name(backend_src), ggml_backend_name(backend_dst));
+    return false;
 
 }
 
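Returning false here means the MPI backend currently declines every async copy (its MPI send/recv path above is still commented out). Under the ggml-backend contract, a cpy_tensor_async hook that returns false signals that the copy was not handled, and the caller falls back to a blocking copy. A rough sketch of that caller-side fallback, stated as an assumption about the dispatch rather than code from this commit:

// Hypothetical fallback by the caller when the async hook declines the copy.
if (!ggml_backend_mpi_cpy_tensor_async(backend_src, backend_dst, src, dst)) {
    ggml_backend_tensor_copy(src, dst); // plain synchronous copy
}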
18 changes: 11 additions & 7 deletions llama.cpp
@@ -9012,13 +9012,15 @@ static int llama_decode_internal(
     //    ggml_graph_dump_dot(gf, NULL, "llama.dot");
     //}
 
+#ifdef GGML_USE_MPI
+    if (ggml_mpi_rank(lctx.model.ctx_mpi) == 0) {
+#endif
+
     // extract logits
     // TODO: do not compute and extract logits if only embeddings are needed
     //       update the graphs to skip "result_output" if logits are not needed
     if (res) {
-#ifdef GGML_USE_MPI
-        if (ggml_mpi_rank(lctx.model.ctx_mpi) == 0) {
-#endif
+
 
         ggml_backend_t backend_res = ggml_backend_sched_get_tensor_backend(lctx.sched, res);
         GGML_ASSERT(backend_res != nullptr);
@@ -9104,6 +9106,10 @@
                     } break;
             }
         }
+
+#ifdef GGML_USE_MPI
+    }
+#endif
     }
 
     // wait for the computation to finish (automatically done when obtaining the model output)
@@ -9121,9 +9127,7 @@
         }
     }
 
-#ifdef GGML_USE_MPI
-    }
-#endif
 
+
     return 0;
 }
@@ -13051,7 +13055,7 @@ struct llama_context * llama_new_context_with_model(
 
 
     // ctx->backend_cpu = ctx->backends.back();
-    ctx->backends.push_back(ctx->backend_cpu);
+    ctx->backends.push_back(ggml_backend_mpi_init(&ctx->backend_cpu, 1, ggml_mpi_rank(model->ctx_mpi)));
 
 #endif
 
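Registering an MPI wrapper around the CPU backend, rather than the CPU backend itself, is what the relaxed host-buffer assertion in ggml-backend.c permits: the wrapped backend presumably still defaults to host memory, so it can remain the scheduler's last backend. A sketch of the wiring, assuming the ggml_backend_mpi_init signature used above; ctx_mpi is a placeholder for the MPI context:

// Wrap this rank's CPU backend in an MPI backend and register it last,
// so ggml_backend_sched_new's host-buffer check on the final backend holds.
ggml_backend_t backend_cpu = ggml_backend_cpu_init();
ggml_backend_t backend_mpi = ggml_backend_mpi_init(&backend_cpu, 1, ggml_mpi_rank(ctx_mpi));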

