Skip to content

Commit

Permalink
ggml : remove ggml_task_type and GGML_PERF (ggerganov#8017)
Browse files (browse the repository at this point in the history)
* ggml : remove ggml_task_type and GGML_PERF

* check abort_callback on main thread only

* vulkan : remove usage of ggml_compute_params

* remove LLAMA_PERF
  • Loading branch information
slaren authored and MagnusS0 committed Jul 1, 2024
1 parent bd77273 commit f92b903
Show file tree
Hide file tree
Showing 8 changed files with 398 additions and 1,078 deletions.
7 changes: 0 additions & 7 deletions CMakeLists.txt
Original file line number | Diff line number | Diff line change
Expand Up @@ -144,9 +144,6 @@ option(LLAMA_BUILD_SERVER "llama: build server example"
option(LLAMA_LASX "llama: enable lasx" ON)
option(LLAMA_LSX "llama: enable lsx" ON)

# add perf arguments
option(LLAMA_PERF "llama: enable perf" OFF)

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)

Expand Down Expand Up @@ -870,10 +867,6 @@ if (LLAMA_CPU_HBM)
target_link_libraries(ggml PUBLIC memkind)
endif()

if (LLAMA_PERF)
add_compile_definitions(GGML_PERF)
endif()

function(get_flags CCID CCVER)
set(C_FLAGS "")
set(CXX_FLAGS "")
Expand Down
3 changes: 0 additions & 3 deletions Makefile
Original file line number | Diff line number | Diff line change
Expand Up @@ -345,9 +345,6 @@ ifdef LLAMA_GPROF
MK_CFLAGS += -pg
MK_CXXFLAGS += -pg
endif
ifdef LLAMA_PERF
MK_CPPFLAGS += -DGGML_PERF
endif

# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
Expand Down
36 changes: 10 additions & 26 deletions ggml-vulkan.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -513,8 +513,8 @@ static size_t vk_skip_checks;
static size_t vk_output_tensor;

static void ggml_vk_print_tensor(ggml_backend * ctx, const ggml_tensor * tensor, const char * name);
static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor);
static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor);
static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor * tensor);
static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor * tensor);
#endif

typedef void (*ggml_vk_func_t)(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
Expand Down Expand Up @@ -5644,7 +5644,7 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
}
}

static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor){
static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * tensor){
ggml_tensor_extra_gpu * extra = nullptr;

switch (tensor->op) {
Expand Down Expand Up @@ -5697,17 +5697,10 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_
return false;
}

if (params->ith != 0) {
return true;
}
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return true;
}

VK_LOG_DEBUG("ggml_vk_compute_forward(" << tensor << ", name=" << tensor->name << ", op=" << ggml_op_name(tensor->op) << ", type=" << tensor->type << ", ne0=" << tensor->ne[0] << ", ne1=" << tensor->ne[1] << ", ne2=" << tensor->ne[2] << ", ne3=" << tensor->ne[3] << ", nb0=" << tensor->nb[0] << ", nb1=" << tensor->nb[1] << ", nb2=" << tensor->nb[2] << ", nb3=" << tensor->nb[3] << ", view_src=" << tensor->view_src << ", view_offs=" << tensor->view_offs << ")");

#ifdef GGML_VULKAN_CHECK_RESULTS
ggml_vk_check_results_0(ctx, params, tensor);
ggml_vk_check_results_0(ctx, tensor);
#endif

vk_context& subctx = ctx->gc.contexts[extra->ctx_idx];
Expand Down Expand Up @@ -6214,23 +6207,20 @@ GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backen
ggml_vk_build_graph(ctx,cgraph->nodes[i], i == last_node);
}

ggml_compute_params params = {};
params.type = GGML_TASK_TYPE_COMPUTE;
params.ith = 0;
for (int i = 0; i < cgraph->n_nodes; i++) {
ggml_tensor * node = cgraph->nodes[i];

if (ggml_vk_is_empty(node)) {
continue;
}

bool ok = ggml_vk_compute_forward(ctx, &params, node);
bool ok = ggml_vk_compute_forward(ctx, node);
if (!ok) {
fprintf(stderr, "%s: error: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op));
}
#ifdef GGML_VULKAN_CHECK_RESULTS
else {
ggml_vk_check_results_1(ctx, &params, node);
ggml_vk_check_results_1(ctx, node);
}
#endif
GGML_ASSERT(ok);
Expand Down Expand Up @@ -6600,11 +6590,8 @@ void * comp_result;
size_t comp_size;
size_t comp_nb[GGML_MAX_DIMS];
size_t check_counter = 0;
static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor) {
if (params->ith != 0) {
return;
}
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor * tensor) {
if (tensor->op == GGML_OP_TRANSPOSE) {
return;
}

Expand Down Expand Up @@ -6908,11 +6895,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
ggml_free(ggml_ctx);
}

static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor) {
if (params->ith != 0) {
return;
}
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor * tensor) {
if (tensor->op == GGML_OP_TRANSPOSE) {
return;
}
if (!(vk_output_tensor > 0 && vk_output_tensor == check_counter) && check_counter <= vk_skip_checks) {
Expand Down
Loading

0 comments on commit f92b903

Please sign in to comment.