naming : normalize the name of callback-related identifiers #9405

Open · wants to merge 1 commit into base: master
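The change applies one convention across the tree: a callback pointer `cb_<name>` is paired with an opaque `void * cb_<name>_ctx` that is handed back as the callback's last argument, replacing the older `user_data` / `callback_data` / `abort_callback_data` spellings. A minimal sketch of the resulting pairing (field names taken from the diff below; `my_cb_eval` and `my_ctx` are hypothetical):

    // the context pointer is named after the callback it belongs to
    params.cb_eval     = my_cb_eval; // a ggml_backend_sched_eval_callback
    params.cb_eval_ctx = &my_ctx;    // passed back to my_cb_eval as its last argument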
2 changes: 1 addition & 1 deletion common/common.cpp
@@ -1018,7 +1018,7 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
cparams.attention_type = params.attention_type;
cparams.defrag_thold = params.defrag_thold;
cparams.cb_eval = params.cb_eval;
- cparams.cb_eval_user_data = params.cb_eval_user_data;
+ cparams.cb_eval_ctx = params.cb_eval_ctx;
cparams.offload_kqv = !params.no_kv_offload;
cparams.flash_attn = params.flash_attn;
cparams.no_perf = params.no_perf;
4 changes: 2 additions & 2 deletions common/common.h
@@ -173,8 +173,8 @@ struct gpt_params {
struct cpu_params draft_cpuparams;
struct cpu_params draft_cpuparams_batch;

- ggml_backend_sched_eval_callback cb_eval = nullptr;
- void * cb_eval_user_data = nullptr;
+ ggml_backend_sched_eval_callback cb_eval = nullptr;
+ void * cb_eval_ctx = nullptr;

ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;

26 changes: 13 additions & 13 deletions examples/cvector-generator/cvector-generator.cpp
@@ -50,7 +50,7 @@ static void print_usage(int, char ** argv) {


// cb_eval is reused for each pair of positive - negative prompt
- struct callback_data {
+ struct callback_context {
ggml_context * ctx_ggml = nullptr; // holds v_pos, v_neg, v_diff_filtered

int n_layers = 0;
@@ -155,7 +155,7 @@ struct callback_data {
return diff_filtered;
}

- // we don't implement destructor, because we want to reuse callback_data. we just want to free the tensors
+ // we don't implement destructor, because we want to reuse callback_context. we just want to free the tensors
void reset() {
for (auto ptr : v_pos) free(ptr->data);
for (auto ptr : v_neg) free(ptr->data);
@@ -320,20 +320,20 @@ static std::vector<std::string> ctrlvec_load_prompt_file(std::string path, bool
//////////////////////////////////////////////////

static bool cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
- auto * cb_data = (callback_data *) user_data;
+ auto * cb_ctx = (callback_context *) user_data;
static const char * l_out_name = "l_out";
const bool is_l_out = strncmp(t->name, l_out_name, strlen(l_out_name)) == 0;

if (ask) {
return is_l_out;
}

- if (!is_l_out || t->ne[1] != cb_data->n_tokens) {
+ if (!is_l_out || t->ne[1] != cb_ctx->n_tokens) {
return true;
}

// save the tensor to current context
- cb_data->save_tensor_for_layer(t);
+ cb_ctx->save_tensor_for_layer(t);
return true;
}

@@ -400,12 +400,12 @@ int main(int argc, char ** argv) {
}


- callback_data cb_data;
+ callback_context cb_ctx;

// pass the callback to the backend scheduler
// it will be executed for each node during the graph computation
params.cb_eval = cb_eval;
- params.cb_eval_user_data = &cb_data;
+ params.cb_eval_ctx = &cb_ctx;
params.warmup = false;

print_build_info();
@@ -445,31 +445,31 @@ int main(int argc, char ** argv) {
for(size_t i = 0; i < ctx_train.positive_entries.size(); ++i) {
bool success = false;
tokenized_prompt t = tokenized_prompts[i];
- cb_data.n_layers = n_layers;
- cb_data.n_tokens = t.max_seq_len;
+ cb_ctx.n_layers = n_layers;
+ cb_ctx.n_tokens = t.max_seq_len;

printf("Evaluating prompt[%d/%d]: \"%s\" - \"%s\" (%d tokens)\n",
(int) i+1, (int) ctx_train.positive_entries.size(),
tokens_to_str(ctx, t.tokens_pos.cbegin(), t.tokens_pos.cend()).c_str(),
tokens_to_str(ctx, t.tokens_neg.cbegin(), t.tokens_neg.cend()).c_str(),
(int) t.max_seq_len);

- cb_data.is_eval_pos = true;
+ cb_ctx.is_eval_pos = true;
success = get_hidden_layers(ctx, t.tokens_pos);
if (!success) break;

- cb_data.is_eval_pos = false;
+ cb_ctx.is_eval_pos = false;
success = get_hidden_layers(ctx, t.tokens_neg);
if (!success) break;

// calculate diff and remove all zero rows
- auto v_diff_filtered = cb_data.calc_diff();
+ auto v_diff_filtered = cb_ctx.calc_diff();

// save & concat the filtered v_diff to ctx_train
ctx_train.concat_diff_tmp(v_diff_filtered);

// reset for next iteration
- cb_data.reset();
+ cb_ctx.reset();
}

// done with the model, we can now free it to make gain some memory
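The cvector callback above is an instance of the scheduler's two-phase eval protocol (see the ggml-backend.h hunk further down): the callback is invoked once with ask == true to declare interest in a node, and again with ask == false once the node's data is available. A condensed, self-contained sketch of that shape using the renamed identifiers (the struct and function names here are hypothetical, not from this PR):

    #include <cstring>
    #include "ggml.h"

    struct my_callback_context {
        int n_tokens = 0;
    };

    static bool my_cb_eval(struct ggml_tensor * t, bool ask, void * cb_ctx) {
        auto * ctx = (my_callback_context *) cb_ctx;
        const bool is_l_out = strncmp(t->name, "l_out", 5) == 0;
        if (ask) {
            return is_l_out;           // phase 1: observe only "l_out" nodes
        }
        if (is_l_out && t->ne[1] == ctx->n_tokens) {
            // phase 2: t now holds data; inspect or copy it here
        }
        return true;                   // returning false would cancel the compute
    }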
26 changes: 13 additions & 13 deletions examples/eval-callback/eval-callback.cpp
@@ -12,7 +12,7 @@
* This the arbitrary data which will be passed to each callback.
* Later on we can for example add operation or tensor name filter from the CLI arg, or a file descriptor to dump the tensor.
*/
- struct callback_data {
+ struct callback_context {
std::vector<uint8_t> data;
};

@@ -27,7 +27,7 @@ static std::string ggml_ne_string(const ggml_tensor * t) {
return str;
}

- static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
+ static void ggml_print_tensor(const uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
GGML_ASSERT(n > 0);
float sum = 0;
for (int64_t i3 = 0; i3 < ne[3]; i3++) {
@@ -52,15 +52,15 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne
size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
float v;
if (type == GGML_TYPE_F16) {
- v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
+ v = ggml_fp16_to_fp32(*(const ggml_fp16_t *) &data[i]);
} else if (type == GGML_TYPE_F32) {
- v = *(float *) &data[i];
+ v = *(const float *) &data[i];
} else if (type == GGML_TYPE_I32) {
- v = (float) *(int32_t *) &data[i];
+ v = (float) *(const int32_t *) &data[i];
} else if (type == GGML_TYPE_I16) {
- v = (float) *(int16_t *) &data[i];
+ v = (float) *(const int16_t *) &data[i];
} else if (type == GGML_TYPE_I8) {
- v = (float) *(int8_t *) &data[i];
+ v = (float) *(const int8_t *) &data[i];
} else {
GGML_ABORT("fatal error");
}
@@ -88,7 +88,7 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne
* @return true to receive data or continue the graph, false otherwise
*/
static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
- auto * cb_data = (callback_data *) user_data;
+ auto * cb_ctx = (callback_context *) user_data;

const struct ggml_tensor * src0 = t->src[0];
const struct ggml_tensor * src1 = t->src[1];
@@ -114,12 +114,12 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {

if (!is_host) {
auto n_bytes = ggml_nbytes(t);
- cb_data->data.resize(n_bytes);
- ggml_backend_tensor_get(t, cb_data->data.data(), 0, n_bytes);
+ cb_ctx->data.resize(n_bytes);
+ ggml_backend_tensor_get(t, cb_ctx->data.data(), 0, n_bytes);
}

if (!ggml_is_quantized(t->type)) {
- uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
+ uint8_t * data = is_host ? (uint8_t *) t->data : cb_ctx->data.data();
ggml_print_tensor(data, t->type, t->ne, t->nb, 3);
}

@@ -140,7 +140,7 @@ static bool run(llama_context * ctx, const gpt_params & params) {
}

int main(int argc, char ** argv) {
- callback_data cb_data;
+ callback_context cb_ctx;

gpt_params params;

@@ -156,7 +156,7 @@ int main(int argc, char ** argv) {
// pass the callback to the backend scheduler
// it will be executed for each node during the graph computation
params.cb_eval = ggml_debug;
- params.cb_eval_user_data = &cb_data;
+ params.cb_eval_ctx = &cb_ctx;
params.warmup = false;

// init
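The is_host / ggml_backend_tensor_get dance in this file is the standard way to read back data from a non-host buffer. A small helper capturing the pattern (a sketch under that assumption; tensor_host_bytes is a hypothetical name, not part of this PR):

    #include <cstdint>
    #include <vector>
    #include "ggml.h"
    #include "ggml-backend.h"

    // Return a host-readable pointer to t's bytes, copying into buf when the
    // tensor lives in a non-host (e.g. GPU) buffer.
    static const uint8_t * tensor_host_bytes(struct ggml_tensor * t, std::vector<uint8_t> & buf) {
        if (ggml_backend_buffer_is_host(t->buffer)) {
            return (const uint8_t *) t->data;
        }
        buf.resize(ggml_nbytes(t));
        ggml_backend_tensor_get(t, buf.data(), 0, buf.size());
        return buf.data();
    }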
2 changes: 1 addition & 1 deletion examples/imatrix/imatrix.cpp
@@ -602,7 +602,7 @@ int main(int argc, char ** argv) {
// pass the callback to the backend scheduler
// it will be executed for each node during the graph computation
params.cb_eval = ik_collect_imatrix;
- params.cb_eval_user_data = NULL;
+ params.cb_eval_ctx = NULL;
params.warmup = false;

// init
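imatrix registers its collector with a NULL context, so the callback must simply ignore its cb_ctx argument; its state is assumed to live elsewhere (e.g. in a global collector). A hypothetical stateless callback of that form:

    static bool my_stateless_cb(struct ggml_tensor * t, bool ask, void * cb_ctx) {
        (void) cb_ctx;   // registered with cb_eval_ctx = NULL, so nothing to unpack
        if (ask) {
            return true; // observe every node
        }
        // ... record statistics about t in shared state ...
        return true;
    }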
10 changes: 5 additions & 5 deletions ggml/include/ggml-backend.h
@@ -104,7 +104,7 @@ extern "C" {
GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
- GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
+ GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback cb, void * cb_ctx);

// Create a backend buffer from an existing pointer
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
@@ -177,7 +177,7 @@ extern "C" {
// when ask == false, the scheduler is passing the node tensor to the user for observation
// if the user returns false, the scheduler will cancel the graph compute
//
- typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
+ typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * cb_ctx);

// Initialize a backend scheduler
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
@@ -208,7 +208,7 @@ extern "C" {
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);

// Set a callback to be called for each resulting node during graph compute
- GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
+ GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback cb, void * cb_ctx);

//
// Utils
@@ -225,10 +225,10 @@ extern "C" {
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);

- typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
+ typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * cb_ctx);

// Compare the output of two backends
- GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
+ GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback cb_eval, void * cb_eval_ctx);

// Tensor initialization
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
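With the renamed parameters, wiring an observer into a scheduler directly (rather than through gpt_params) would look roughly like this (a sketch; sched, my_cb_eval, and my_callback_context are assumed from the examples above):

    // the scheduler stores both pointers and passes &my_ctx back to
    // my_cb_eval as its cb_ctx argument for every resulting node
    my_callback_context my_ctx;
    ggml_backend_sched_set_eval_callback(sched, my_cb_eval, &my_ctx);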
4 changes: 2 additions & 2 deletions ggml/include/ggml-metal.h
@@ -40,7 +40,7 @@ extern "C" {
// user-code should use only these functions
//

- GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
+ GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback cb, void * cb_ctx);

GGML_API ggml_backend_t ggml_backend_metal_init(void);

@@ -50,7 +50,7 @@ GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void

GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);

- GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
+ GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback cb, void * cb_ctx);

GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);

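A log callback under the renamed signature (see the ggml_log_callback typedef in the ggml.h hunk below) might be installed like so (a sketch; my_log is hypothetical):

    #include <cstdio>
    #include "ggml.h"

    static void my_log(enum ggml_log_level level, const char * text, void * cb_ctx) {
        (void) level;
        (void) cb_ctx;            // the paired context pointer, unused here
        fputs(text, stderr);
    }

    // ggml_backend_metal_log_set_callback(my_log, /*cb_ctx=*/nullptr);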
14 changes: 7 additions & 7 deletions ggml/include/ggml.h
@@ -620,7 +620,7 @@ extern "C" {
// Abort callback
// If not NULL, called before ggml computation
// If it returns true, the computation is aborted
- typedef bool (*ggml_abort_callback)(void * data);
+ typedef bool (*ggml_abort_callback)(void * cb_ctx);

// Scheduling priorities
enum ggml_sched_priority {
@@ -655,8 +655,8 @@ extern "C" {
struct ggml_threadpool * threadpool;

// abort ggml_graph_compute when true
- ggml_abort_callback abort_callback;
- void * abort_callback_data;
+ ggml_abort_callback cb_abort;
+ void * cb_abort_ctx;
};

// scratch buffer
@@ -2143,8 +2143,8 @@ extern "C" {
GGML_LINESEARCH_INVALID_PARAMETERS,
};

- typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
- typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
+ typedef void (*ggml_opt_callback)(void * cb_ctx, int accum_step, float * sched, bool * cancel);
+ typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * cb_ctx);

// optimization parameters
//
@@ -2281,8 +2281,8 @@ extern "C" {
struct ggml_tensor * f,
struct ggml_cgraph * gf,
struct ggml_cgraph * gb,
- ggml_opt_callback callback,
- void * callback_data);
+ ggml_opt_callback cb_opt,
+ void * cb_opt_ctx);

//
// tensor flags
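The abort callback follows the same pairing: per the header comment above, it is called before ggml computation and the computation is aborted when it returns true. A minimal sketch assuming the renamed cb_abort / cb_abort_ctx fields on ggml_cplan (the surrounding plan-setup calls are indicative only):

    #include <atomic>
    #include "ggml.h"

    static std::atomic<bool> g_stop{false};

    static bool my_abort_cb(void * cb_ctx) {
        (void) cb_ctx;
        return g_stop.load();     // true aborts the computation
    }

    // when building the compute plan (sketch):
    //   struct ggml_cplan plan = ggml_graph_plan(graph, n_threads, threadpool);
    //   plan.cb_abort     = my_abort_cb;
    //   plan.cb_abort_ctx = nullptr;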