ggml : hide ggml_object, ggml_cgraph, ggml_hash_set #9408

Merged 4 commits on Sep 12, 2024. Changes shown below are from 3 commits.
6 changes: 3 additions & 3 deletions examples/benchmark/benchmark-matmult.cpp
@@ -183,7 +183,7 @@ int main(int argc, char ** argv) {

ggml_graph_compute_helper(work_buffer, gf, benchmark_params.n_threads);

-TENSOR_DUMP(gf->nodes[0]);
+TENSOR_DUMP(ggml_graph_node(gf, 0));

printf("\n------ Test 2 - Matrix Mult via %s code\n", ggml_type_name(qtype));

@@ -224,7 +224,7 @@ int main(int argc, char ** argv) {


// Let's use the F32 result from above as a reference for the quantized multiplication
-float sum_of_F32_reference = tensor_sum_elements(gf->nodes[0]);
+float sum_of_F32_reference = tensor_sum_elements(ggml_graph_node(gf, 0));

printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; gigaFLOPS\n");
printf("=====================================================================================\n");
@@ -252,7 +252,7 @@ int main(int argc, char ** argv) {

// Check that the matrix multiplication result is in the right ballpark
// We cannot use the exact value from the F32 multiplication because the quantization will be slightly different
-float sum_of_Q4_result = tensor_sum_elements(gf31->nodes[0]);
+float sum_of_Q4_result = tensor_sum_elements(ggml_graph_node(gf31, 0));
float delta = std::abs(sum_of_Q4_result - sum_of_F32_reference);
float allowed_delta = (sum_of_F32_reference) / 1000 / 1000; // Let's accept an epsilon of 10^-6

4 changes: 2 additions & 2 deletions examples/cvector-generator/pca.hpp
@@ -226,8 +226,8 @@ static ggml_status compute_piter(
result.eigenvectors.resize(params.n_batch);
result.distances.resize(params.n_batch);
// get output nodes
-for (int i = 0; i < gf->n_nodes; ++i) {
-    auto node = gf->nodes[i];
+for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
+    auto node = ggml_graph_node(gf, i);
int iter = -1;
// find b_tensor (without copying data from device)
if ((iter = extract_i("b_tensor_norm_", node->name)) > -1) {
2 changes: 1 addition & 1 deletion examples/export-lora/export-lora.cpp
@@ -370,7 +370,7 @@ struct lora_merge_ctx {

// write data to output file
{
-auto result = gf->nodes[gf->n_nodes - 1];
+auto * result = ggml_graph_node(gf, -1);
size_t len = ggml_nbytes(result);
if (read_buf.size() < len) {
read_buf.resize(len);
2 changes: 1 addition & 1 deletion examples/llava/clip.cpp
@@ -2449,7 +2449,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
ggml_backend_graph_compute(ctx->backend, gf);

// the last node is the embedding tensor
-struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 1];
+struct ggml_tensor * embeddings = ggml_graph_node(gf, -1);

// copy the embeddings to the location passed by the user
ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings));
2 changes: 1 addition & 1 deletion examples/llava/llava.cpp
@@ -184,7 +184,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
// ggml_tensor_printf(flatten,"flatten",__LINE__,false,false);
ggml_build_forward_expand(gf, flatten);
ggml_graph_compute_with_ctx(model.ctx, gf, 1);
-struct ggml_tensor* result = gf->nodes[gf->n_nodes - 1];
+struct ggml_tensor* result = ggml_graph_node(gf, -1);

memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
// append without newline tokens (default behavior in llava_arch when not using unpad ):
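All four example programs above follow the same mechanical migration: direct field access on struct ggml_cgraph is replaced by the new accessor functions, with negative indices counting from the end of the node list. A condensed before/after sketch (illustrative only; gf stands in for any already-computed ggml_cgraph pointer):

    // before this PR: callers reached into the struct directly
    struct ggml_tensor * first = gf->nodes[0];
    struct ggml_tensor * last  = gf->nodes[gf->n_nodes - 1];

    // after this PR: the struct is opaque outside ggml, so callers use accessors;
    // a negative index counts from the end of the node list
    struct ggml_tensor * first = ggml_graph_node(gf,  0);
    struct ggml_tensor * last  = ggml_graph_node(gf, -1);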
87 changes: 24 additions & 63 deletions ggml/include/ggml.h
@@ -358,6 +358,7 @@ extern "C" {

struct ggml_object;
struct ggml_context;
+struct ggml_cgraph;

// NOTE: always add types at the end of the enum to keep backward compatibility
enum ggml_type {
@@ -575,23 +576,9 @@ extern "C" {
GGML_TENSOR_FLAG_PARAM = 4,
};

-// ggml object
-struct ggml_object {
-    size_t offs;
-    size_t size;
-
-    struct ggml_object * next;
-
-    enum ggml_object_type type;
-
-    char padding[4];
-};
-
-static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);

// n-dimensional tensor
struct ggml_tensor {
-enum ggml_type         type;
+enum ggml_type type;

GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");

@@ -655,7 +642,7 @@ extern "C" {

struct ggml_threadpool; // forward declaration, see ggml.c

-typedef struct ggml_threadpool * ggml_threadpool_t;
+typedef struct ggml_threadpool * ggml_threadpool_t;

// the compute plan that needs to be prepared for ggml_graph_compute()
// since https://github.com/ggerganov/ggml/issues/287
@@ -671,35 +658,6 @@ extern "C" {
void * abort_callback_data;
};

-enum ggml_cgraph_eval_order {
-    GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
-    GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
-    GGML_CGRAPH_EVAL_ORDER_COUNT
-};
-
-typedef uint32_t ggml_bitset_t;
-
-struct ggml_hash_set {
-    size_t size;
-    ggml_bitset_t * used;       // whether or not the keys are in use i.e. set
-    struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
-};
-
-// computation graph
-struct ggml_cgraph {
-    int size;
-    int n_nodes;
-    int n_leafs;
-
-    struct ggml_tensor ** nodes;
-    struct ggml_tensor ** grads;
-    struct ggml_tensor ** leafs;
-
-    struct ggml_hash_set visited_hash_set;
-
-    enum ggml_cgraph_eval_order order;
-};

// scratch buffer
struct ggml_scratch {
size_t offs;
@@ -2017,8 +1975,6 @@ extern "C" {
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);

-#define GGML_N_TASKS_MAX -1

GGML_API struct ggml_tensor * ggml_map_custom1(
struct ggml_context * ctx,
struct ggml_tensor * a,
@@ -2088,30 +2044,35 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * tensor);


GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);

// graph allocation in a context
-GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
-GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
-GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
-GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
-GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
-GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
+GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
+GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
+GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
+GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
+GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
+GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);

+GGML_API int ggml_graph_size (struct ggml_cgraph * cgraph);
+GGML_API struct ggml_tensor * ggml_graph_node (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
+GGML_API struct ggml_tensor ** ggml_graph_nodes (struct ggml_cgraph * cgraph);
+GGML_API int ggml_graph_n_nodes(struct ggml_cgraph * cgraph);

+GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);

GGML_API size_t ggml_graph_overhead(void);
GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);

-GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
-GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params *p, int n_threads);
-GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params *p0, const struct ggml_threadpool_params *p1);
-GGML_API struct ggml_threadpool* ggml_threadpool_new (struct ggml_threadpool_params * params);
-GGML_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
-GGML_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
-GGML_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
-GGML_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
+GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
+GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
+GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
+GGML_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
+GGML_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
+GGML_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
+GGML_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
+GGML_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);

// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
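The header comment above fixes the indexing contract: ggml_graph_node(cgraph, i) with a negative i resolves to nodes[n_nodes + i]. A short usage sketch of the new public-only surface (function and output names are illustrative, not from the PR):

    #include <stdio.h>
    #include "ggml.h"

    static void dump_graph_nodes(struct ggml_cgraph * gf) {
        // walk every node through the accessors; gf->n_nodes and gf->nodes[]
        // are no longer visible outside the ggml implementation
        for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
            struct ggml_tensor * node = ggml_graph_node(gf, i);
            printf("node %3d: %s\n", i, node->name);
        }
        // negative indices count from the end, so -1 is the final output node
        struct ggml_tensor * out = ggml_graph_node(gf, -1);
        printf("output:   %s\n", out->name);
    }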
1 change: 1 addition & 0 deletions ggml/src/ggml-blas.cpp
@@ -1,3 +1,4 @@
#include "ggml-impl.h"
#include "ggml-blas.h"
#include "ggml-backend-impl.h"

1 change: 1 addition & 0 deletions ggml/src/ggml-cann.cpp
@@ -30,6 +30,7 @@
#include <cstring>
#include <mutex>

#include "ggml-impl.h"
#include "ggml-backend-impl.h"
#include "ggml-cann/aclnn_ops.h"
#include "ggml-cann/common.h"
2 changes: 1 addition & 1 deletion ggml/src/ggml-cuda.cu
@@ -1,5 +1,5 @@
#include "ggml-cuda.h"
#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend-impl.h"

#include "ggml-cuda/common.cuh"
32 changes: 32 additions & 0 deletions ggml/src/ggml-impl.h
@@ -629,8 +629,16 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
#endif

+enum ggml_cgraph_eval_order {
+    GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
+    GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
+    GGML_CGRAPH_EVAL_ORDER_COUNT
+};
+
// bitset

+typedef uint32_t ggml_bitset_t;

static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
#define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
#define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
@@ -656,6 +664,12 @@ static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
#define GGML_HASHSET_FULL ((size_t)-1)
#define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)

+struct ggml_hash_set {
+    size_t size;
+    ggml_bitset_t * used;       // whether or not the keys are in use i.e. set
+    struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
+};

struct ggml_hash_set ggml_hash_set_new(size_t size);
void ggml_hash_set_free(struct ggml_hash_set * hash_set);

@@ -745,6 +759,24 @@ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct g
GGML_ABORT("fatal error");
}

+// computation graph
+
+struct ggml_cgraph {
+    int size;
+    int n_nodes;
+    int n_leafs;
+
+    struct ggml_tensor ** nodes;
+    struct ggml_tensor ** grads;
+    struct ggml_tensor ** leafs;
+
+    struct ggml_hash_set visited_hash_set;
+
+    enum ggml_cgraph_eval_order order;
+};
+
+struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);

#ifdef __cplusplus
}
#endif
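With struct ggml_cgraph now private to ggml-impl.h, the backend sources below simply swap their #include "ggml.h" for #include "ggml-impl.h" to retain access to the graph internals. Given the struct definition above and the contract documented in ggml.h (i < 0 returns nodes[n_nodes + i]), the accessor plausibly reduces to bounds-checked indexing. A sketch under those assumptions, not the verbatim ggml source:

    struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
        if (i < 0) {
            i += cgraph->n_nodes; // e.g. -1 resolves to n_nodes - 1
        }
        GGML_ASSERT(i >= 0 && i < cgraph->n_nodes);
        return cgraph->nodes[i];
    }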
2 changes: 1 addition & 1 deletion ggml/src/ggml-kompute.cpp
@@ -1,4 +1,4 @@
#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend.h"
#include "ggml-backend-impl.h"
#include "ggml-kompute.h"
4 changes: 2 additions & 2 deletions ggml/src/ggml-metal.m
@@ -1,7 +1,7 @@
#import "ggml-metal.h"

#import "ggml-impl.h"
#import "ggml-backend-impl.h"
#import "ggml.h"

#import <Foundation/Foundation.h>

@@ -882,7 +882,7 @@ static enum ggml_status ggml_metal_graph_compute(
// create multiple command buffers and enqueue them
// then, we encode the graph into the command buffers in parallel

-const int n_nodes = gf->n_nodes;
+const int n_nodes = gf->n_nodes;
const int n_cb = ctx->n_cb;
const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;

2 changes: 1 addition & 1 deletion ggml/src/ggml-rpc.cpp
@@ -1,5 +1,5 @@
#include "ggml-rpc.h"
#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend-impl.h"

#include <cinttypes>
2 changes: 1 addition & 1 deletion ggml/src/ggml-sycl.cpp
@@ -33,7 +33,7 @@
#include <sycl/half_type.hpp>

#include "ggml-sycl.h"
#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend-impl.h"

#include "ggml-sycl/backend.hpp"
2 changes: 1 addition & 1 deletion ggml/src/ggml-vulkan.cpp
@@ -21,7 +21,7 @@
#include <memory>
#include <mutex>

#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend-impl.h"

#include "ggml-vulkan-shaders.hpp"