Skip to content

Commit

Permalink
ggml : hide ggml_object, ggml_cgraph, ggml_hash_set (ggerganov#9408)
Browse files Browse the repository at this point in the history
* ggml : hide ggml_object, ggml_cgraph, ggml_hash_set

ggml-ci

* ggml : add ggml-impl.h to backends

* ggml : fix compiler warnings

ggml-ci

* ggml : add assert upon adding nodes
  • Loading branch information
ggerganov authored and arthw committed Nov 18, 2024
1 parent 299e70d commit 4aa7e80
Show file tree
Hide file tree
Showing 18 changed files with 170 additions and 129 deletions.
6 changes: 3 additions & 3 deletions examples/benchmark/benchmark-matmult.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ int main(int argc, char ** argv) {

ggml_graph_compute_helper(work_buffer, gf, benchmark_params.n_threads);

TENSOR_DUMP(gf->nodes[0]);
TENSOR_DUMP(ggml_graph_node(gf, 0));

printf("\n------ Test 2 - Matrix Mult via %s code\n", ggml_type_name(qtype));

Expand Down Expand Up @@ -224,7 +224,7 @@ int main(int argc, char ** argv) {


// Let's use the F32 result from above as a reference for the quantized multiplication
float sum_of_F32_reference = tensor_sum_elements(gf->nodes[0]);
float sum_of_F32_reference = tensor_sum_elements(ggml_graph_node(gf, 0));

printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; gigaFLOPS\n");
printf("=====================================================================================\n");
Expand Down Expand Up @@ -252,7 +252,7 @@ int main(int argc, char ** argv) {

// Check that the matrix multiplication result is in the right ballpark
// We cannot use the exact value from the F32 multiplication because the quantization will be slightly different
float sum_of_Q4_result = tensor_sum_elements(gf31->nodes[0]);
float sum_of_Q4_result = tensor_sum_elements(ggml_graph_node(gf31, 0));
float delta = std::abs(sum_of_Q4_result - sum_of_F32_reference);
float allowed_delta = (sum_of_F32_reference) / 1000 / 1000; // Let's accept an epsilon of 10^-6

Expand Down
4 changes: 2 additions & 2 deletions examples/cvector-generator/pca.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ static ggml_status compute_piter(
result.eigenvectors.resize(params.n_batch);
result.distances.resize(params.n_batch);
// get output nodes
for (int i = 0; i < gf->n_nodes; ++i) {
auto node = gf->nodes[i];
for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
auto node = ggml_graph_node(gf, i);
int iter = -1;
// find b_tensor (without copying data from device)
if ((iter = extract_i("b_tensor_norm_", node->name)) > -1) {
Expand Down
2 changes: 1 addition & 1 deletion examples/export-lora/export-lora.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ struct lora_merge_ctx {

// write data to output file
{
auto result = gf->nodes[gf->n_nodes - 1];
auto * result = ggml_graph_node(gf, -1);
size_t len = ggml_nbytes(result);
if (read_buf.size() < len) {
read_buf.resize(len);
Expand Down
2 changes: 1 addition & 1 deletion examples/llava/clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2449,7 +2449,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
ggml_backend_graph_compute(ctx->backend, gf);

// the last node is the embedding tensor
struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 1];
struct ggml_tensor * embeddings = ggml_graph_node(gf, -1);

// copy the embeddings to the location passed by the user
ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings));
Expand Down
2 changes: 1 addition & 1 deletion examples/llava/llava.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
// ggml_tensor_printf(flatten,"flatten",__LINE__,false,false);
ggml_build_forward_expand(gf, flatten);
ggml_graph_compute_with_ctx(model.ctx, gf, 1);
struct ggml_tensor* result = gf->nodes[gf->n_nodes - 1];
struct ggml_tensor* result = ggml_graph_node(gf, -1);

memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
// append without newline tokens (default behavior in llava_arch when not using unpad ):
Expand Down
87 changes: 24 additions & 63 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ extern "C" {

struct ggml_object;
struct ggml_context;
struct ggml_cgraph;

// NOTE: always add types at the end of the enum to keep backward compatibility
enum ggml_type {
Expand Down Expand Up @@ -575,23 +576,9 @@ extern "C" {
GGML_TENSOR_FLAG_PARAM = 4,
};

// ggml object
// Bookkeeping record for a single object: an offset/size pair, a link to
// another ggml_object, and a type tag.
// NOTE(review): presumably describes one allocation inside a ggml_context's
// memory buffer — confirm against ggml.c.
struct ggml_object {
size_t offs; // offset of the object (presumably in bytes from the start of the context buffer — confirm)
size_t size; // size of the object (presumably in bytes — confirm)

struct ggml_object * next; // link to another object; presumably the next entry of a singly linked list — confirm

enum ggml_object_type type; // kind of object, one of enum ggml_object_type

char padding[4]; // explicit padding bytes; presumably keeps sizeof(struct ggml_object) at a fixed, aligned value — confirm
};

static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);

// n-dimensional tensor
struct ggml_tensor {
enum ggml_type type;
enum ggml_type type;

GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");

Expand Down Expand Up @@ -655,7 +642,7 @@ extern "C" {

struct ggml_threadpool; // forward declaration, see ggml.c

typedef struct ggml_threadpool * ggml_threadpool_t;
typedef struct ggml_threadpool * ggml_threadpool_t;

// the compute plan that needs to be prepared for ggml_graph_compute()
// since https://github.com/ggerganov/ggml/issues/287
Expand All @@ -671,35 +658,6 @@ extern "C" {
void * abort_callback_data;
};

// Evaluation-order selector for a computation graph; stored in the `order`
// field of struct ggml_cgraph.
// NOTE(review): what exactly is ordered "left to right" vs "right to left"
// (presumably the traversal of a node's sources when the graph is built) is
// not visible here — confirm in ggml.c.
enum ggml_cgraph_eval_order {
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0, // first enumerator, explicitly 0
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT, // == 1
GGML_CGRAPH_EVAL_ORDER_COUNT // == 2; follows the two orderings, presumably a count sentinel rather than a valid order — confirm
};

typedef uint32_t ggml_bitset_t;

// Set of tensor pointers, made of a slot count, a bitset marking which
// slots are occupied, and the array of tensor keys.
// A keys[i] entry is meaningful only when the matching bit in `used` is set.
struct ggml_hash_set {
size_t size; // slot count; presumably the capacity of `keys` and the number of bits tracked by `used` — confirm
ggml_bitset_t * used; // whether or not the keys are in use i.e. set
struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
};

// computation graph
// Computation graph: arrays of tensor pointers (nodes, grads, leafs) with
// their counts, a hash set of visited tensors, and an evaluation order.
// Callers index nodes[0 .. n_nodes-1]; several call sites treat
// nodes[n_nodes - 1] as the final result of the graph.
struct ggml_cgraph {
int size; // presumably the allocated capacity of the arrays below (cf. the `size` argument of ggml_new_graph_custom) — confirm
int n_nodes; // number of valid entries in `nodes`
int n_leafs; // presumably the number of valid entries in `leafs` — confirm

struct ggml_tensor ** nodes; // node tensors, indexed 0 .. n_nodes-1
struct ggml_tensor ** grads; // presumably per-node gradient tensors; may be absent when the graph is created with grads = false — confirm
struct ggml_tensor ** leafs; // leaf tensors (presumably the graph inputs/constants — confirm)

struct ggml_hash_set visited_hash_set; // presumably tracks tensors already added while the graph is being built — confirm

enum ggml_cgraph_eval_order order; // one of enum ggml_cgraph_eval_order
};

// scratch buffer
struct ggml_scratch {
size_t offs;
Expand Down Expand Up @@ -2017,8 +1975,6 @@ extern "C" {
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);

#define GGML_N_TASKS_MAX -1

GGML_API struct ggml_tensor * ggml_map_custom1(
struct ggml_context * ctx,
struct ggml_tensor * a,
Expand Down Expand Up @@ -2088,30 +2044,35 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * tensor);


GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);

// graph allocation in a context
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);

// Accessors for struct ggml_cgraph, to be used instead of reading the struct
// fields directly (e.g. ggml_graph_node(gf, i) replaces gf->nodes[i] and
// ggml_graph_n_nodes(gf) replaces gf->n_nodes).

// NOTE(review): presumably returns the `size` field of the graph — confirm in ggml.c.
GGML_API int ggml_graph_size (struct ggml_cgraph * cgraph);
// Returns the i-th node; a negative i counts from the end, so i == -1 is the last node.
GGML_API struct ggml_tensor * ggml_graph_node (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
// NOTE(review): presumably returns the underlying `nodes` array — confirm in ggml.c.
GGML_API struct ggml_tensor ** ggml_graph_nodes (struct ggml_cgraph * cgraph);
// Number of nodes in the graph; usable as the upper bound when iterating with ggml_graph_node().
GGML_API int ggml_graph_n_nodes(struct ggml_cgraph * cgraph);

// NOTE(review): presumably appends `tensor` to the graph's nodes; capacity handling is not visible here — confirm in ggml.c.
GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);

GGML_API size_t ggml_graph_overhead(void);
GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);

GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params *p, int n_threads);
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params *p0, const struct ggml_threadpool_params *p1);
GGML_API struct ggml_threadpool* ggml_threadpool_new (struct ggml_threadpool_params * params);
GGML_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
GGML_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
GGML_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
GGML_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
GGML_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
GGML_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
GGML_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
GGML_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
GGML_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);

// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-blas.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "ggml-impl.h"
#include "ggml-blas.h"
#include "ggml-backend-impl.h"

Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-cann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <cstring>
#include <mutex>

#include "ggml-impl.h"
#include "ggml-backend-impl.h"
#include "ggml-cann/aclnn_ops.h"
#include "ggml-cann/common.h"
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cuda.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "ggml-cuda.h"
#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend-impl.h"

#include "ggml-cuda/common.cuh"
Expand Down
32 changes: 32 additions & 0 deletions ggml/src/ggml-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -629,8 +629,16 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
#endif

// Evaluation-order selector for a computation graph; stored in the `order`
// field of struct ggml_cgraph.
// NOTE(review): what exactly is ordered "left to right" vs "right to left"
// (presumably the traversal of a node's sources when the graph is built) is
// not visible here — confirm in ggml.c.
enum ggml_cgraph_eval_order {
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0, // first enumerator, explicitly 0
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT, // == 1
GGML_CGRAPH_EVAL_ORDER_COUNT // == 2; follows the two orderings, presumably a count sentinel rather than a valid order — confirm
};

// bitset

typedef uint32_t ggml_bitset_t;

static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
#define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
#define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
Expand All @@ -656,6 +664,12 @@ static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
#define GGML_HASHSET_FULL ((size_t)-1)
#define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)

// Set of tensor pointers, made of a slot count, a bitset marking which
// slots are occupied, and the array of tensor keys.
// A keys[i] entry is meaningful only when the matching bit in `used` is set.
// Created with ggml_hash_set_new() and released with ggml_hash_set_free().
struct ggml_hash_set {
size_t size; // slot count; presumably the capacity of `keys` and the number of bits tracked by `used` — confirm
ggml_bitset_t * used; // whether or not the keys are in use i.e. set
struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
};

struct ggml_hash_set ggml_hash_set_new(size_t size);
void ggml_hash_set_free(struct ggml_hash_set * hash_set);

Expand Down Expand Up @@ -745,6 +759,24 @@ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct g
GGML_ABORT("fatal error");
}

// computation graph

// Internal definition of the computation graph: arrays of tensor pointers
// (nodes, grads, leafs) with their counts, a hash set of visited tensors, and
// an evaluation order.
// Code that does not include this header is expected to go through the public
// accessors instead (ggml_graph_node(), ggml_graph_n_nodes(), ...).
struct ggml_cgraph {
int size; // presumably the allocated capacity of the arrays below (cf. the `size` argument of ggml_new_graph_custom) — confirm
int n_nodes; // number of valid entries in `nodes`
int n_leafs; // presumably the number of valid entries in `leafs` — confirm

struct ggml_tensor ** nodes; // node tensors, indexed 0 .. n_nodes-1; nodes[n_nodes + i] is returned by ggml_graph_node() for i < 0
struct ggml_tensor ** grads; // presumably per-node gradient tensors; may be absent when the graph is created with grads = false — confirm
struct ggml_tensor ** leafs; // leaf tensors (presumably the graph inputs/constants — confirm)

struct ggml_hash_set visited_hash_set; // presumably tracks tensors already added while the graph is being built — confirm

enum ggml_cgraph_eval_order order; // one of enum ggml_cgraph_eval_order
};

struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);

#ifdef __cplusplus
}
#endif
2 changes: 1 addition & 1 deletion ggml/src/ggml-kompute.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend.h"
#include "ggml-backend-impl.h"
#include "ggml-kompute.h"
Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-metal.m
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#import "ggml-metal.h"

#import "ggml-impl.h"
#import "ggml-backend-impl.h"
#import "ggml.h"

#import <Foundation/Foundation.h>

Expand Down Expand Up @@ -882,7 +882,7 @@ static enum ggml_status ggml_metal_graph_compute(
// create multiple command buffers and enqueue them
// then, we encode the graph into the command buffers in parallel

const int n_nodes = gf->n_nodes;
const int n_nodes = gf->n_nodes;
const int n_cb = ctx->n_cb;
const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;

Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-rpc.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "ggml-rpc.h"
#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend-impl.h"

#include <cinttypes>
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include <sycl/half_type.hpp>

#include "ggml-sycl.h"
#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend-impl.h"

#include "ggml-sycl/backend.hpp"
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <memory>
#include <mutex>

#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend-impl.h"

#include "ggml-vulkan-shaders.hpp"
Expand Down
Loading

0 comments on commit 4aa7e80

Please sign in to comment.