
Commit

martin-steinegger committed Jan 4, 2025
2 parents 0ed12c4 + dea7906 commit 967c52f
Showing 4 changed files with 20 additions and 35 deletions.
2 changes: 2 additions & 0 deletions lib/prostt5/ggml/src/ggml-backend-reg.cpp
@@ -389,6 +389,7 @@ ggml_backend_t ggml_backend_init_best(void) {
return ggml_backend_dev_init(dev, nullptr);
}

#if 0
// Dynamic loading
ggml_backend_reg_t ggml_backend_load(const char * path) {
return get_reg().load_backend(utf8_to_utf16(path), false);
@@ -575,3 +576,4 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("musa", silent, dir_path);
ggml_backend_load_best("cpu", silent, dir_path);
}
#endif
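
The #if 0 / #endif pair above compiles out the dynamic backend-loading API (ggml_backend_load, ggml_backend_load_all_from_path), leaving only statically registered backends. Below is a minimal sketch, not part of this commit, of the call path that remains, assuming a standalone program linked against ggml's backend API.

// Minimal sketch (assumption: standalone program, not part of this commit).
// With ggml_backend_load() and ggml_backend_load_all_from_path() compiled out,
// callers rely on the statically registered backends and can still pick one
// via ggml_backend_init_best(), the function shown at the top of this hunk.
#include "ggml-backend.h"
#include <cstdio>

int main() {
    ggml_backend_t backend = ggml_backend_init_best();
    if (backend == NULL) {
        fprintf(stderr, "no usable backend registered\n");
        return 1;
    }
    printf("using backend: %s\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}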
8 changes: 4 additions & 4 deletions lib/prostt5/ggml/src/ggml-cpu/CMakeLists.txt
@@ -211,8 +211,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
list(APPEND ARCH_FLAGS /arch:AVX)
list(APPEND ARCH_DEFINITIONS GGML_AVX)
else ()
list(APPEND ARCH_FLAGS /arch:SSE4.2)
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
list(APPEND ARCH_FLAGS /arch:SSE4.1)
list(APPEND ARCH_DEFINITIONS GGML_SSE41)
endif()
if (GGML_AVX_VNNI)
# MSVC generates AVX512 with AVX-VNNI intrinsics even with /arch:AVX2
@@ -222,8 +222,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
if (GGML_NATIVE)
list(APPEND ARCH_FLAGS -march=native)
else ()
list(APPEND ARCH_FLAGS -msse4.2)
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
list(APPEND ARCH_FLAGS -msse2 -msse3 -mssse3 -msse4.1)
list(APPEND ARCH_DEFINITIONS GGML_SSE41)
if (GGML_F16C)
list(APPEND ARCH_FLAGS -mf16c)
list(APPEND ARCH_DEFINITIONS GGML_F16C)
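
These hunks switch the non-native x86 baseline between SSE4.2 and SSE4.1, adjusting both the compiler flags and the matching GGML_SSE42 / GGML_SSE41 definitions for MSVC and GCC/Clang builds. The following compile-time sketch, not part of this commit and assuming a standalone test program built with -msse4.1 on GCC/Clang, shows what the SSE4.1 baseline permits: __SSE4_1__ is defined, __SSE4_2__ is not, so only SSE4.1-level intrinsics may be assumed.

// Minimal sketch (assumption: standalone program compiled with -msse4.1).
// At this baseline __SSE4_1__ is defined but __SSE4_2__ is not, so SSE4.2-only
// intrinsics (e.g. the CRC32 family) must not be used unconditionally.
#include <smmintrin.h>   // SSE4.1 intrinsics
#include <cstdio>

int main() {
#if defined(__SSE4_1__)
    __m128 a = _mm_set1_ps(2.0f);
    __m128 b = _mm_set1_ps(3.0f);
    // _mm_dp_ps (DPPS) is an SSE4.1 instruction: dot product of the four lanes,
    // with the result written to lane 0 (mask 0xF1).
    __m128 dp = _mm_dp_ps(a, b, 0xF1);
    printf("dot product: %f\n", _mm_cvtss_f32(dp));
    return 0;
#else
    printf("compiled without SSE4.1 support\n");
    return 1;
#endif
}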
4 changes: 2 additions & 2 deletions lib/prostt5/ggml/src/ggml-cpu/cpu-feats-x86.cpp
@@ -274,8 +274,8 @@ static int ggml_backend_cpu_x86_score() {
if (!is.F16C()) { return 0; }
score += 1<<1;
#endif
#ifdef GGML_SSE42
if (!is.SSE42()) { return 0; }
#ifdef GGML_SSE41
if (!is.SSE41()) { return 0; }
score += 1<<2;
#endif
#ifdef GGML_AVX
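
The scoring function in cpu-feats-x86.cpp ranks compiled CPU-backend variants against the host CPU: every feature a variant was built with (per the GGML_* definitions above) must be present at runtime, otherwise the variant scores 0; each satisfied feature then adds a distinct bit, so the most capable usable variant wins. A simplified sketch of that pattern follows, assuming a standalone program with a hand-filled feature struct; the SSE4.1 bit position matches the hunk, the other positions are illustrative assumptions.

// Simplified sketch (assumption: standalone, not the real cpu-feats-x86.cpp).
// A variant compiled with a GGML_* feature definition is unusable on a host
// that lacks that feature (score 0); otherwise each feature adds its own bit.
#include <cstdio>

struct cpu_features {
    bool sse41;
    bool avx;
    bool avx2;
};

static int score_variant(const cpu_features & is) {
    int score = 1;                       // base score: the variant at least runs
#ifdef GGML_SSE41
    if (!is.sse41) { return 0; }
    score += 1 << 2;                     // same bit position as in the hunk above
#endif
#ifdef GGML_AVX
    if (!is.avx)   { return 0; }
    score += 1 << 3;                     // assumed position, for illustration
#endif
#ifdef GGML_AVX2
    if (!is.avx2)  { return 0; }
    score += 1 << 4;                     // assumed position, for illustration
#endif
    return score;
}

int main() {
    cpu_features host = { true, true, false };   // e.g. an SSE4.1 + AVX host
    printf("variant score: %d\n", score_variant(host));
    return 0;
}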
41 changes: 12 additions & 29 deletions lib/prostt5/src/llama.cpp
@@ -11231,25 +11231,13 @@ struct llm_build_context {

#if 1
// ggml_graph_print(gf);
// The shape of the raw embeddings is [enc_input_size, n_embd], or conceptually (S, H).
// If we want a 3D tensor shape [B=1, S=enc_input_size, H=n_embd], we can do:
//int B = cur->ne[2];
//int H = cur->ne[0];

#define PRINT_TENSOR_DIMS(name, tensor) \
std::cout << name << " " << (tensor)->ne[0] << "\t" << (tensor)->ne[1] << "\t" << (tensor)->ne[2] << "\t" << (tensor)->ne[3] << std::endl;
// #define PRINT_TENSOR_DIMS(name, tensor) std::cout << name << " " << (tensor)->ne[0] << "\t" << (tensor)->ne[1] << "\t" << (tensor)->ne[2] << "\t" << (tensor)->ne[3] << std::endl;

// 1) Slicing: skip first row in dim1
//std::cout << "n_tokens " << n_tokens << std::endl;
//int new_seq_len = std::max(n_tokens, (int)2) - 2; // if skipping only the very first row
//int new_seq_len = n_tokens - 2; // if skipping only the very first row
size_t offset_bytes = cur->nb[1] * 1ULL; // skip one row in the dim1 direction

//std::cout << "cur " << cur->ne[0] << "\t" << cur->ne[1] << "\t" << cur->ne[2] << std::endl;
// create a sub-view with 8-argument ggml_view_3d()
ggml_tensor * cur_sliced = ggml_view_3d(
ctx0,
cur, // the original tensor
cur,
/* ne0 */ cur->ne[0],
/* ne1 */ cur->ne[1] - 2,
/* ne2 */ cur->ne[2],
@@ -11259,10 +11247,10 @@ struct llm_build_context {
);
cb(cur_sliced, "cur_sliced", -1);

ggml_tensor * cur_padded = ggml_cont(ctx0, ggml_pad(ctx0, cur_sliced,
ggml_tensor * cur_padded = ggml_pad(ctx0, cur_sliced,
/*p0=*/0, /*p1=*/1, // no pad on the n_embd dimension
/*p2=*/0, /*p3=*/0 // pad +1 at the end of the tokens dimension
));
);
cb(cur_padded, "cur_padded", -1);
// PRINT_TENSOR_DIMS("cur_padded", cur_padded)

@@ -11277,34 +11265,29 @@ struct llm_build_context {
// ggml_tensor* cur_conv0 = ggml_conv_2d(ctx0, cw0, permuted_tensor, 1, 1, 3, 0, 1, 1);
ggml_tensor* cur_conv0 = ggml_conv_2d(ctx0, model.conv0, permuted_tensor, 1, 1, 3, 0, 1, 1);
cb(cur_conv0, "cur_conv0", -1);
// ggml_build_forward_expand(gf, cur_conv0);
//PRINT_TENSOR_DIMS("cur_conv0", cur_conv0)
// ggml_graph_print(gf);

//PRINT_TENSOR_DIMS("cur_conv0", cur_conv0)
//PRINT_TENSOR_DIMS("model.conv0_b", model.conv0_b)
ggml_tensor* cur_conv0b = ggml_add(ctx0, cur_conv0, ggml_reshape_4d(ctx0, model.conv0_b, 1, 1, 32, 1));
ggml_tensor* cur_conv0b = ggml_add_inplace(ctx0, cur_conv0, ggml_reshape_4d(ctx0, model.conv0_b, 1, 1, 32, 1));
cb(cur_conv0b, "cur_conv0b", -1);

// ggml_build_forward_expand(gf, cur_conv0b);

//cb(cur_conv0b, "result_embd_pooled", -1);
//PRINT_TENSOR_DIMS("cur_conv0b", cur_conv0b)
ggml_tensor* cur_relu = ggml_relu(ctx0, cur_conv0b);

ggml_tensor* cur_relu = ggml_relu_inplace(ctx0, cur_conv0b);
cb(cur_relu, "cur_relu", -1);
//PRINT_TENSOR_DIMS("cur_relu", cur_relu)

// ggml_tensor* cw3 = ggml_cont(ctx0, ggml_permute(ctx0, model.conv3, 1, 0, 2, 3));
// cb(cw3, "cw3", -1);

//PRINT_TENSOR_DIMS("cur_relu", cur_relu)
// ggml_tensor* cur_conv3 = ggml_conv_2d(ctx0, cw3, cur_relu, 1, 1, 3, 0, 1, 1);
ggml_tensor* cur_conv3 = ggml_conv_2d(ctx0, model.conv3, cur_relu, 1, 1, 3, 0, 1, 1);
cb(cur_conv3, "cur_conv3", -1);

//PRINT_TENSOR_DIMS("cur_conv3", cur_conv3)
ggml_tensor* cur_conv3b = ggml_add(ctx0, cur_conv3, ggml_reshape_4d(ctx0, model.conv3_b, 1, 1, 20, 1));
cb(cur_conv3b, "result_embd_pooled", -1);

ggml_tensor* cur_conv3b = ggml_add_inplace(ctx0, cur_conv3, ggml_reshape_4d(ctx0, model.conv3_b, 1, 1, 20, 1));
cb(cur_conv3b, "result_embd_pooled", -1);
//PRINT_TENSOR_DIMS("cur_conv3b", cur_conv3b)

ggml_build_forward_expand(gf, cur_conv3b);
// ggml_graph_print(gf);
#endif
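
The first llama.cpp hunk slices the encoder output with the 8-argument ggml_view_3d (offset of one row on the token dimension, ne[1] shortened by two) and then pads one zero row back with ggml_pad. Below is a minimal standalone sketch of that slice-then-pad step, not part of this commit, assuming the public ggml API and toy dimensions.

// Minimal sketch (assumption: standalone program, toy sizes, not llama.cpp).
// Mirrors the slice-then-pad step above: a 3D view that skips the first token
// row (offset = nb[1]) and drops two rows from ne[1], followed by ggml_pad
// appending one zero row on the token dimension.
#include "ggml.h"
#include <cstdio>

int main() {
    struct ggml_init_params params = { /*mem_size=*/ 16 * 1024 * 1024,
                                       /*mem_buffer=*/ NULL,
                                       /*no_alloc=*/ false };
    struct ggml_context * ctx = ggml_init(params);

    // toy "encoder embeddings": n_embd = 4, n_tokens = 6, batch = 1
    struct ggml_tensor * cur = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 4, 6, 1);

    size_t offset_bytes = cur->nb[1];                // skip one row along dim 1
    struct ggml_tensor * sliced = ggml_view_3d(ctx, cur,
            cur->ne[0], cur->ne[1] - 2, cur->ne[2],  // keep n_embd, drop 2 tokens
            cur->nb[1], cur->nb[2], offset_bytes);

    // pad +1 at the end of the token dimension, nothing on the other dims
    struct ggml_tensor * padded = ggml_pad(ctx, sliced, 0, 1, 0, 0);

    printf("cur:    %lld x %lld x %lld\n", (long long) cur->ne[0],
           (long long) cur->ne[1], (long long) cur->ne[2]);
    printf("padded: %lld x %lld x %lld\n", (long long) padded->ne[0],
           (long long) padded->ne[1], (long long) padded->ne[2]);

    ggml_free(ctx);
    return 0;
}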
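The later hunks add the convolution biases by reshaping them to [1, 1, C, 1] so ggml broadcasts them over the spatial dimensions, and use the in-place add/ReLU variants. A minimal standalone sketch of that bias-plus-activation pattern, not part of this commit and assuming toy shapes and the public ggml API:

// Minimal sketch (assumption: standalone program, toy sizes, not llama.cpp).
// A per-channel bias reshaped to [1, 1, C, 1] broadcasts over the spatial
// dimensions of a conv output; the *_inplace variants reuse the input tensor
// instead of allocating a new result, mirroring cur_conv0b / cur_relu above.
#include "ggml.h"
#include <cstdio>

int main() {
    struct ggml_init_params params = { 16 * 1024 * 1024, NULL, false };
    struct ggml_context * ctx = ggml_init(params);

    // toy conv output [W=5, H=7, C=32, N=1] and per-channel bias [32]
    struct ggml_tensor * conv_out = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 5, 7, 32, 1);
    struct ggml_tensor * bias     = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 32);

    struct ggml_tensor * biased = ggml_add_inplace(ctx, conv_out,
            ggml_reshape_4d(ctx, bias, 1, 1, 32, 1));
    struct ggml_tensor * act = ggml_relu_inplace(ctx, biased);

    printf("activated: %lld x %lld x %lld x %lld\n",
           (long long) act->ne[0], (long long) act->ne[1],
           (long long) act->ne[2], (long long) act->ne[3]);

    ggml_free(ctx);
    return 0;
}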
