
Commit

martin-steinegger committed Jan 4, 2025
2 parents 0ed12c4 + dea7906 commit 967c52f
Showing 4 changed files with 20 additions and 35 deletions.
2 changes: 2 additions & 0 deletions lib/prostt5/ggml/src/ggml-backend-reg.cpp
@@ -389,6 +389,7 @@ ggml_backend_t ggml_backend_init_best(void) {
return ggml_backend_dev_init(dev, nullptr);
}

#if 0
// Dynamic loading
ggml_backend_reg_t ggml_backend_load(const char * path) {
return get_reg().load_backend(utf8_to_utf16(path), false);
@@ -575,3 +576,4 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("musa", silent, dir_path);
ggml_backend_load_best("cpu", silent, dir_path);
}
#endif
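
The #if 0 / #endif pair above compiles out the dynamic backend-loading API (ggml_backend_load, ggml_backend_load_all_from_path), leaving only statically registered backends. Below is a minimal sketch, not part of this commit, of the call path that remains, assuming a standalone program linked against ggml's backend API.

// Minimal sketch (assumption: standalone program, not part of this commit).
// With ggml_backend_load() and ggml_backend_load_all_from_path() compiled out,
// callers rely on the statically registered backends and can still pick one
// via ggml_backend_init_best(), the function shown at the top of this hunk.
#include "ggml-backend.h"
#include <cstdio>

int main() {
    ggml_backend_t backend = ggml_backend_init_best();
    if (backend == NULL) {
        fprintf(stderr, "no usable backend registered\n");
        return 1;
    }
    printf("using backend: %s\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}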
8 changes: 4 additions & 4 deletions lib/prostt5/ggml/src/ggml-cpu/CMakeLists.txt
@@ -211,8 +211,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
list(APPEND ARCH_FLAGS /arch:AVX)
list(APPEND ARCH_DEFINITIONS GGML_AVX)
else ()
list(APPEND ARCH_FLAGS /arch:SSE4.2)
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
list(APPEND ARCH_FLAGS /arch:SSE4.1)
list(APPEND ARCH_DEFINITIONS GGML_SSE41)
endif()
if (GGML_AVX_VNNI)
# MSVC generates AVX512 with AVX-VNNI intrinsics even with /arch:AVX2
@@ -222,8 +222,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
if (GGML_NATIVE)
list(APPEND ARCH_FLAGS -march=native)
else ()
list(APPEND ARCH_FLAGS -msse4.2)
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
list(APPEND ARCH_FLAGS -msse2 -msse3 -mssse3 -msse4.1)
list(APPEND ARCH_DEFINITIONS GGML_SSE41)
if (GGML_F16C)
list(APPEND ARCH_FLAGS -mf16c)
list(APPEND ARCH_DEFINITIONS GGML_F16C)
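
These hunks switch the non-native x86 baseline between SSE4.2 and SSE4.1, adjusting both the compiler flags and the matching GGML_SSE42 / GGML_SSE41 definitions for MSVC and GCC/Clang builds. The following compile-time sketch, not part of this commit and assuming a standalone test program built with -msse4.1 on GCC/Clang, shows what the SSE4.1 baseline permits: __SSE4_1__ is defined, __SSE4_2__ is not, so only SSE4.1-level intrinsics may be assumed.

// Minimal sketch (assumption: standalone program compiled with -msse4.1).
// At this baseline __SSE4_1__ is defined but __SSE4_2__ is not, so SSE4.2-only
// intrinsics (e.g. the CRC32 family) must not be used unconditionally.
#include <smmintrin.h>   // SSE4.1 intrinsics
#include <cstdio>

int main() {
#if defined(__SSE4_1__)
    __m128 a = _mm_set1_ps(2.0f);
    __m128 b = _mm_set1_ps(3.0f);
    // _mm_dp_ps (DPPS) is an SSE4.1 instruction: dot product of the four lanes,
    // with the result written to lane 0 (mask 0xF1).
    __m128 dp = _mm_dp_ps(a, b, 0xF1);
    printf("dot product: %f\n", _mm_cvtss_f32(dp));
    return 0;
#else
    printf("compiled without SSE4.1 support\n");
    return 1;
#endif
}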
4 changes: 2 additions & 2 deletions lib/prostt5/ggml/src/ggml-cpu/cpu-feats-x86.cpp
@@ -274,8 +274,8 @@ static int ggml_backend_cpu_x86_score() {
if (!is.F16C()) { return 0; }
score += 1<<1;
#endif
#ifdef GGML_SSE42
if (!is.SSE42()) { return 0; }
#ifdef GGML_SSE41
if (!is.SSE41()) { return 0; }
score += 1<<2;
#endif
#ifdef GGML_AVX
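
The scoring function in cpu-feats-x86.cpp ranks compiled CPU-backend variants against the host CPU: every feature a variant was built with (per the GGML_* definitions above) must be present at runtime, otherwise the variant scores 0; each satisfied feature then adds a distinct bit, so the most capable usable variant wins. A simplified sketch of that pattern follows, assuming a standalone program with a hand-filled feature struct; the SSE4.1 bit position matches the hunk, the other positions are illustrative assumptions.

// Simplified sketch (assumption: standalone, not the real cpu-feats-x86.cpp).
// A variant compiled with a GGML_* feature definition is unusable on a host
// that lacks that feature (score 0); otherwise each feature adds its own bit.
#include <cstdio>

struct cpu_features {
    bool sse41;
    bool avx;
    bool avx2;
};

static int score_variant(const cpu_features & is) {
    int score = 1;                       // base score: the variant at least runs
#ifdef GGML_SSE41
    if (!is.sse41) { return 0; }
    score += 1 << 2;                     // same bit position as in the hunk above
#endif
#ifdef GGML_AVX
    if (!is.avx)   { return 0; }
    score += 1 << 3;                     // assumed position, for illustration
#endif
#ifdef GGML_AVX2
    if (!is.avx2)  { return 0; }
    score += 1 << 4;                     // assumed position, for illustration
#endif
    return score;
}

int main() {
    cpu_features host = { true, true, false };   // e.g. an SSE4.1 + AVX host
    printf("variant score: %d\n", score_variant(host));
    return 0;
}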
41 changes: 12 additions & 29 deletions lib/prostt5/src/llama.cpp
@@ -11231,25 +11231,13 @@ struct llm_build_context {

#if 1
// ggml_graph_print(gf);
// The shape of the raw embeddings is [enc_input_size, n_embd], or conceptually (S, H).
// If we want a 3D tensor shape [B=1, S=enc_input_size, H=n_embd], we can do:
//int B = cur->ne[2];
//int H = cur->ne[0];

#define PRINT_TENSOR_DIMS(name, tensor) \
std::cout << name << " " << (tensor)->ne[0] << "\t" << (tensor)->ne[1] << "\t" << (tensor)->ne[2] << "\t" << (tensor)->ne[3] << std::endl;
// #define PRINT_TENSOR_DIMS(name, tensor) std::cout << name << " " << (tensor)->ne[0] << "\t" << (tensor)->ne[1] << "\t" << (tensor)->ne[2] << "\t" << (tensor)->ne[3] << std::endl;

// 1) Slicing: skip first row in dim1
//std::cout << "n_tokens " << n_tokens << std::endl;
//int new_seq_len = std::max(n_tokens, (int)2) - 2; // if skipping only the very first row
//int new_seq_len = n_tokens - 2; // if skipping only the very first row
size_t offset_bytes = cur->nb[1] * 1ULL; // skip one row in the dim1 direction

//std::cout << "cur " << cur->ne[0] << "\t" << cur->ne[1] << "\t" << cur->ne[2] << std::endl;
// create a sub-view with 8-argument ggml_view_3d()
ggml_tensor * cur_sliced = ggml_view_3d(
ctx0,
cur, // the original tensor
cur,
/* ne0 */ cur->ne[0],
/* ne1 */ cur->ne[1] - 2,
/* ne2 */ cur->ne[2],
@@ -11259,10 +11247,10 @@ struct llm_build_context {
);
cb(cur_sliced, "cur_sliced", -1);

ggml_tensor * cur_padded = ggml_cont(ctx0, ggml_pad(ctx0, cur_sliced,
ggml_tensor * cur_padded = ggml_pad(ctx0, cur_sliced,
/*p0=*/0, /*p1=*/1, // no pad on the n_embd dimension
/*p2=*/0, /*p3=*/0 // pad +1 at the end of the tokens dimension
));
);
cb(cur_padded, "cur_padded", -1);
// PRINT_TENSOR_DIMS("cur_padded", cur_padded)

@@ -11277,34 +11265,29 @@ struct llm_build_context {
// ggml_tensor* cur_conv0 = ggml_conv_2d(ctx0, cw0, permuted_tensor, 1, 1, 3, 0, 1, 1);
ggml_tensor* cur_conv0 = ggml_conv_2d(ctx0, model.conv0, permuted_tensor, 1, 1, 3, 0, 1, 1);
cb(cur_conv0, "cur_conv0", -1);
// ggml_build_forward_expand(gf, cur_conv0);
//PRINT_TENSOR_DIMS("cur_conv0", cur_conv0)
// ggml_graph_print(gf);

//PRINT_TENSOR_DIMS("cur_conv0", cur_conv0)
//PRINT_TENSOR_DIMS("model.conv0_b", model.conv0_b)
ggml_tensor* cur_conv0b = ggml_add(ctx0, cur_conv0, ggml_reshape_4d(ctx0, model.conv0_b, 1, 1, 32, 1));
ggml_tensor* cur_conv0b = ggml_add_inplace(ctx0, cur_conv0, ggml_reshape_4d(ctx0, model.conv0_b, 1, 1, 32, 1));
cb(cur_conv0b, "cur_conv0b", -1);

// ggml_build_forward_expand(gf, cur_conv0b);

//cb(cur_conv0b, "result_embd_pooled", -1);
//PRINT_TENSOR_DIMS("cur_conv0b", cur_conv0b)
ggml_tensor* cur_relu = ggml_relu(ctx0, cur_conv0b);

ggml_tensor* cur_relu = ggml_relu_inplace(ctx0, cur_conv0b);
cb(cur_relu, "cur_relu", -1);
//PRINT_TENSOR_DIMS("cur_relu", cur_relu)

// ggml_tensor* cw3 = ggml_cont(ctx0, ggml_permute(ctx0, model.conv3, 1, 0, 2, 3));
// cb(cw3, "cw3", -1);

//PRINT_TENSOR_DIMS("cur_relu", cur_relu)
// ggml_tensor* cur_conv3 = ggml_conv_2d(ctx0, cw3, cur_relu, 1, 1, 3, 0, 1, 1);
ggml_tensor* cur_conv3 = ggml_conv_2d(ctx0, model.conv3, cur_relu, 1, 1, 3, 0, 1, 1);
cb(cur_conv3, "cur_conv3", -1);

//PRINT_TENSOR_DIMS("cur_conv3", cur_conv3)
ggml_tensor* cur_conv3b = ggml_add(ctx0, cur_conv3, ggml_reshape_4d(ctx0, model.conv3_b, 1, 1, 20, 1));
cb(cur_conv3b, "result_embd_pooled", -1);

ggml_tensor* cur_conv3b = ggml_add_inplace(ctx0, cur_conv3, ggml_reshape_4d(ctx0, model.conv3_b, 1, 1, 20, 1));
cb(cur_conv3b, "result_embd_pooled", -1);
//PRINT_TENSOR_DIMS("cur_conv3b", cur_conv3b)

ggml_build_forward_expand(gf, cur_conv3b);
// ggml_graph_print(gf);
#endif
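
The first llama.cpp hunk slices the encoder output with the 8-argument ggml_view_3d (offset of one row on the token dimension, ne[1] shortened by two) and then pads one zero row back with ggml_pad. Below is a minimal standalone sketch of that slice-then-pad step, not part of this commit, assuming the public ggml API and toy dimensions.

// Minimal sketch (assumption: standalone program, toy sizes, not llama.cpp).
// Mirrors the slice-then-pad step above: a 3D view that skips the first token
// row (offset = nb[1]) and drops two rows from ne[1], followed by ggml_pad
// appending one zero row on the token dimension.
#include "ggml.h"
#include <cstdio>

int main() {
    struct ggml_init_params params = { /*mem_size=*/ 16 * 1024 * 1024,
                                       /*mem_buffer=*/ NULL,
                                       /*no_alloc=*/ false };
    struct ggml_context * ctx = ggml_init(params);

    // toy "encoder embeddings": n_embd = 4, n_tokens = 6, batch = 1
    struct ggml_tensor * cur = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 4, 6, 1);

    size_t offset_bytes = cur->nb[1];                // skip one row along dim 1
    struct ggml_tensor * sliced = ggml_view_3d(ctx, cur,
            cur->ne[0], cur->ne[1] - 2, cur->ne[2],  // keep n_embd, drop 2 tokens
            cur->nb[1], cur->nb[2], offset_bytes);

    // pad +1 at the end of the token dimension, nothing on the other dims
    struct ggml_tensor * padded = ggml_pad(ctx, sliced, 0, 1, 0, 0);

    printf("cur:    %lld x %lld x %lld\n", (long long) cur->ne[0],
           (long long) cur->ne[1], (long long) cur->ne[2]);
    printf("padded: %lld x %lld x %lld\n", (long long) padded->ne[0],
           (long long) padded->ne[1], (long long) padded->ne[2]);

    ggml_free(ctx);
    return 0;
}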
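The later hunks add the convolution biases by reshaping them to [1, 1, C, 1] so ggml broadcasts them over the spatial dimensions, and use the in-place add/ReLU variants. A minimal standalone sketch of that bias-plus-activation pattern, not part of this commit and assuming toy shapes and the public ggml API:

// Minimal sketch (assumption: standalone program, toy sizes, not llama.cpp).
// A per-channel bias reshaped to [1, 1, C, 1] broadcasts over the spatial
// dimensions of a conv output; the *_inplace variants reuse the input tensor
// instead of allocating a new result, mirroring cur_conv0b / cur_relu above.
#include "ggml.h"
#include <cstdio>

int main() {
    struct ggml_init_params params = { 16 * 1024 * 1024, NULL, false };
    struct ggml_context * ctx = ggml_init(params);

    // toy conv output [W=5, H=7, C=32, N=1] and per-channel bias [32]
    struct ggml_tensor * conv_out = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 5, 7, 32, 1);
    struct ggml_tensor * bias     = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 32);

    struct ggml_tensor * biased = ggml_add_inplace(ctx, conv_out,
            ggml_reshape_4d(ctx, bias, 1, 1, 32, 1));
    struct ggml_tensor * act = ggml_relu_inplace(ctx, biased);

    printf("activated: %lld x %lld x %lld x %lld\n",
           (long long) act->ne[0], (long long) act->ne[1],
           (long long) act->ne[2], (long long) act->ne[3]);

    ggml_free(ctx);
    return 0;
}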
