backup

ggerganov · May 22, 2024 · 27c3f29 · 27c3f29
1 parent e1b7913
commit 27c3f29
Show file tree

Hide file tree

Showing 8 changed files with 279 additions and 311 deletions.
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
diff --git a/ggml-sycl/common.hpp b/ggml-sycl/common.hpp
@@ -79,7 +79,6 @@ static int g_work_group_size = 0;
 #endif
 
 typedef sycl::queue *queue_ptr;
-typedef sycl::handler *handle_ptr;
 
 enum ggml_sycl_backend_gpu_mode {
   SYCL_UNSET_GPU_MODE = -1,
@@ -313,13 +312,12 @@ class sycl_gpu_mgr {
 };
 
 static sycl_gpu_mgr* g_sycl_gpu_mgr = new sycl_gpu_mgr(0);
-static int g_device_count = -1;
 static int g_all_sycl_device_count = -1;
 static int g_main_device = -1;
 static int g_main_device_id = -1;
 static bool g_ggml_backend_sycl_buffer_type_initialized = false;
 
-static std::array<float, SYCL_MAX_DEVICES> g_default_tensor_split = {};
+static std::array<float, GGML_SYCL_MAX_DEVICES> g_default_tensor_split = {};
 
 static float g_tensor_split[GGML_SYCL_MAX_DEVICES] = {0};
 
@@ -341,25 +339,6 @@ int get_main_device();
   (void)bad_arch; // suppress unused function warning
 }
 
-/*
-device_index: device index from 0 to n (continue numbers).
-    It is used for device select/set in SYCL backend internal data structure.
-*/
-inline void check_allow_gpu_index(const int device_index) {
-  if (device_index >= g_device_count) {
-    char error_buf[256];
-    snprintf(
-        error_buf,
-        sizeof(error_buf),
-        "%s error: device_index:%d is out of range: [0-%d]",
-        __func__,
-        device_index,
-        g_device_count - 1);
-    fprintf(stderr, "%s\n", error_buf);
-    assert(false);
-  }
-}
-
 /*
 device_id: device ID is shown by ggml_backend_sycl_print_sycl_devices().
     It is only used to set current working device.
@@ -487,30 +466,16 @@ struct ggml_backend_sycl_context {
     std::string name;
 
     queue_ptr qptrs[GGML_SYCL_MAX_DEVICES][GGML_SYCL_MAX_STREAMS] = { { nullptr } };
-    static sycl::handler * sycl_handles[GGML_SYCL_MAX_DEVICES] = {nullptr};
 
     explicit ggml_backend_sycl_context(int device) :
         device(device),
         name(GGML_SYCL_NAME + std::to_string(device)) {
     }
 
-    ~ggml_backend_sycl_context() {
-        for (int i = 0; i < GGML_SYCL_MAX_DEVICES; ++i) {
-            for (int j = 0; j < GGML_SYCL_MAX_STREAMS; ++j) {
-                if (qptrs[i][j] != nullptr) {
-                    SYCL_CHECK(free(qptrs[i][j]));
-                }
-            }
-            if (cublas_handles[i] != nullptr) {
-                SYCL_CHECK(free(sycl_handles[i]));
-            }
-        }
-    }
-
     queue_ptr stream(int device, int stream) {
         if (qptrs[device][stream] == nullptr) {
-            SYCL_CHECK(dpct::get_current_device().create_queue(
-                        g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
+            qptrs[device][stream] = (dpct::get_current_device().create_queue(
+                        g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device()));
         }
         return qptrs[device][stream];
     }
@@ -519,27 +484,14 @@ struct ggml_backend_sycl_context {
         return stream(device, 0);
     }
 
-    handle_ptr sycl_handle(int device) {
-        if (sycl_handles[device] == nullptr) {
-            const dpct::queue_ptr stream = qptrs[device][0];
-            // create sycl handle
-            SYCL_CHECK(CHECK_TRY_ERROR(sycl_handles[device] = stream));
-        }
-        return sycl_handles[device];
-    }
-
-    handle_ptr sycl_handle() {
-        return sycl_handle(device);
-    }
-
     // pool
     std::unique_ptr<ggml_sycl_pool> pools[GGML_SYCL_MAX_DEVICES];
 
     static std::unique_ptr<ggml_sycl_pool> new_pool_for_device(queue_ptr qptr, int device);
 
     ggml_sycl_pool & pool(int device) {
         if (pools[device] == nullptr) {
-            pools[device] = new_pool_for_device(qptrs[device][0], device);
+            pools[device] = new_pool_for_device(stream(device,0), device);
         }
         return *pools[device];
     }

diff --git a/ggml-sycl/dmmv.cpp b/ggml-sycl/dmmv.cpp
@@ -943,6 +943,7 @@ static void dequantize_mul_mat_vec_q6_K_sycl(const void *vx, const float *y,
 }
 
 void ggml_sycl_op_dequantize_mul_mat_vec(
+    ggml_backend_sycl_context & ctx,
     const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
     const char *src0_dd_i, const float *src1_ddf_i, const char *src1_ddq_i,
     float *dst_dd_i, const int64_t row_low, const int64_t row_high,

diff --git a/ggml-sycl/dmmv.hpp b/ggml-sycl/dmmv.hpp
@@ -17,6 +17,7 @@
 
 
 void ggml_sycl_op_dequantize_mul_mat_vec(
+    ggml_backend_sycl_context & ctx,
     const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
     const char *src0_dd_i, const float *src1_ddf_i, const char *src1_ddq_i,
     float *dst_dd_i, const int64_t row_low, const int64_t row_high,

diff --git a/ggml-sycl/mmq.cpp b/ggml-sycl/mmq.cpp
@@ -2960,6 +2960,7 @@ catch (sycl::exception const &exc) {
 }
 
 void ggml_sycl_op_mul_mat_q(
+    ggml_backend_sycl_context & ctx,
     const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
     const char *src0_dd_i, const float *src1_ddf_i, const char *src1_ddq_i,
     float *dst_dd_i, const int64_t row_low, const int64_t row_high,

diff --git a/ggml-sycl/mmq.hpp b/ggml-sycl/mmq.hpp
@@ -16,6 +16,7 @@
 #include "common.hpp"
 
 void ggml_sycl_op_mul_mat_q(
+    ggml_backend_sycl_context & ctx,
     const ggml_tensor* src0,
     const ggml_tensor* src1,
     ggml_tensor* dst,

diff --git a/ggml-sycl/mmvq.cpp b/ggml-sycl/mmvq.cpp
@@ -932,6 +932,7 @@ static void mul_mat_vec_iq4_xs_q8_1_sycl(const void *vx, const void *vy,
 }
 
 void ggml_sycl_op_mul_mat_vec_q(
+    ggml_backend_sycl_context & ctx,
     const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
     const char *src0_dd_i, const float *src1_ddf_i, const char *src1_ddq_i,
     float *dst_dd_i, const int64_t row_low, const int64_t row_high,

diff --git a/ggml-sycl/mmvq.hpp b/ggml-sycl/mmvq.hpp
@@ -17,6 +17,7 @@
 
 
 void ggml_sycl_op_mul_mat_vec_q(
+    ggml_backend_sycl_context & ctx,
     const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
     const char *src0_dd_i, const float *src1_ddf_i, const char *src1_ddq_i,
     float *dst_dd_i, const int64_t row_low, const int64_t row_high,