Skip to content

Commit

Permalink
fix dpct create_queue to return queue* and use std::make_share<>
Browse files Browse the repository at this point in the history
  • Loading branch information
NeoZhangJianyu committed Jun 30, 2024
1 parent 6da656f commit d95c04a
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 53 deletions.
26 changes: 15 additions & 11 deletions ggml/src/ggml-sycl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void print_device_detail_part1(int id, sycl::device &device, std::string device_

auto global_mem_size = prop.get_global_mem_size()/1000000;

fprintf(stderr, "|%2d|%4s|%19s|%39s|%14luM|\n", id, version.c_str(), device_type.c_str(),
fprintf(stderr, "|%2d|%19s|%4s|%39s|%14luM|\n", id, device_type.c_str(), version.c_str(),
name.c_str(), global_mem_size);
}

Expand All @@ -127,8 +127,8 @@ void ggml_backend_sycl_print_sycl_devices() {
std::map<std::string, size_t> DeviceNums;
fprintf(stderr, "found %d SYCL devices:\n", device_count);
fprintf(stderr, "Part1:\n");
fprintf(stderr, "|ID| Ver| Device Type| Name|Global mem size|\n");
fprintf(stderr, "|--|----|-------------------|---------------------------------------|---------------|\n");
fprintf(stderr, "|ID| Device Type| Ver| Name|Global mem size|\n");
fprintf(stderr, "|--|-------------------|----|---------------------------------------|---------------|\n");
for (int id = 0; id < device_count; ++id) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
sycl::backend backend = device.get_backend();
Expand Down Expand Up @@ -246,26 +246,30 @@ device-to-device copy in the future.
void sycl_device_mgr::create_context_for_group_gpus() {
sycl::context ctx = sycl::context(devices);
assert(device_ids.size() > 0);
first_queue = dpct::get_current_device().create_queue(ctx, devices[0]);
first_queue = _create_queue_ptr(devices[0]);
sycl::context ctx0 = first_queue->get_context();
for (int i = 0; i < device_ids.size(); i++) {
ctxs.push_back(ctx0);
}
}

sycl::queue *sycl_device_mgr::create_queue_for_device(sycl::context &ctx,
sycl::device &device) {
sycl::queue *sycl_device_mgr::_create_queue_ptr(sycl::device device) {
auto q = dpct::get_current_device().create_queue(device);
return q;
// _queues.push_back(q);
// return & _queues.back();
}

sycl::queue *sycl_device_mgr::create_queue_for_device(sycl::device &device) {
dpct::select_device(dpct::dev_mgr::instance().get_device_id(device));
auto res = dpct::get_current_device().create_queue(ctx, device);
return res;
auto qptr = _create_queue_ptr(device);
return qptr;
}

sycl::queue *sycl_device_mgr::create_queue_for_device_id(int device_id) {
int i = get_device_index(device_id);
sycl::context ctx = ctxs[i];
sycl::device device = dpct::dev_mgr::instance().get_device(device_id);
;
return create_queue_for_device(ctx, device);
return create_queue_for_device(device);
}

int sycl_device_mgr::get_device_index(int device_id) {
Expand Down
24 changes: 12 additions & 12 deletions ggml/src/ggml-sycl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ static void* g_scratch_buffer = nullptr;
static size_t g_scratch_size = 0; // disabled by default
static size_t g_scratch_offset = 0;

int get_current_device_id();

[[noreturn]] static inline void bad_arch(const sycl::stream& stream_ct1) {
stream_ct1 << "ERROR: ggml-sycl was compiled without support for the "
"current GPU architecture.\n";
Expand All @@ -159,16 +161,6 @@ static size_t g_scratch_offset = 0;
(void)bad_arch; // suppress unused function warning
}



int get_current_device_id();
static inline int get_sycl_env(const char *env_name, int default_val);
static inline bool env_existed(const char *env_name);

static inline void exit_with_stack_print() {
SYCL_CHECK(SYCL_RETURN_ERROR);
}

inline dpct::err0 ggml_sycl_set_device(const int device_id) try {

int current_device_id;
Expand All @@ -195,14 +187,15 @@ class sycl_device_mgr {
std::vector<int> max_compute_units;
std::vector<int> work_group_sizes;
sycl::queue *first_queue;
std::vector<sycl::queue> _queues;
std::vector<sycl::context> ctxs;
std::string device_list = "";

sycl_device_mgr(ggml_sycl_backend_device_filter device_filter);

sycl::queue *_create_queue_ptr(sycl::device device); //internal API to hide dpct API.
void create_context_for_group_gpus();
sycl::queue *create_queue_for_device(sycl::context &ctx,
sycl::device &device);
sycl::queue *create_queue_for_device(sycl::device &device);
sycl::queue *create_queue_for_device_id(int device_id);
int get_device_index(int device_id);
void create_context_for_devices();
Expand Down Expand Up @@ -342,6 +335,13 @@ struct ggml_backend_sycl_context {
}
};

static inline void exit_with_stack_print() {
SYCL_CHECK(SYCL_RETURN_ERROR);
}


static inline int get_sycl_env(const char *env_name, int default_val);
static inline bool env_existed(const char *env_name);
void* ggml_sycl_host_malloc(size_t size);
void ggml_sycl_host_free(void* ptr);
static std::vector<int> get_sycl_visible_devices();
Expand Down
72 changes: 42 additions & 30 deletions ggml/src/ggml-sycl/dpct/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ namespace dpct
out = prop;
}
/// dpct device extension
/// dpct device extension
class device_ext : public sycl::device {
typedef std::mutex mutex_type;
Expand Down Expand Up @@ -687,119 +687,131 @@ namespace dpct
init_queues();
}
sycl::queue &in_order_queue() { return _q_in_order; }
sycl::queue &in_order_queue() { return *_q_in_order; }
sycl::queue &out_of_order_queue() { return _q_out_of_order; }
sycl::queue &out_of_order_queue() { return *_q_out_of_order; }
sycl::queue &default_queue() { return in_order_queue(); }
void queues_wait_and_throw() {
std::unique_lock<mutex_type> lock(m_mutex);
std::vector<std::shared_ptr<sycl::queue>> current_queues(
_queues);
lock.unlock();
for (auto &q : _queues) {
q.wait_and_throw();
for (const auto &q : current_queues)
{
q->wait_and_throw();
}
// Guard the destruct of current_queues to make sure the ref count is
// safe.
lock.lock();
}
sycl::queue create_queue(bool enable_exception_handler = false) {
sycl::queue *create_queue(bool enable_exception_handler = false) {
return create_in_order_queue(enable_exception_handler);
}
sycl::queue create_queue(sycl::device device,
sycl::queue *create_queue(sycl::device device,
bool enable_exception_handler = false) {
return create_in_order_queue(device, enable_exception_handler);
}
sycl::queue create_in_order_queue(bool enable_exception_handler = false) {
sycl::queue *create_in_order_queue(bool enable_exception_handler = false) {
std::lock_guard<mutex_type> lock(m_mutex);
return create_queue_impl(enable_exception_handler,
sycl::property::queue::in_order());
}
sycl::queue create_in_order_queue(sycl::device device,
sycl::queue *create_in_order_queue(sycl::device device,
bool enable_exception_handler = false) {
std::lock_guard<mutex_type> lock(m_mutex);
return create_queue_impl(device, enable_exception_handler,
sycl::property::queue::in_order());
}
sycl::queue create_out_of_order_queue(
sycl::queue *create_out_of_order_queue(
bool enable_exception_handler = false) {
std::lock_guard<mutex_type> lock(m_mutex);
return create_queue_impl(enable_exception_handler);
}
void destroy_queue(sycl::queue queue) {
void destroy_queue(sycl::queue *&queue) {
std::lock_guard<mutex_type> lock(m_mutex);
_queues.clear();
_queues.erase(std::remove_if(_queues.begin(), _queues.end(),
[=](const std::shared_ptr<sycl::queue> &q) -> bool
{
return q.get() == queue;
}),
_queues.end());
queue = nullptr;
}
void set_saved_queue(sycl::queue q) {
void set_saved_queue(sycl::queue *q) {
std::lock_guard<mutex_type> lock(m_mutex);
_saved_queue = q;
}
sycl::queue get_saved_queue() const {
sycl::queue *get_saved_queue() const {
std::lock_guard<mutex_type> lock(m_mutex);
return _saved_queue;
}
private:
void clear_queues() { _queues.clear(); }
void clear_queues() {
_queues.clear();
_q_in_order = _q_out_of_order = _saved_queue = nullptr;
}
void init_queues() {
_q_in_order =
create_queue_impl(true, sycl::property::queue::in_order());
_q_out_of_order = create_queue_impl(true);
_saved_queue = default_queue();
_saved_queue = &default_queue();
}
/// Caller should acquire resource \p m_mutex before calling this
/// function.
template <class... Properties>
sycl::queue create_queue_impl(bool enable_exception_handler,
sycl::queue *create_queue_impl(bool enable_exception_handler,
Properties... properties) {
sycl::async_handler eh = {};
if (enable_exception_handler) {
eh = exception_handler;
}
auto q = sycl::queue(*this, eh,
sycl::property_list(
_queues.push_back(std::make_shared<sycl::queue>(
*this, eh,
sycl::property_list(
#ifdef DPCT_PROFILING_ENABLED
sycl::property::queue::enable_profiling(),
sycl::property::queue::enable_profiling(),
#endif
properties...));
_queues.push_back(q);
properties...)));
return _queues.back();
return _queues.back().get();
}
template <class... Properties>
sycl::queue create_queue_impl(sycl::device device,
sycl::queue *create_queue_impl(sycl::device device,
bool enable_exception_handler,
Properties... properties) {
sycl::async_handler eh = {};
if (enable_exception_handler) {
eh = exception_handler;
}
_queues.push_back(
sycl::queue(device, eh,
_queues.push_back(std::make_shared<sycl::queue>(
device, eh,
sycl::property_list(
#ifdef DPCT_PROFILING_ENABLED
sycl::property::queue::enable_profiling(),
#endif
properties...)));
return _queues.back();
return _queues.back().get();
}
void get_version(int &major, int &minor) const {
detail::get_version(*this, major, minor);
}
sycl::queue _q_in_order, _q_out_of_order;
sycl::queue _saved_queue;
std::vector<sycl::queue> _queues;
sycl::queue *_q_in_order, *_q_out_of_order;
sycl::queue *_saved_queue;
std::vector<std::shared_ptr<sycl::queue>> _queues;
mutable mutex_type m_mutex;
};
Expand Down

0 comments on commit d95c04a

Please sign in to comment.