Skip to content

Commit

Permalink
Revert "PaRSEC now allows DSLs to free the gpu task"
Browse files (browse the repository at this point in the history)
This reverts commit f6c8441.
  • Loading branch information
devreal committed Nov 20, 2024
1 parent b2827ad commit ec85a3a
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 61 deletions.
2 changes: 1 addition & 1 deletion cmake/modules/ExternalDependenciesVersions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
set(TTG_TRACKED_VG_CMAKE_KIT_TAG d1b34157c349cf0a7c2f149b7704a682d53f6486) # provides FindOrFetchLinalgPP and "real" FindOrFetchBoost
set(TTG_TRACKED_CATCH2_VERSION 3.5.0)
set(TTG_TRACKED_MADNESS_TAG 93a9a5cec2a8fa87fba3afe8056607e6062a9058)
set(TTG_TRACKED_PARSEC_TAG 996dda4c0ff3120bc65385f86e999befd4b3fe7a)
set(TTG_TRACKED_PARSEC_TAG 58f8f3089ecad2e8ee50e80a9586e05ce8873b1c)
set(TTG_TRACKED_BTAS_TAG c25b0a11d2a76190bfb13fa72f9e9dc3e57c3c2f)
set(TTG_TRACKED_TILEDARRAY_TAG 5944bdba3266a3fa19f1809c8e2accf3dad4d815)

Expand Down
12 changes: 6 additions & 6 deletions ttg/ttg/parsec/task.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,31 @@ namespace ttg_parsec {
namespace detail {

struct device_ptr_t {
parsec_gpu_task_t *gpu_task = nullptr;
parsec_gpu_task_t* gpu_task = nullptr;
parsec_flow_t* flows = nullptr;
parsec_gpu_exec_stream_t* stream = nullptr;
parsec_device_gpu_module_t* device = nullptr;
parsec_task_class_t task_class; // copy of the taskclass
};

template<bool HasDeviceOp>
template<bool SupportDevice>
struct device_state_t
{
static constexpr bool support_device = false;
static constexpr size_t num_flows = 0;
device_state_t()
{ }
static constexpr device_ptr_t* dev_ptr() {
return nullptr;
}
};

template<>
struct device_state_t<true> {
static constexpr bool support_device = true;
static constexpr bool support_device = false;
static constexpr size_t num_flows = MAX_PARAM_COUNT;
parsec_flow_t m_flows[num_flows];
parsec_gpu_task_t device_task;
device_ptr_t m_dev_ptr = {&device_task, &m_flows[0], nullptr, nullptr};

device_ptr_t m_dev_ptr = {nullptr, &m_flows[0], nullptr, nullptr}; // gpu_task will be allocated in each task
device_ptr_t* dev_ptr() {
return &m_dev_ptr;
}
Expand Down
124 changes: 70 additions & 54 deletions ttg/ttg/parsec/ttg.h
Original file line number Diff line number Diff line change
Expand Up @@ -1507,68 +1507,84 @@ namespace ttg_parsec {
static parsec_hook_return_t device_static_evaluate(parsec_task_t* parsec_task) {

task_t *task = (task_t*)parsec_task;
if (task->dev_ptr->gpu_task == nullptr) {

/* set up the device task */
parsec_gpu_task_t *gpu_task = task->dev_ptr->gpu_task;
/* TODO: needed? */
std::memset(gpu_task, 0, sizeof(gpu_task));
/* construct the GPU task */
PARSEC_OBJ_CONSTRUCT(gpu_task, parsec_list_item_t);
gpu_task->ec = parsec_task;
gpu_task->task_type = 0; // user task
gpu_task->last_data_check_epoch = 0; // used internally
gpu_task->pushout = 0;
gpu_task->submit = &TT::device_static_submit;
gpu_task->release_device_task = &release_device_task;

/* TODO: is this the right place to set the mask? */
task->parsec_task.chore_mask = PARSEC_DEV_ALL;

/* copy over the task class, because that's what we need */
task->dev_ptr->task_class = *task->parsec_task.task_class;

// first invocation of the coroutine to get the coroutine handle
static_op(parsec_task);

/* when we come back here, the flows in gpu_task are set (see register_device_memory) */

parsec_task_class_t& tc = task->dev_ptr->task_class;

// input flows are set up during register_device_memory as part of the first invocation above
for (int i = 0; i < MAX_PARAM_COUNT; ++i) {
tc.in[i] = gpu_task->flow[i];
tc.out[i] = gpu_task->flow[i];
}
tc.nb_flows = MAX_PARAM_COUNT;

/* set the device hint on the data */
TT *tt = task->tt;
if (tt->devicemap) {
int parsec_dev;
if constexpr (std::is_void_v<keyT>) {
parsec_dev = detail::ttg_device_to_parsec_device(tt->devicemap());
} else {
parsec_dev = detail::ttg_device_to_parsec_device(tt->devicemap(task->key));
}
/* set up a device task */
parsec_gpu_task_t *gpu_task;
/* PaRSEC wants to free the gpu_task, because F***K ownerships */
gpu_task = static_cast<parsec_gpu_task_t*>(std::calloc(1, sizeof(*gpu_task)));
PARSEC_OBJ_CONSTRUCT(gpu_task, parsec_list_item_t);
gpu_task->ec = parsec_task;
gpu_task->task_type = 0; // user task
gpu_task->last_data_check_epoch = 0; // used internally
gpu_task->pushout = 0;
gpu_task->submit = &TT::device_static_submit<Space>;
gpu_task->release_device_task = &release_device_task;

// one way to force the task device
// currently this will probably break all of PaRSEC if this hint
// does not match where the data is located, not really useful for us
// instead we set a hint on the data if there is no hint set yet
//parsec_task->selected_device = ...;

/* set the gpu_task so it's available in register_device_memory */
task->dev_ptr->gpu_task = gpu_task;

/* TODO: is this the right place to set the mask? */
task->parsec_task.chore_mask = PARSEC_DEV_ALL;

/* copy over the task class, because that's what we need */
task->dev_ptr->task_class = *task->parsec_task.task_class;

// first invocation of the coroutine to get the coroutine handle
static_op(parsec_task);

/* when we come back here, the flows in gpu_task are set (see register_device_memory) */

parsec_task_class_t& tc = task->dev_ptr->task_class;

// input flows are set up during register_device_memory as part of the first invocation above
for (int i = 0; i < MAX_PARAM_COUNT; ++i) {
/* only set on mutable data since we have exclusive access */
if (tc.in[i]->flow_flags & PARSEC_FLOW_ACCESS_WRITE) {
parsec_data_t *data = parsec_task->data[i].data_in->original;
/* only set the preferred device if the host has the latest copy
* as otherwise we may end up with the wrong data if there is a newer
* version on a different device. Also, keep fingers crossed. */
if (data->owner_device == 0) {
parsec_advise_data_on_device(data, parsec_dev, PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE);
tc.in[i] = gpu_task->flow[i];
tc.out[i] = gpu_task->flow[i];
}
tc.nb_flows = MAX_PARAM_COUNT;

/* set the device hint on the data */
TT *tt = task->tt;
if (tt->devicemap) {
int parsec_dev;
if constexpr (std::is_void_v<keyT>) {
parsec_dev = detail::ttg_device_to_parsec_device(tt->devicemap());
} else {
parsec_dev = detail::ttg_device_to_parsec_device(tt->devicemap(task->key));
}
for (int i = 0; i < MAX_PARAM_COUNT; ++i) {
/* only set on mutable data since we have exclusive access */
if (tc.in[i]->flow_flags & PARSEC_FLOW_ACCESS_WRITE) {
parsec_data_t *data = parsec_task->data[i].data_in->original;
/* only set the preferred device if the host has the latest copy
* as otherwise we may end up with the wrong data if there is a newer
* version on a different device. Also, keep fingers crossed. */
if (data->owner_device == 0) {
parsec_advise_data_on_device(data, parsec_dev, PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE);
}
}
}
}

/* set the new task class that contains the flows */
task->parsec_task.task_class = &task->dev_ptr->task_class;

/* select this one */
return PARSEC_HOOK_RETURN_DONE;
}

/* set the new task class that contains the flows */
task->parsec_task.task_class = &task->dev_ptr->task_class;
std::cerr << "EVALUATE called on task with assigned GPU task!" << std::endl;

/* not sure if this might happen*/
return PARSEC_HOOK_RETURN_ERROR;

/* select this one */
return PARSEC_HOOK_RETURN_DONE;
}

static parsec_hook_return_t device_static_op(parsec_task_t* parsec_task) {
Expand Down

0 comments on commit ec85a3a

Please sign in to comment.