Skip to content

Commit

Permalink
Merge pull request TESSEorg#305 from devreal/nvcc-warnings
Browse files — browse the repository at this point in the history
Nvcc warnings
  • Loading branch information
therault authored Nov 13, 2024
2 parents c22faf9 + a9bb37a commit 3636049
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 52 deletions.
6 changes: 2 additions & 4 deletions ttg/ttg/device/task.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,7 @@ namespace ttg::device {
}
}

template <size_t KeyId, size_t I, size_t... Is, typename... RangesT, typename valueT,
typename... out_keysT, typename... out_valuesT>
template <size_t KeyId, size_t I, size_t... Is, typename... RangesT, typename valueT>
inline void broadcast(const std::tuple<RangesT...> &keylists, valueT &&value) {
using key_t = typename broadcast_keylist_trait<
std::tuple_element_t<KeyId, std::tuple<std::remove_reference_t<RangesT>...>>
Expand Down Expand Up @@ -379,8 +378,7 @@ namespace ttg::device {
}
}

template <size_t KeyId, size_t I, size_t... Is, typename... RangesT,
typename... out_keysT, typename... out_valuesT>
template <size_t KeyId, size_t I, size_t... Is, typename... RangesT>
inline void broadcastk(const std::tuple<RangesT...> &keylists) {
using key_t = typename broadcast_keylist_trait<
std::tuple_element_t<KeyId, std::tuple<std::remove_reference_t<RangesT>...>>
Expand Down
8 changes: 3 additions & 5 deletions ttg/ttg/func.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,17 +416,15 @@ namespace ttg {
std::get<i>(t).broadcast(keylist, copy_handler(std::forward<valueT>(value)));
}

template <typename rangeT, typename valueT, typename... out_keysT, typename... out_valuesT,
ttg::Runtime Runtime = ttg::ttg_runtime>
template <typename rangeT, typename valueT, ttg::Runtime Runtime = ttg::ttg_runtime>
inline void broadcast(std::size_t i, const rangeT &keylist, valueT &&value) {
detail::value_copy_handler<Runtime> copy_handler;
using key_t = decltype(*std::begin(keylist));
auto *terminal_ptr = detail::get_out_terminal<key_t, valueT>(i, "ttg::broadcast(keylist, value)");
terminal_ptr->broadcast(keylist, copy_handler(std::forward<valueT>(value)));
}

template <size_t i, typename rangeT, typename valueT, typename... out_keysT, typename... out_valuesT,
ttg::Runtime Runtime = ttg::ttg_runtime>
/// Convenience overload that takes the output terminal index `i` as a template argument
/// and forwards it, together with `keylist` and the perfectly forwarded `value`, as the
/// first run-time argument of another `broadcast` overload.
///
/// @tparam i       index of the output terminal
/// @param keylist  keys to which the value is broadcast
/// @param value    value to send
template <size_t i, typename rangeT, typename valueT, ttg::Runtime Runtime = ttg::ttg_runtime>
inline void broadcast(const rangeT &keylist, valueT &&value) {
// NOTE(review): Runtime is not passed explicitly to the callee, so the callee presumably
// falls back to its own default Runtime — confirm this is intended.
broadcast(i, keylist, std::forward<valueT>(value));
}
Expand Down Expand Up @@ -505,7 +503,7 @@ namespace ttg {
terminal_ptr->set_size(size);
}

template <size_t i, typename keyT, typename... out_keysT, typename... out_valuesT>
/// Overload of `set_size` that takes the terminal index `i` as a template argument.
/// Enabled only when `keyT` is not void (see the `enable_if_t` return type); it simply
/// delegates to `set_size(i, key, size)`.
///
/// @tparam i    index passed through as the first argument of the delegated call
/// @param key   key passed through to the delegated call
/// @param size  size passed through to the delegated call
template <size_t i, typename keyT>
inline std::enable_if_t<!meta::is_void_v<keyT>, void> set_size(const keyT &key, const std::size_t size) {
set_size(i, key, size);
}
Expand Down
1 change: 0 additions & 1 deletion ttg/ttg/parsec/devicefunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ namespace ttg_parsec {

/* get_parsec_data is overloaded for buffer and devicescratch */
parsec_data_t* data = detail::get_parsec_data(view);
parsec_gpu_task_t *gpu_task = detail::parsec_ttg_caller->dev_ptr->gpu_task;
parsec_gpu_exec_stream_t *stream = detail::parsec_ttg_caller->dev_ptr->stream;

/* enqueue the transfer into the compute stream to come back once the compute and transfer are complete */
Expand Down
50 changes: 10 additions & 40 deletions ttg/ttg/parsec/ttg.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ namespace ttg_parsec {
MSG_SET_ARGSTREAM_SIZE = 1,
MSG_FINALIZE_ARGSTREAM_SIZE = 2,
MSG_GET_FROM_PULL = 3 } fn_id_t;
uint32_t taskpool_id = -1;
uint64_t op_id = -1;
uint32_t taskpool_id = std::numeric_limits<uint32_t>::max();
uint64_t op_id = std::numeric_limits<uint64_t>::max();
std::size_t key_offset = 0;
fn_id_t fn_id = MSG_INVALID;
std::int8_t num_iovecs = 0;
Expand Down Expand Up @@ -334,7 +334,7 @@ namespace ttg_parsec {
void create_tpool() {
assert(nullptr == tpool);
tpool = PARSEC_OBJ_NEW(parsec_taskpool_t);
tpool->taskpool_id = -1;
tpool->taskpool_id = std::numeric_limits<uint32_t>::max();
tpool->update_nb_runtime_task = parsec_add_fetch_runtime_task;
tpool->taskpool_type = PARSEC_TASKPOOL_TYPE_TTG;
tpool->taskpool_name = strdup("TTG Taskpool");
Expand Down Expand Up @@ -701,7 +701,7 @@ namespace ttg_parsec {
template <typename Value>
inline ttg_data_copy_t *create_new_datacopy(Value &&value) {
using value_type = std::decay_t<Value>;
ttg_data_copy_t *copy;
ttg_data_copy_t *copy = nullptr;
if constexpr (std::is_base_of_v<ttg::TTValue<value_type>, value_type> &&
std::is_constructible_v<value_type, decltype(value)>) {
copy = new value_type(std::forward<Value>(value));
Expand Down Expand Up @@ -1472,36 +1472,6 @@ namespace ttg_parsec {
return rc;
}

/// Pre-stage-in pass over the flows selected by `flow_mask`.
///
/// For every bit `i` (0 <= i < MAX_PARAM_COUNT) set in `flow_mask`, the input data copy of
/// flow `i` is checked for the TTG_PARSEC_DATA_FLAG_REGISTERED flag. The actual host-memory
/// registration (cudaHostRegister) and the flag update are currently commented out, so the
/// function performs no modification.
///
/// @param gtask       GPU task; its `ec` member is cast to the TTG `task_t`
/// @param flow_mask   bitmask in which bit `i` selects flow `i`
/// @param gpu_stream  not used by this function
static void
static_device_stage_in(parsec_gpu_task_t *gtask,
                       uint32_t flow_mask,
                       parsec_gpu_exec_stream_t *gpu_stream) {
  /* register any memory that hasn't been registered yet */
  for (int i = 0; i < MAX_PARAM_COUNT; ++i) {
    /* shift an unsigned 32-bit one so the test matches the type of the mask */
    if (flow_mask & (uint32_t{1} << i)) {
      task_t *task = (task_t*)gtask->ec;
      parsec_data_copy_t *copy = task->parsec_task.data[i].data_in;
      if (0 == (copy->flags & TTG_PARSEC_DATA_FLAG_REGISTERED)) {
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
        // register host memory for faster device access
        // (the status variable lives with the disabled code to avoid an unused-variable warning)
        //cudaError_t status;
        //status = cudaHostRegister(copy->device_private, gtask->flow_nb_elts[i], cudaHostRegisterPortable);
        //assert(cudaSuccess == status);
#endif // PARSEC_HAVE_DEV_CUDA_SUPPORT
        //copy->flags |= TTG_PARSEC_DATA_FLAG_REGISTERED;
      }
    }
  }
}

/// Stage-in hook: first runs static_device_stage_in() on the given task, mask and stream,
/// then delegates to parsec_default_gpu_stage_in() with the same arguments and returns
/// that call's result.
static int
static_device_stage_in_hook(parsec_gpu_task_t *gtask,
                            uint32_t flow_mask,
                            parsec_gpu_exec_stream_t *gpu_stream) {
  /* TTG-specific pass over the selected flows */
  static_device_stage_in(gtask, flow_mask, gpu_stream);

  /* default stage-in does the actual work; its return code is passed through */
  const int stage_in_rc = parsec_default_gpu_stage_in(gtask, flow_mask, gpu_stream);
  return stage_in_rc;
}

template <ttg::ExecutionSpace Space>
static parsec_hook_return_t device_static_evaluate(parsec_task_t* parsec_task) {

Expand All @@ -1515,7 +1485,7 @@ namespace ttg_parsec {
PARSEC_OBJ_CONSTRUCT(gpu_task, parsec_list_item_t);
gpu_task->ec = parsec_task;
gpu_task->task_type = 0; // user task
gpu_task->last_data_check_epoch = -1; // used internally
gpu_task->last_data_check_epoch = 0; // used internally
gpu_task->pushout = 0;
gpu_task->submit = &TT::device_static_submit<Space>;

Expand Down Expand Up @@ -1624,7 +1594,7 @@ namespace ttg_parsec {
if constexpr (Space == ttg::ExecutionSpace::CUDA) {
/* TODO: we need custom staging functions because PaRSEC looks at the
* task-class to determine the number of flows. */
gpu_task->stage_in = static_device_stage_in_hook;
gpu_task->stage_in = parsec_default_gpu_stage_in;
gpu_task->stage_out = parsec_default_gpu_stage_out;
return parsec_device_kernel_scheduler(&device->super, es, gpu_task);
}
Expand All @@ -1633,7 +1603,7 @@ namespace ttg_parsec {
#if defined(PARSEC_HAVE_DEV_HIP_SUPPORT)
case PARSEC_DEV_HIP:
if constexpr (Space == ttg::ExecutionSpace::HIP) {
gpu_task->stage_in = static_device_stage_in_hook;
gpu_task->stage_in = parsec_default_gpu_stage_in;
gpu_task->stage_out = parsec_default_gpu_stage_out;
return parsec_device_kernel_scheduler(&device->super, es, gpu_task);
}
Expand All @@ -1642,7 +1612,7 @@ namespace ttg_parsec {
#if defined(PARSEC_HAVE_DEV_LEVEL_ZERO_SUPPORT)
case PARSEC_DEV_LEVEL_ZERO:
if constexpr (Space == ttg::ExecutionSpace::L0) {
gpu_task->stage_in = static_device_stage_in_hook;
gpu_task->stage_in = parsec_default_gpu_stage_in;
gpu_task->stage_out = parsec_default_gpu_stage_out;
return parsec_device_kernel_scheduler(&device->super, es, gpu_task);
}
Expand Down Expand Up @@ -2399,7 +2369,9 @@ namespace ttg_parsec {
auto &reducer = std::get<i>(input_reducers);
bool release = false;
bool remove_from_hash = true;
#if defined(PARSEC_PROF_GRAPHER)
bool discover_task = true;
#endif
bool get_pull_data = false;
bool has_lock = false;
/* If we have only one input and no reducer on that input we can skip the hash table */
Expand Down Expand Up @@ -2795,7 +2767,6 @@ namespace ttg_parsec {
num_iovecs = std::distance(std::begin(iovs), std::end(iovs));
/* pack the metadata */
auto metadata = descr.get_metadata(*const_cast<decvalueT *>(value_ptr));
size_t metadata_size = sizeof(metadata);
pos = pack(metadata, msg->bytes, pos);
//std::cout << "set_arg_impl splitmd num_iovecs " << num_iovecs << std::endl;
handle_iovec_fn(iovs);
Expand Down Expand Up @@ -2970,7 +2941,6 @@ namespace ttg_parsec {
ttg::SplitMetadataDescriptor<decvalueT> descr;
/* pack the metadata */
auto metadata = descr.get_metadata(value);
size_t metadata_size = sizeof(metadata);
pos = pack(metadata, msg->bytes, pos);
auto iovs = descr.get_data(*const_cast<decvalueT *>(&value));
num_iovs = std::distance(std::begin(iovs), std::end(iovs));
Expand Down
3 changes: 1 addition & 2 deletions ttg/ttg/util/dot.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,12 @@ namespace ttg {

void ttfunc(TTBase *tt) {
std::string ttnm = nodename(tt);
bool is_ttg = true;

const TTBase *ttc = reinterpret_cast<const TTBase*>(tt);
build_ttg_hierarchy(ttc);
if(!tt->is_ttg()) {
std::stringstream ttss;

ttss << " " << ttnm << " [shape=record,style=filled,fillcolor=gray90,label=\"{";

size_t count = 0;
Expand Down

0 comments on commit 3636049

Please sign in to comment.