Skip to content

Commit

Permalink
Proper use of device ID conversion functions
Browse files Browse the repository at this point in the history
Signed-off-by: Joseph Schuchart <[email protected]>
  • Loading branch information
devreal committed Nov 27, 2023
1 parent 78876e5 commit cf757ee
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 33 deletions.
32 changes: 16 additions & 16 deletions examples/potrf/potrf.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,13 @@ namespace potrf {
//std::cout << "POTRF handle " << handle << " device " << device << " stream " << ttg::device::current_stream() << std::endl;
cusolverDnDpotrf(handle,
CUBLAS_FILL_MODE_LOWER, A.cols(),
A.buffer().device_ptr_on(device), A.lda(),
A.buffer().current_device_ptr(), A.lda(),
workspace, Lwork,
devInfo);
#elif defined(TTG_HAVE_HIPBLAS)
hipsolverDpotrf(hipsolver_handle(),
HIPSOLVER_FILL_MODE_LOWER, A.cols(),
A.buffer().device_ptr_on(device), A.lda(),
A.buffer().current_device_ptr(), A.lda(),
workspace, Lwork,
devInfo);
#endif
Expand Down Expand Up @@ -290,15 +290,15 @@ namespace potrf {
CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER,
CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT,
mb, nb, &alpha,
tile_kk.buffer().device_ptr_on(device), tile_kk.lda(),
tile_mk.buffer().device_ptr_on(device), tile_mk.lda());
tile_kk.buffer().current_device_ptr(), tile_kk.lda(),
tile_mk.buffer().current_device_ptr(), tile_mk.lda());
#elif defined(TTG_HAVE_HIPBLAS)
hipblasDtrsm(hipblas_handle(),
HIPBLAS_SIDE_RIGHT, HIPBLAS_FILL_MODE_LOWER,
HIPBLAS_OP_T, HIPBLAS_DIAG_NON_UNIT,
mb, nb, &alpha,
tile_kk.buffer().device_ptr_on(device), tile_kk.lda(),
tile_mk.buffer().device_ptr_on(device), tile_mk.lda());
tile_kk.buffer().current_device_ptr(), tile_kk.lda(),
tile_mk.buffer().current_device_ptr(), tile_mk.lda());

#endif

Expand Down Expand Up @@ -420,15 +420,15 @@ namespace potrf {
CUBLAS_FILL_MODE_LOWER,
CUBLAS_OP_N,
mb, nb, &alpha,
tile_mk.buffer().device_ptr_on(device), tile_mk.lda(), &beta,
tile_kk.buffer().device_ptr_on(device), tile_kk.lda());
tile_mk.buffer().current_device_ptr(), tile_mk.lda(), &beta,
tile_kk.buffer().current_device_ptr(), tile_kk.lda());
#elif defined(TTG_HAVE_HIPBLAS)
hipblasDsyrk(hipblas_handle(),
HIPBLAS_FILL_MODE_LOWER,
HIPBLAS_OP_N,
mb, nb, &alpha,
tile_mk.buffer().device_ptr_on(device), tile_mk.lda(), &beta,
tile_kk.buffer().device_ptr_on(device), tile_kk.lda());
tile_mk.buffer().current_device_ptr(), tile_mk.lda(), &beta,
tile_kk.buffer().current_device_ptr(), tile_kk.lda());
#endif

#ifdef DEBUG_TILES_VALUES
Expand Down Expand Up @@ -545,17 +545,17 @@ namespace potrf {
CUBLAS_OP_N, CUBLAS_OP_T,
tile_mk.rows(), tile_nk.rows(),
tile_nk.cols(), &alpha,
tile_mk.buffer().device_ptr_on(device), tile_mk.lda(),
tile_nk.buffer().device_ptr_on(device), tile_nk.lda(), &beta,
tile_mn.buffer().device_ptr_on(device), tile_mn.lda());
tile_mk.buffer().current_device_ptr(), tile_mk.lda(),
tile_nk.buffer().current_device_ptr(), tile_nk.lda(), &beta,
tile_mn.buffer().current_device_ptr(), tile_mn.lda());
#elif defined(TTG_HAVE_HIPBLAS)
hipblasDgemm(hipblas_handle(),
HIPBLAS_OP_N, HIPBLAS_OP_T,
tile_mk.rows(), tile_nk.rows(),
tile_nk.cols(), &alpha,
tile_mk.buffer().device_ptr_on(device), tile_mk.lda(),
tile_nk.buffer().device_ptr_on(device), tile_nk.lda(), &beta,
tile_mn.buffer().device_ptr_on(device), tile_mn.lda());
tile_mk.buffer().current_device_ptr(), tile_mk.lda(),
tile_nk.buffer().current_device_ptr(), tile_nk.lda(), &beta,
tile_mn.buffer().current_device_ptr(), tile_mn.lda());
#endif


Expand Down
14 changes: 7 additions & 7 deletions ttg/ttg/parsec/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ struct buffer : public detail::ttg_parsec_data_wrapper_t
/* make sure it's a valid device */
assert(parsec_nb_devices > device_id);
/* make sure it's a valid copy */
int parsec_id = detail::device_to_parsec_device(device);
int parsec_id = detail::ttg_device_to_parsec_device(device);
assert(m_data->device_copies[parsec_id] != nullptr);
m_data->owner_device = parsec_id;
}
Expand All @@ -173,20 +173,20 @@ struct buffer : public detail::ttg_parsec_data_wrapper_t
* device buffer. */
ttg::device::Device get_owner_device() const {
assert(is_valid());
return detail::parsec_device_to_device(m_data->owner_device);
return detail::parsec_device_to_ttg_device(m_data->owner_device);
}

/* Get the pointer on the currently active device. */
element_type* current_device_ptr() {
assert(is_valid());
int device_id = ttg::device::current_device()+detail::first_device_id;
int device_id = detail::ttg_device_to_parsec_device(ttg::device::current_device());
return static_cast<element_type*>(m_data->device_copies[device_id]->device_private);
}

/* Get the pointer on the currently active device. */
const element_type* current_device_ptr() const {
assert(is_valid());
int device_id = ttg::device::current_device()+detail::first_device_id;
int device_id = detail::ttg_device_to_parsec_device(ttg::device::current_device());
return static_cast<element_type*>(m_data->device_copies[device_id]->device_private);
}

Expand All @@ -208,15 +208,15 @@ struct buffer : public detail::ttg_parsec_data_wrapper_t
*/
element_type* device_ptr_on(const ttg::device::Device& device) {
assert(is_valid());
int device_id = detail::device_to_parsec_device(device);
int device_id = detail::ttg_device_to_parsec_device(device);
return static_cast<element_type*>(parsec_data_get_ptr(m_data.get(), device_id));
}

/* get the device pointer at the given device
*/
const element_type* device_ptr_on(const ttg::device::Device& device) const {
assert(is_valid());
int device_id = detail::device_to_parsec_device(device);
int device_id = detail::ttg_device_to_parsec_device(device);
return static_cast<element_type*>(parsec_data_get_ptr(m_data.get(), device_id));
}

Expand All @@ -230,7 +230,7 @@ struct buffer : public detail::ttg_parsec_data_wrapper_t

bool is_valid_on(const ttg::device::Device& device) const {
assert(is_valid());
int device_id = detail::device_to_parsec_device(device);
int device_id = detail::ttg_device_to_parsec_device(device);
return (parsec_data_get_ptr(m_data.get(), device_id) != nullptr);
}

Expand Down
6 changes: 3 additions & 3 deletions ttg/ttg/parsec/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ namespace ttg_parsec {
namespace detail {

// the first ID of an accelerator in the parsec ID-space
inline int first_device_id = 0;
inline int first_device_id = -1;

/**
* map from TTG ID-space to parsec ID-space
*/
inline
int device_to_parsec_device(const ttg::device::Device& device) {
int ttg_device_to_parsec_device(const ttg::device::Device& device) {
if (device.is_host()) {
return 0;
} else {
Expand All @@ -26,7 +26,7 @@ namespace ttg_parsec {
* map from parsec ID-space to TTG ID-space
*/
inline
ttg::device::Device parsec_device_to_device(int parsec_id) {
ttg::device::Device parsec_device_to_ttg_device(int parsec_id) {
if (parsec_id < first_device_id) {
return ttg::device::Device(parsec_id, ttg::ExecutionSpace::Host);
}
Expand Down
17 changes: 10 additions & 7 deletions ttg/ttg/parsec/ttg.h
Original file line number Diff line number Diff line change
Expand Up @@ -989,10 +989,13 @@ namespace ttg_parsec {
ttg::detail::set_default_world(std::move(world));

// query the first device ID
detail::first_device_id = -1;
for (int i = 0; i < parsec_nb_devices; ++i) {
if (parsec_mca_device_is_gpu(i)) {
bool is_gpu = parsec_mca_device_is_gpu(i);
if (detail::first_device_id == -1 && is_gpu) {
detail::first_device_id = i;
break;
} else if (detail::first_device_id > -1 && !is_gpu) {
throw std::runtime_error("PaRSEC: Found non-GPU device in GPU ID range!");
}
}
}
Expand Down Expand Up @@ -1362,15 +1365,15 @@ namespace ttg_parsec {
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) && defined(TTG_HAVE_CUDA)
{
parsec_cuda_exec_stream_t *cuda_stream = (parsec_cuda_exec_stream_t *)gpu_stream;
int device = gpu_device->super.device_index - detail::first_device_id;
int device = detail::parsec_device_to_ttg_device(gpu_device->super.device_index);
ttg::device::detail::set_current(device, cuda_stream->cuda_stream);
}
#endif // defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) && defined(TTG_HAVE_CUDA)

#if defined(PARSEC_HAVE_DEV_HIP_SUPPORT) && defined(TTG_HAVE_HIP)
{
parsec_hip_exec_stream_t *hip_stream = (parsec_hip_exec_stream_t *)gpu_stream;
int device = gpu_device->super.device_index - detail::first_device_id;
int device = detail::parsec_device_to_ttg_device(gpu_device->super.device_index);
ttg::device::detail::set_current(device, hip_stream->hip_stream);
}
#endif // defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) && defined(TTG_HAVE_CUDA)
Expand All @@ -1379,7 +1382,7 @@ namespace ttg_parsec {
{
parsec_level_zero_exec_stream_t *stream;
stream = (parsec_level_zero_exec_stream_t *)gpu_stream;
int device = gpu_device->super.device_index - detail::first_device_id;
int device = detail::parsec_device_to_ttg_device(gpu_device->super.device_index);
ttg::device::detail::set_current(device, stream->swq->queue);
}
#endif // defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) && defined(TTG_HAVE_CUDA)
Expand Down Expand Up @@ -1512,7 +1515,7 @@ namespace ttg_parsec {

gpu_task->load = task_load;
assert(dev_index >= 0);
if (dev_index < detail::first_device_id) {
if (!parsec_mca_device_is_gpu(dev_index)) {
return PARSEC_HOOK_RETURN_NEXT; /* Fall back */
}

Expand Down Expand Up @@ -1997,7 +2000,7 @@ ttg::abort(); // should not happen
// TODO: first attempt at sending directly to the device
parsec_gpu_data_copy_t* gpu_elem;
gpu_elem = PARSEC_DATA_GET_COPY(master, gpu_device->super.device_index);
int i = detail::first_device_id;ß
int i = detail::first_device_id;
int devid = detail::first_device_id;
while (i < parsec_nb_devices) {
if (nullptr == gpu_elem) {
Expand Down

0 comments on commit cf757ee

Please sign in to comment.