diff --git a/examples/potrf/potrf.h b/examples/potrf/potrf.h index 369bc9a8d..00452f78f 100644 --- a/examples/potrf/potrf.h +++ b/examples/potrf/potrf.h @@ -58,13 +58,13 @@ namespace potrf { //std::cout << "POTRF handle " << handle << " device " << device << " stream " << ttg::device::current_stream() << std::endl; cusolverDnDpotrf(handle, CUBLAS_FILL_MODE_LOWER, A.cols(), - A.buffer().device_ptr_on(device), A.lda(), + A.buffer().current_device_ptr(), A.lda(), workspace, Lwork, devInfo); #elif defined(TTG_HAVE_HIPBLAS) hipsolverDpotrf(hipsolver_handle(), HIPSOLVER_FILL_MODE_LOWER, A.cols(), - A.buffer().device_ptr_on(device), A.lda(), + A.buffer().current_device_ptr(), A.lda(), workspace, Lwork, devInfo); #endif @@ -290,15 +290,15 @@ namespace potrf { CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT, mb, nb, &alpha, - tile_kk.buffer().device_ptr_on(device), tile_kk.lda(), - tile_mk.buffer().device_ptr_on(device), tile_mk.lda()); + tile_kk.buffer().current_device_ptr(), tile_kk.lda(), + tile_mk.buffer().current_device_ptr(), tile_mk.lda()); #elif defined(TTG_HAVE_HIPBLAS) hipblasDtrsm(hipblas_handle(), HIPBLAS_SIDE_RIGHT, HIPBLAS_FILL_MODE_LOWER, HIPBLAS_OP_T, HIPBLAS_DIAG_NON_UNIT, mb, nb, &alpha, - tile_kk.buffer().device_ptr_on(device), tile_kk.lda(), - tile_mk.buffer().device_ptr_on(device), tile_mk.lda()); + tile_kk.buffer().current_device_ptr(), tile_kk.lda(), + tile_mk.buffer().current_device_ptr(), tile_mk.lda()); #endif @@ -420,15 +420,15 @@ namespace potrf { CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, mb, nb, &alpha, - tile_mk.buffer().device_ptr_on(device), tile_mk.lda(), &beta, - tile_kk.buffer().device_ptr_on(device), tile_kk.lda()); + tile_mk.buffer().current_device_ptr(), tile_mk.lda(), &beta, + tile_kk.buffer().current_device_ptr(), tile_kk.lda()); #elif defined(TTG_HAVE_HIPBLAS) hipblasDsyrk(hipblas_handle(), HIPBLAS_FILL_MODE_LOWER, HIPBLAS_OP_N, mb, nb, &alpha, - tile_mk.buffer().device_ptr_on(device), tile_mk.lda(), &beta, - tile_kk.buffer().device_ptr_on(device), tile_kk.lda()); + tile_mk.buffer().current_device_ptr(), tile_mk.lda(), &beta, + tile_kk.buffer().current_device_ptr(), tile_kk.lda()); #endif #ifdef DEBUG_TILES_VALUES @@ -545,17 +545,17 @@ namespace potrf { CUBLAS_OP_N, CUBLAS_OP_T, tile_mk.rows(), tile_nk.rows(), tile_nk.cols(), &alpha, - tile_mk.buffer().device_ptr_on(device), tile_mk.lda(), - tile_nk.buffer().device_ptr_on(device), tile_nk.lda(), &beta, - tile_mn.buffer().device_ptr_on(device), tile_mn.lda()); + tile_mk.buffer().current_device_ptr(), tile_mk.lda(), + tile_nk.buffer().current_device_ptr(), tile_nk.lda(), &beta, + tile_mn.buffer().current_device_ptr(), tile_mn.lda()); #elif defined(TTG_HAVE_HIPBLAS) hipblasDgemm(hipblas_handle(), HIPBLAS_OP_N, HIPBLAS_OP_T, tile_mk.rows(), tile_nk.rows(), tile_nk.cols(), &alpha, - tile_mk.buffer().device_ptr_on(device), tile_mk.lda(), - tile_nk.buffer().device_ptr_on(device), tile_nk.lda(), &beta, - tile_mn.buffer().device_ptr_on(device), tile_mn.lda()); + tile_mk.buffer().current_device_ptr(), tile_mk.lda(), + tile_nk.buffer().current_device_ptr(), tile_nk.lda(), &beta, + tile_mn.buffer().current_device_ptr(), tile_mn.lda()); #endif diff --git a/ttg/ttg/parsec/buffer.h b/ttg/ttg/parsec/buffer.h index e3d7a84b3..bd1acf338 100644 --- a/ttg/ttg/parsec/buffer.h +++ b/ttg/ttg/parsec/buffer.h @@ -164,7 +164,7 @@ struct buffer : public detail::ttg_parsec_data_wrapper_t /* make sure it's a valid device */ assert(parsec_nb_devices > device_id); /* make sure it's a valid copy */ - int parsec_id = detail::device_to_parsec_device(device); + int parsec_id = detail::ttg_device_to_parsec_device(device); assert(m_data->device_copies[parsec_id] != nullptr); m_data->owner_device = parsec_id; } @@ -173,20 +173,20 @@ struct buffer : public detail::ttg_parsec_data_wrapper_t * device buffer. */ ttg::device::Device get_owner_device() const { assert(is_valid()); - return detail::parsec_device_to_device(m_data->owner_device); + return detail::parsec_device_to_ttg_device(m_data->owner_device); } /* Get the pointer on the currently active device. */ element_type* current_device_ptr() { assert(is_valid()); - int device_id = ttg::device::current_device()+detail::first_device_id; + int device_id = detail::ttg_device_to_parsec_device(ttg::device::current_device()); return static_cast(m_data->device_copies[device_id]->device_private); } /* Get the pointer on the currently active device. */ const element_type* current_device_ptr() const { assert(is_valid()); - int device_id = ttg::device::current_device()+detail::first_device_id; + int device_id = detail::ttg_device_to_parsec_device(ttg::device::current_device()); return static_cast(m_data->device_copies[device_id]->device_private); } @@ -208,7 +208,7 @@ struct buffer : public detail::ttg_parsec_data_wrapper_t */ element_type* device_ptr_on(const ttg::device::Device& device) { assert(is_valid()); - int device_id = detail::device_to_parsec_device(device); + int device_id = detail::ttg_device_to_parsec_device(device); return static_cast(parsec_data_get_ptr(m_data.get(), device_id)); } @@ -216,7 +216,7 @@ struct buffer : public detail::ttg_parsec_data_wrapper_t */ const element_type* device_ptr_on(const ttg::device::Device& device) const { assert(is_valid()); - int device_id = detail::device_to_parsec_device(device); + int device_id = detail::ttg_device_to_parsec_device(device); return static_cast(parsec_data_get_ptr(m_data.get(), device_id)); } @@ -230,7 +230,7 @@ struct buffer : public detail::ttg_parsec_data_wrapper_t bool is_valid_on(const ttg::device::Device& device) const { assert(is_valid()); - int device_id = detail::device_to_parsec_device(device); + int device_id = detail::ttg_device_to_parsec_device(device); return (parsec_data_get_ptr(m_data.get(), device_id) != nullptr); } diff --git a/ttg/ttg/parsec/device.h b/ttg/ttg/parsec/device.h index 8f46453ac..77722b1c1 100644 --- a/ttg/ttg/parsec/device.h +++ b/ttg/ttg/parsec/device.h @@ -8,13 +8,13 @@ namespace ttg_parsec { namespace detail { // the first ID of an accelerator in the parsec ID-space - inline int first_device_id = 0; + inline int first_device_id = -1; /** * map from TTG ID-space to parsec ID-space */ inline - int device_to_parsec_device(const ttg::device::Device& device) { + int ttg_device_to_parsec_device(const ttg::device::Device& device) { if (device.is_host()) { return 0; } else { @@ -26,7 +26,7 @@ namespace ttg_parsec { * map from parsec ID-space to TTG ID-space */ inline - ttg::device::Device parsec_device_to_device(int parsec_id) { + ttg::device::Device parsec_device_to_ttg_device(int parsec_id) { if (parsec_id < first_device_id) { return ttg::device::Device(parsec_id, ttg::ExecutionSpace::Host); } diff --git a/ttg/ttg/parsec/ttg.h b/ttg/ttg/parsec/ttg.h index 241b723f5..694a13917 100644 --- a/ttg/ttg/parsec/ttg.h +++ b/ttg/ttg/parsec/ttg.h @@ -989,10 +989,13 @@ namespace ttg_parsec { ttg::detail::set_default_world(std::move(world)); // query the first device ID + detail::first_device_id = -1; for (int i = 0; i < parsec_nb_devices; ++i) { - if (parsec_mca_device_is_gpu(i)) { + bool is_gpu = parsec_mca_device_is_gpu(i); + if (detail::first_device_id == -1 && is_gpu) { detail::first_device_id = i; - break; + } else if (detail::first_device_id > -1 && !is_gpu) { + throw std::runtime_error("PaRSEC: Found non-GPU device in GPU ID range!"); } } } @@ -1362,7 +1365,7 @@ namespace ttg_parsec { #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) && defined(TTG_HAVE_CUDA) { parsec_cuda_exec_stream_t *cuda_stream = (parsec_cuda_exec_stream_t *)gpu_stream; - int device = gpu_device->super.device_index - detail::first_device_id; + int device = detail::parsec_device_to_ttg_device(gpu_device->super.device_index); ttg::device::detail::set_current(device, cuda_stream->cuda_stream); } #endif // defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) && defined(TTG_HAVE_CUDA) @@ -1370,7 +1373,7 @@ namespace ttg_parsec { #if defined(PARSEC_HAVE_DEV_HIP_SUPPORT) && defined(TTG_HAVE_HIP) { parsec_hip_exec_stream_t *hip_stream = (parsec_hip_exec_stream_t *)gpu_stream; - int device = gpu_device->super.device_index - detail::first_device_id; + int device = detail::parsec_device_to_ttg_device(gpu_device->super.device_index); ttg::device::detail::set_current(device, hip_stream->hip_stream); } #endif // defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) && defined(TTG_HAVE_CUDA) @@ -1379,7 +1382,7 @@ namespace ttg_parsec { { parsec_level_zero_exec_stream_t *stream; stream = (parsec_level_zero_exec_stream_t *)gpu_stream; - int device = gpu_device->super.device_index - detail::first_device_id; + int device = detail::parsec_device_to_ttg_device(gpu_device->super.device_index); ttg::device::detail::set_current(device, stream->swq->queue); } #endif // defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) && defined(TTG_HAVE_CUDA) @@ -1512,7 +1515,7 @@ namespace ttg_parsec { gpu_task->load = task_load; assert(dev_index >= 0); - if (dev_index < detail::first_device_id) { + if (!parsec_mca_device_is_gpu(dev_index)) { return PARSEC_HOOK_RETURN_NEXT; /* Fall back */ } @@ -1997,7 +2000,7 @@ ttg::abort(); // should not happen // TODO: first attempt at sending directly to the device parsec_gpu_data_copy_t* gpu_elem; gpu_elem = PARSEC_DATA_GET_COPY(master, gpu_device->super.device_index); - int i = detail::first_device_id;ß + int i = detail::first_device_id; int devid = detail::first_device_id; while (i < parsec_nb_devices) { if (nullptr == gpu_elem) {