From e84885f6d4f8f9f2a8f7d36ac8370157701618e5 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Tue, 26 Nov 2024 09:22:21 -0500 Subject: [PATCH] Provide host memory allocation/release callbacks to the copies Allow PaRSEC to allocate host memory on demand, e.g., when data is evicted or we move data to a host task. Most data may never be needed on the host so it is wasteful to allocate it eagerly. Signed-off-by: Joseph Schuchart --- cmake/modules/FindOrFetchPARSEC.cmake | 2 +- ttg/ttg/parsec/buffer.h | 53 +++++++++++++++++++++------ ttg/ttg/parsec/devicefunc.h | 4 ++ 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/cmake/modules/FindOrFetchPARSEC.cmake b/cmake/modules/FindOrFetchPARSEC.cmake index 7b164019f..ee97d6279 100644 --- a/cmake/modules/FindOrFetchPARSEC.cmake +++ b/cmake/modules/FindOrFetchPARSEC.cmake @@ -18,7 +18,7 @@ if (NOT TARGET PaRSEC::parsec) FetchContent_Declare( PARSEC GIT_REPOSITORY https://github.com/devreal/parsec-1.git - GIT_TAG ${TTG_TRACKED_PARSEC_TAG} + GIT_TAG data_copy_alloc_callbacks ) FetchContent_MakeAvailable(PARSEC) FetchContent_GetProperties(PARSEC diff --git a/ttg/ttg/parsec/buffer.h b/ttg/ttg/parsec/buffer.h index 579567337..e0f4a6686 100644 --- a/ttg/ttg/parsec/buffer.h +++ b/ttg/ttg/parsec/buffer.h @@ -71,18 +71,32 @@ namespace detail { PtrT m_ptr; // keep a reference if PtrT is a shared_ptr std::size_t m_size; - void allocate(std::size_t size) { + void do_allocate() { if constexpr (std::is_pointer_v) { - m_ptr = allocator_traits::allocate(m_allocator, size); + m_ptr = allocator_traits::allocate(m_allocator, m_size); } this->device_private = m_ptr; - m_size = size; } - void deallocate() { - allocator_traits::deallocate(m_allocator, static_cast(this->device_private), this->m_size); - this->device_private = nullptr; - this->m_size = 0; + void do_deallocate() { + if constexpr (std::is_pointer_v) { + if (this->device_private != nullptr) { + auto ptr = m_ptr; + this->device_private = nullptr; + this->m_ptr = nullptr; + allocator_traits::deallocate(m_allocator, ptr, this->m_size); + } + } + } + + static void allocate(parsec_data_copy_t *parsec_copy, int device) { + data_copy_type* copy = static_cast(parsec_copy); + copy->do_allocate(parsec_copy->original->nb_elts); + } + + static void deallocate(parsec_data_copy_t *parsec_copy, int device) { + data_copy_type* copy = static_cast(parsec_copy); + copy->do_deallocate(); } public: @@ -99,20 +113,37 @@ namespace detail { constexpr const bool is_empty_allocator = std::is_same_v>; assert(is_empty_allocator); m_ptr = std::move(ptr); + this->m_size = size; + this->dtt = parsec_datatype_int8_t; this->device_private = const_cast(to_address(m_ptr)); } void construct(std::size_t size, + ttg::scope scope, const allocator_type& alloc = allocator_type()) { constexpr const bool is_empty_allocator = std::is_same_v>; assert(!is_empty_allocator); m_allocator = alloc; - allocate(size); - this->device_private = m_ptr; + this->m_size = size; + this->dtt = parsec_datatype_int8_t; + if (scope == ttg::scope::Allocate) { + /* if the user only requests an allocation on the device + * we don't allocate host memory but provide PaRSEC with + * a way to request host memory from us. */ + this->alloc_cb = &allocate; + this->release_cb = &deallocate; + } else { + /* the user requested that the data be sync'ed into the device + * so we need to provide host memory for the user to fill prior */ + do_allocate(); + this->device_private = m_ptr; + } } ~data_copy_type() { - this->deallocate(); + this->alloc_cb = nullptr; + this->release_cb = nullptr; + this->do_deallocate(); } }; @@ -142,7 +173,7 @@ namespace detail { /* create the host copy and allocate host memory */ data_copy_type *copy = PARSEC_OBJ_NEW(data_copy_type); - copy->construct(size, allocator); + copy->construct(size, scope, allocator); parsec_data_copy_attach(data, copy, 0); /* adjust data flags */ diff --git a/ttg/ttg/parsec/devicefunc.h b/ttg/ttg/parsec/devicefunc.h index 080660205..55be37afd 100644 --- a/ttg/ttg/parsec/devicefunc.h +++ b/ttg/ttg/parsec/devicefunc.h @@ -128,6 +128,10 @@ namespace ttg_parsec { /* enqueue the transfer into the compute stream to come back once the compute and transfer are complete */ if (data->owner_device != 0) { parsec_device_gpu_module_t *device_module = detail::parsec_ttg_caller->dev_ptr->device; + if (nullptr == data->device_copies[0]->device_private) { + assert(nullptr != data->device_copies[0]->alloc_cb); + data->device_copies[0]->alloc_cb(data->device_copies[0]); + } device_module->memcpy_async(device_module, stream, data->device_copies[0]->device_private, data->device_copies[data->owner_device]->device_private,