Skip to content

Commit

Permalink
Provide host memory allocation/release callbacks to the copies
Browse files Browse the repository at this point in the history
Allow PaRSEC to allocate host memory on demand, e.g., when data is
evicted or we move data to a host task. Most data may never be needed
on the host, so it is wasteful to allocate it eagerly.

Signed-off-by: Joseph Schuchart <[email protected]>
  • Loading branch information
devreal committed Dec 9, 2024
1 parent f23639d commit e84885f
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 12 deletions.
2 changes: 1 addition & 1 deletion cmake/modules/FindOrFetchPARSEC.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ if (NOT TARGET PaRSEC::parsec)
FetchContent_Declare(
PARSEC
GIT_REPOSITORY https://github.com/devreal/parsec-1.git
GIT_TAG ${TTG_TRACKED_PARSEC_TAG}
GIT_TAG data_copy_alloc_callbacks
)
FetchContent_MakeAvailable(PARSEC)
FetchContent_GetProperties(PARSEC
Expand Down
53 changes: 42 additions & 11 deletions ttg/ttg/parsec/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,32 @@ namespace detail {
PtrT m_ptr; // keep a reference if PtrT is a shared_ptr
std::size_t m_size;

void allocate(std::size_t size) {
void do_allocate() {
if constexpr (std::is_pointer_v<PtrT>) {
m_ptr = allocator_traits::allocate(m_allocator, size);
m_ptr = allocator_traits::allocate(m_allocator, m_size);
}
this->device_private = m_ptr;
m_size = size;
}

void deallocate() {
allocator_traits::deallocate(m_allocator, static_cast<value_type*>(this->device_private), this->m_size);
this->device_private = nullptr;
this->m_size = 0;
/**
 * Release the host buffer held by this copy, if there is one.
 *
 * Only acts when PtrT is a raw pointer; for any other PtrT the body is
 * compiled out and the call does nothing.
 */
void do_deallocate() {
  if constexpr (std::is_pointer_v<PtrT>) {
    if (nullptr == this->device_private) {
      return;  // no buffer is currently published, nothing to release
    }
    /* clear both handles first, then hand the memory back to the allocator */
    auto released = this->m_ptr;
    this->m_ptr = nullptr;
    this->device_private = nullptr;
    allocator_traits::deallocate(m_allocator, released, this->m_size);
  }
}

/**
 * Allocation callback registered with PaRSEC as `alloc_cb`.
 *
 * Downcasts the PaRSEC copy to data_copy_type and allocates its host buffer
 * on demand.
 *
 * @param parsec_copy the copy to allocate memory for; must actually be a
 *                    data_copy_type since it is downcast unconditionally
 * @param device      not used by this implementation
 */
static void allocate(parsec_data_copy_t *parsec_copy, int device) {
  data_copy_type* copy = static_cast<data_copy_type*>(parsec_copy);
  /* do_allocate() takes no arguments: it sizes the buffer from m_size,
   * which construct() records before installing this callback */
  copy->do_allocate();
}

/**
 * Release callback registered with PaRSEC as `release_cb`.
 *
 * Downcasts the PaRSEC copy to data_copy_type and forwards to
 * do_deallocate().
 *
 * @param parsec_copy the copy whose memory should be released
 * @param device      not used by this implementation
 */
static void deallocate(parsec_data_copy_t *parsec_copy, int device) {
  static_cast<data_copy_type*>(parsec_copy)->do_deallocate();
}

public:
Expand All @@ -99,20 +113,37 @@ namespace detail {
constexpr const bool is_empty_allocator = std::is_same_v<Allocator, empty_allocator<value_type>>;
assert(is_empty_allocator);
m_ptr = std::move(ptr);
this->m_size = size;
this->dtt = parsec_datatype_int8_t;
this->device_private = const_cast<value_type*>(to_address(m_ptr));
}

void construct(std::size_t size,
ttg::scope scope,
const allocator_type& alloc = allocator_type()) {
constexpr const bool is_empty_allocator = std::is_same_v<Allocator, empty_allocator<value_type>>;
assert(!is_empty_allocator);
m_allocator = alloc;
allocate(size);
this->device_private = m_ptr;
this->m_size = size;
this->dtt = parsec_datatype_int8_t;
if (scope == ttg::scope::Allocate) {
/* if the user only requests an allocation on the device
* we don't allocate host memory but provide PaRSEC with
* a way to request host memory from us. */
this->alloc_cb = &allocate;
this->release_cb = &deallocate;
} else {
/* the user requested that the data be sync'ed into the device
 * so we need to provide host memory for the user to fill beforehand */
do_allocate();
this->device_private = m_ptr;
}
}

~data_copy_type() {
this->deallocate();
this->alloc_cb = nullptr;
this->release_cb = nullptr;
this->do_deallocate();
}
};

Expand Down Expand Up @@ -142,7 +173,7 @@ namespace detail {

/* create the host copy and allocate host memory */
data_copy_type *copy = PARSEC_OBJ_NEW(data_copy_type);
copy->construct(size, allocator);
copy->construct(size, scope, allocator);
parsec_data_copy_attach(data, copy, 0);

/* adjust data flags */
Expand Down
4 changes: 4 additions & 0 deletions ttg/ttg/parsec/devicefunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ namespace ttg_parsec {
/* enqueue the transfer into the compute stream to come back once the compute and transfer are complete */
if (data->owner_device != 0) {
parsec_device_gpu_module_t *device_module = detail::parsec_ttg_caller->dev_ptr->device;
if (nullptr == data->device_copies[0]->device_private) {
assert(nullptr != data->device_copies[0]->alloc_cb);
data->device_copies[0]->alloc_cb(data->device_copies[0]);
}
device_module->memcpy_async(device_module, stream,
data->device_copies[0]->device_private,
data->device_copies[data->owner_device]->device_private,
Expand Down

0 comments on commit e84885f

Please sign in to comment.