Skip to content

Commit

Permalink
Fix kernel argument indices bug
Browse files Browse the repository at this point in the history
  • Loading branch information
fabiomestre committed Jan 14, 2025
1 parent 39df031 commit 0a071be
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 38 deletions.
31 changes: 19 additions & 12 deletions source/adapters/cuda/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ struct ur_kernel_handle_t_ {
args_size_t ParamSizes;
/// Byte offset into /p Storage allocation for each parameter.
args_index_t Indices;
/// Largest argument index that has been added to this kernel so far.
size_t InsertPos = 0;
/// Aligned size in bytes for each local memory parameter after padding has
/// been added. Zero if the argument at the index isn't a local memory
/// argument.
Expand Down Expand Up @@ -101,6 +103,7 @@ struct ur_kernel_handle_t_ {
/// Implicit offset argument is kept at the back of the indices collection.
void addArg(size_t Index, size_t Size, const void *Arg,
size_t LocalSize = 0) {
// Expand storage to accommodate this Index if needed.
if (Index + 2 > Indices.size()) {
// Move implicit offset argument index with the end
Indices.resize(Index + 2, Indices.back());
Expand All @@ -109,14 +112,21 @@ struct ur_kernel_handle_t_ {
AlignedLocalMemSize.resize(Index + 1);
OriginalLocalMemSize.resize(Index + 1);
}
ParamSizes[Index] = Size;
// calculate the insertion point on the array
size_t InsertPos = std::accumulate(std::begin(ParamSizes),
std::begin(ParamSizes) + Index, 0);
// Update the stored value for the argument
std::memcpy(&Storage[InsertPos], Arg, Size);
Indices[Index] = &Storage[InsertPos];
AlignedLocalMemSize[Index] = LocalSize;

// Copy new argument to storage if it hasn't been added before.
if (ParamSizes[Index] == 0) {
ParamSizes[Index] = Size;
std::memcpy(&Storage[InsertPos], Arg, Size);
Indices[Index] = &Storage[InsertPos];
AlignedLocalMemSize[Index] = LocalSize;
InsertPos += Size;
}
// Otherwise, update the existing argument.
else {
std::memcpy(Indices[Index], Arg, Size);
AlignedLocalMemSize[Index] = LocalSize;
assert(Size == ParamSizes[Index]);
}
}

/// Returns the padded size and offset of a local memory argument.
Expand Down Expand Up @@ -177,10 +187,7 @@ struct ur_kernel_handle_t_ {
AlignedLocalMemSize[SuccIndex] = SuccAlignedLocalSize;

// Store new offset into local data
const size_t InsertPos =
std::accumulate(std::begin(ParamSizes),
std::begin(ParamSizes) + SuccIndex, size_t{0});
std::memcpy(&Storage[InsertPos], &SuccAlignedLocalOffset,
std::memcpy(Indices[SuccIndex], &SuccAlignedLocalOffset,
sizeof(size_t));
}
}
Expand Down
65 changes: 39 additions & 26 deletions source/adapters/hip/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ struct ur_kernel_handle_t_ {
args_size_t ParamSizes;
/// Byte offset into /p Storage allocation for each parameter.
args_index_t Indices;
/// Largest argument index that has been added to this kernel so far.
size_t InsertPos = 0;
/// Aligned size in bytes for each local memory parameter after padding has
/// been added. Zero if the argument at the index isn't a local memory
/// argument.
Expand Down Expand Up @@ -95,22 +97,30 @@ struct ur_kernel_handle_t_ {
/// Implicit offset argument is kept at the back of the indices collection.
void addArg(size_t Index, size_t Size, const void *Arg,
size_t LocalSize = 0) {
// Expand storage to accommodate this Index if needed.
if (Index + 2 > Indices.size()) {
// Move implicit offset argument Index with the end
// Move implicit offset argument index with the end
Indices.resize(Index + 2, Indices.back());
// Ensure enough space for the new argument
ParamSizes.resize(Index + 1);
AlignedLocalMemSize.resize(Index + 1);
OriginalLocalMemSize.resize(Index + 1);
}
ParamSizes[Index] = Size;
// calculate the insertion point on the array
size_t InsertPos = std::accumulate(std::begin(ParamSizes),
std::begin(ParamSizes) + Index, 0);
// Update the stored value for the argument
std::memcpy(&Storage[InsertPos], Arg, Size);
Indices[Index] = &Storage[InsertPos];
AlignedLocalMemSize[Index] = LocalSize;

// Copy new argument to storage if it hasn't been added before.
if (ParamSizes[Index] == 0) {
ParamSizes[Index] = Size;
std::memcpy(&Storage[InsertPos], Arg, Size);
Indices[Index] = &Storage[InsertPos];
AlignedLocalMemSize[Index] = LocalSize;
InsertPos += Size;
}
// Otherwise, update the existing argument.
else {
std::memcpy(Indices[Index], Arg, Size);
AlignedLocalMemSize[Index] = LocalSize;
assert(Size == ParamSizes[Index]);
}
}

/// Returns the padded size and offset of a local memory argument.
Expand Down Expand Up @@ -151,20 +161,11 @@ struct ur_kernel_handle_t_ {
return std::make_pair(AlignedLocalSize, AlignedLocalOffset);
}

void addLocalArg(size_t Index, size_t Size) {
// Get the aligned argument size and offset into local data
auto [AlignedLocalSize, AlignedLocalOffset] =
calcAlignedLocalArgument(Index, Size);

// Store argument details
addArg(Index, sizeof(size_t), (const void *)&(AlignedLocalOffset),
AlignedLocalSize);

// For every existing local argument which follows at later argument
// indices, update the offset and pointer into the kernel local memory.
// Required as padding will need to be recalculated.
// Iterate over all existing local argument which follows StartIndex
// index, update the offset and pointer into the kernel local memory.
void updateLocalArgOffset(size_t StartIndex) {
const size_t NumArgs = Indices.size() - 1; // Accounts for implicit arg
for (auto SuccIndex = Index + 1; SuccIndex < NumArgs; SuccIndex++) {
for (auto SuccIndex = StartIndex; SuccIndex < NumArgs; SuccIndex++) {
const size_t OriginalLocalSize = OriginalLocalMemSize[SuccIndex];
if (OriginalLocalSize == 0) {
// Skip if successor argument isn't a local memory arg
Expand All @@ -179,14 +180,26 @@ struct ur_kernel_handle_t_ {
AlignedLocalMemSize[SuccIndex] = SuccAlignedLocalSize;

// Store new offset into local data
const size_t InsertPos =
std::accumulate(std::begin(ParamSizes),
std::begin(ParamSizes) + SuccIndex, size_t{0});
std::memcpy(&Storage[InsertPos], &SuccAlignedLocalOffset,
std::memcpy(Indices[SuccIndex], &SuccAlignedLocalOffset,
sizeof(size_t));
}
}

void addLocalArg(size_t Index, size_t Size) {
// Get the aligned argument size and offset into local data
auto [AlignedLocalSize, AlignedLocalOffset] =
calcAlignedLocalArgument(Index, Size);

// Store argument details
addArg(Index, sizeof(size_t), (const void *)&(AlignedLocalOffset),
AlignedLocalSize);

// For every existing local argument which follows at later argument
// indices, update the offset and pointer into the kernel local memory.
// Required as padding will need to be recalculated.
updateLocalArgOffset(Index + 1);
}

void addMemObjArg(int Index, ur_mem_handle_t hMem, ur_mem_flags_t Flags) {
assert(hMem && "Invalid mem handle");
// To avoid redundancy we are not storing mem obj with index i at index
Expand Down

0 comments on commit 0a071be

Please sign in to comment.