Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fabio/cmd buffer kernel update staging #2073

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 66 additions & 17 deletions include/ur_api.h

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1932,6 +1932,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)(
const size_t *,
const size_t *,
uint32_t,
ur_kernel_handle_t *,
uint32_t,
const ur_exp_command_buffer_sync_point_t *,
ur_exp_command_buffer_sync_point_t *,
ur_exp_command_buffer_command_handle_t *);
Expand Down
8 changes: 8 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpExternalSemaphoreDesc(const struct
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyRegion(const struct ur_exp_image_copy_region_t params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_device_command_buffer_update_capability_flag_t enum
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceCommandBufferUpdateCapabilityFlags(enum ur_device_command_buffer_update_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_exp_command_buffer_info_t enum
/// @returns
Expand Down
138 changes: 131 additions & 7 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t
template <>
inline ur_result_t printFlag<ur_exp_image_copy_flag_t>(std::ostream &os, uint32_t flag);

template <>
inline ur_result_t printFlag<ur_device_command_buffer_update_capability_flag_t>(std::ostream &os, uint32_t flag);
template <>
inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_info_t value, size_t size);

Expand Down Expand Up @@ -335,6 +337,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_mem_desc_t params);
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_semaphore_desc_t params);
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_image_copy_region_t params);
inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_info_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_command_info_t value);
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params);
Expand Down Expand Up @@ -2541,8 +2544,8 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP";
break;
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:
os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP";
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP:
os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP";
break;
case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP:
os << "UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP";
Expand Down Expand Up @@ -4049,15 +4052,16 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info

os << ")";
} break;
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
const ur_device_command_buffer_update_capability_flags_t *tptr = (const ur_device_command_buffer_update_capability_flags_t *)ptr;
if (sizeof(ur_device_command_buffer_update_capability_flags_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_command_buffer_update_capability_flags_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;
ur::details::printFlag<ur_device_command_buffer_update_capability_flag_t>(os,
*tptr);

os << ")";
} break;
Expand Down Expand Up @@ -9701,6 +9705,103 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_image_copy
return os;
}
///////////////////////////////////////////////////////////////////////////////
/// @brief Stream insertion operator for the ur_device_command_buffer_update_capability_flag_t type
/// @details Writes the symbolic name of the enumerator to the stream, or
///     "unknown enumerator" when the value matches none of the cases below.
/// @returns
///     std::ostream &
inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value) {
    // Resolve the text first so there is a single insertion point at the end.
    const char *text = "unknown enumerator";
    switch (value) {
    case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS:
        text = "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS";
        break;
    case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE:
        text = "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE";
        break;
    case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE:
        text = "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE";
        break;
    case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET:
        text = "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET";
        break;
    case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE:
        text = "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE";
        break;
    default:
        break;
    }
    os << text;
    return os;
}

namespace ur::details {
///////////////////////////////////////////////////////////////////////////////
/// @brief Print a ur_device_command_buffer_update_capability_flag_t bit-field
/// @details Each known flag whose bits are all set in `flag` is written by
///     name, with " | " between entries. Bits left over after the known flags
///     are removed are written as "unknown bit flags <32-bit binary>". If
///     nothing at all is set, "0" is written.
/// @returns
///     - ::UR_RESULT_SUCCESS
template <>
inline ur_result_t printFlag<ur_device_command_buffer_update_capability_flag_t>(std::ostream &os, uint32_t flag) {
    uint32_t remaining = flag;
    bool printedAny = false;

    // Writes the name of `bit` when it is fully present in the remaining
    // value, then clears it so only unrecognised bits are left at the end.
    const auto emitIfSet = [&os, &remaining, &printedAny](ur_device_command_buffer_update_capability_flag_t bit) {
        const uint32_t mask = static_cast<uint32_t>(bit);
        if ((remaining & mask) != mask) {
            return;
        }
        remaining ^= mask;
        if (printedAny) {
            os << " | ";
        }
        printedAny = true;
        os << bit;
    };

    // The order here fixes the order of names in the output.
    emitIfSet(UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS);
    emitIfSet(UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE);
    emitIfSet(UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE);
    emitIfSet(UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET);
    emitIfSet(UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE);

    if (remaining != 0) {
        // Anything still set does not correspond to a flag listed above.
        std::bitset<32> bits(remaining);
        if (printedAny) {
            os << " | ";
        }
        os << "unknown bit flags " << bits;
    } else if (!printedAny) {
        os << "0";
    }
    return UR_RESULT_SUCCESS;
}
} // namespace ur::details
///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_exp_command_buffer_info_t type
/// @returns
/// std::ostream &
Expand Down Expand Up @@ -9953,6 +10054,12 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu
ur::details::printStruct(os,
(params.pNext));

os << ", ";
os << ".hNewKernel = ";

ur::details::printPtr(os,
(params.hNewKernel));

os << ", ";
os << ".numNewMemObjArgs = ";

Expand Down Expand Up @@ -15951,6 +16058,23 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
ur::details::printPtr(os,
*(params->ppLocalWorkSize));

os << ", ";
os << ".numKernelAlternatives = ";

os << *(params->pnumKernelAlternatives);

os << ", ";
os << ".phKernelAlternatives = {";
for (size_t i = 0; *(params->pphKernelAlternatives) != NULL && i < *params->pnumKernelAlternatives; ++i) {
if (i != 0) {
os << ", ";
}

ur::details::printPtr(os,
(*(params->pphKernelAlternatives))[i]);
}
os << "}";

os << ", ";
os << ".numSyncPointsInWaitList = ";

Expand Down
38 changes: 29 additions & 9 deletions scripts/core/EXP-COMMAND-BUFFER.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ were obtained from.
// sync-point
${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim,
pGlobalWorkOffset, pGlobalWorkSize,
pLocalWorkSize, 1, &syncPoint,
nullptr, nullptr);
pLocalWorkSize, 0, nullptr, 1,
&syncPoint, nullptr, nullptr);

Enqueueing Command-Buffers
--------------------------------------------------------------------------------
Expand All @@ -167,13 +167,21 @@ Updating Command-Buffer Commands

An adapter implementing the command-buffer experimental feature can optionally
support updating the configuration of kernel commands recorded to a
command-buffer. Support for this is reported by returning true in the
${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query.
command-buffer. The attributes of kernel commands that can be updated are
device specific and can be queried using the
${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP query.

Updating kernel commands is done by passing the new kernel configuration
to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of
the kernel command to update. Configurations that can be changed are the
parameters to the kernel and the execution ND-Range.
kernel handle, the parameters to the kernel and the execution ND-Range.

Kernel handles that might be used to update the kernel of a command need
to be registered when the command is created. This can be done
using the ``phKernelAlternatives`` parameter of
${x}CommandBufferAppendKernelLaunchExp. The command can then be updated
to use the new kernel handle by passing it to
${x}CommandBufferUpdateKernelLaunchExp.

.. parsed-literal::

Expand All @@ -187,12 +195,14 @@ parameters to the kernel and the execution ND-Range.
${x}CommandBufferCreateExp(hContext, hDevice, &desc, &hCommandBuffer);

// Append a kernel command which has two buffer parameters, an input
// and an output.
// and an output. Register hNewKernel as an alternative kernel handle
// which can later be used to change the kernel handle associated
// with this command.
${x}_exp_command_buffer_command_handle_t hCommand;
${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim,
pGlobalWorkOffset, pGlobalWorkSize,
pLocalWorkSize, 0, nullptr,
nullptr, &hCommand);
pLocalWorkSize, 1, &hNewKernel,
0, nullptr, nullptr, &hCommand);

// Close the command-buffer before updating
${x}CommandBufferFinalizeExp(hCommandBuffer);
Expand Down Expand Up @@ -220,6 +230,7 @@ parameters to the kernel and the execution ND-Range.
${x}_exp_command_buffer_update_kernel_launch_desc_t update {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
nullptr, // pNext
hNewKernel, // hNewKernel
2, // numNewMemObjArgs
0, // numNewPointerArgs
0, // numNewValueArgs
Expand Down Expand Up @@ -249,7 +260,13 @@ Enums
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* ${x}_device_info_t
* ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP
* ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP
* ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP
* ${x}_device_command_buffer_update_capability_flags_t
* KERNEL_ARGUMENTS
* LOCAL_WORK_SIZE
* GLOBAL_WORK_SIZE
* GLOBAL_WORK_OFFSET
* KERNEL_HANDLE
* ${x}_result_t
* ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP
* ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP
Expand Down Expand Up @@ -340,6 +357,8 @@ Changelog
+-----------+-------------------------------------------------------+
| 1.4 | Add function definitions for kernel command update |
+-----------+-------------------------------------------------------+
| 1.5 | Add support for updating kernel handles. |
+-----------+-------------------------------------------------------+

Contributors
--------------------------------------------------------------------------------
Expand All @@ -348,3 +367,4 @@ Contributors
* Ewan Crawford `[email protected] <[email protected]>`_
* Maxime France-Pillois `[email protected] <[email protected]>`_
* Aaron Greig `[email protected] <[email protected]>`_
* Fábio Mestre `[email protected] <[email protected]>`_
Loading
Loading