From c5fb672f7b6f4a244483139b2743961d80466161 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Mon, 30 Sep 2024 21:13:50 +0000 Subject: [PATCH 01/25] Update core program.yml --- scripts/core/program.yml | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/scripts/core/program.yml b/scripts/core/program.yml index 23f07d4287..e35c895360 100644 --- a/scripts/core/program.yml +++ b/scripts/core/program.yml @@ -119,7 +119,7 @@ returns: - "`length == 0`" --- #-------------------------------------------------------------------------- type: function -desc: "Create a program object from device native binary." +desc: "Create a program object from native binaries for the specified devices." class: $xProgram name: CreateWithBinary decl: static @@ -128,22 +128,25 @@ analogue: - "**clCreateProgramWithBinary**" details: - "The application may call this function from simultaneous threads." - - "Following a successful call to this entry point, `phProgram` will contain a binary of type $X_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or $X_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`." - - "The device specified by `hDevice` must be device associated with context." - - "The adapter may (but is not required to) perform validation of the provided module during this call." + - "Following a successful call to this entry point, `phProgram` will contain binaries of type $X_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or $X_PROGRAM_BINARY_TYPE_LIBRARY for the specified devices in `phDevices`." + - "The devices specified by `phDevices` must be associated with the context." + - "The adapter may (but is not required to) perform validation of the provided modules during this call." params: - type: $x_context_handle_t name: hContext desc: "[in] handle of the context instance" - - type: $x_device_handle_t - name: hDevice - desc: "[in] handle to device associated with binary." - - type: size_t - name: size - desc: "[in] size in bytes." - - type: const uint8_t* - name: pBinary - desc: "[in] pointer to binary." + - type: uint32_t + name: numDevices + desc: "[in] number of devices" + - type: $x_device_handle_t* + name: phDevices + desc: "[in][range(0, numDevices)] a pointer to a list of device handles. The binaries are loaded for devices specified in this list." + - type: size_t* + name: pLengths + desc: "[in][range(0, numDevices)] array of sizes of program binaries specified by `pBinaries` (in bytes)." + - type: const uint8_t** + name: ppBinaries + desc: "[in][range(0, numDevices)] pointer to program binaries to be loaded for devices specified by `phDevices`." - type: const $x_program_properties_t* name: pProperties desc: "[in][optional] pointer to program creation properties." @@ -156,7 +159,7 @@ returns: - $X_RESULT_ERROR_INVALID_SIZE: - "`NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0`" - $X_RESULT_ERROR_INVALID_NATIVE_BINARY: - - "If `pBinary` isn't a valid binary for `hDevice.`" + - "If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.`" --- #-------------------------------------------------------------------------- type: function desc: "Produces an executable program from one program, negates need for the linking step." From 35f0b54edd756617b20fbc02dac1e6d44a1faa68 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Mon, 30 Sep 2024 21:20:05 +0000 Subject: [PATCH 02/25] Generated sources --- include/ur_api.h | 36 ++++++++++------- include/ur_ddi.h | 7 ++-- include/ur_print.hpp | 39 +++++++++++++++---- .../level_zero/ur_interface_loader.hpp | 6 +-- source/adapters/mock/ur_mockddi.cpp | 17 +++++--- source/loader/layers/tracing/ur_trcddi.cpp | 22 +++++++---- source/loader/layers/validation/ur_valddi.cpp | 34 +++++++++------- source/loader/ur_ldrddi.cpp | 26 +++++++++---- source/loader/ur_libapi.cpp | 37 +++++++++++------- source/ur_api.cpp | 33 ++++++++++------ 10 files changed, 169 insertions(+), 88 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 9897d892b6..02e52c0b4f 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -4202,17 +4202,19 @@ urProgramCreateWithIL( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Create a program object from device native binary. +/// @brief Create a program object from native binaries for the specified +/// devices. /// /// @details /// - The application may call this function from simultaneous threads. /// - Following a successful call to this entry point, `phProgram` will -/// contain a binary of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or -/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`. -/// - The device specified by `hDevice` must be device associated with +/// contain binaries of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or +/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for the specified devices in +/// `phDevices`. +/// - The devices specified by `phDevices` must be associated with the /// context. /// - The adapter may (but is not required to) perform validation of the -/// provided module during this call. +/// provided modules during this call. /// /// @remarks /// _Analogues_ @@ -4225,21 +4227,26 @@ urProgramCreateWithIL( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` -/// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pBinary` +/// + `NULL == phDevices` +/// + `NULL == pLengths` +/// + `NULL == ppBinaries` /// + `NULL == phProgram` /// + `NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas` /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0` /// - ::UR_RESULT_ERROR_INVALID_NATIVE_BINARY -/// + If `pBinary` isn't a valid binary for `hDevice.` +/// + If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.` UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t *pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t **ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t *pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t *phProgram ///< [out] pointer to handle of Program object created. ); @@ -10319,9 +10326,10 @@ typedef struct ur_program_create_with_il_params_t { /// allowing the callback the ability to modify the parameter's value typedef struct ur_program_create_with_binary_params_t { ur_context_handle_t *phContext; - ur_device_handle_t *phDevice; - size_t *psize; - const uint8_t **ppBinary; + uint32_t *pnumDevices; + ur_device_handle_t **pphDevices; + size_t **ppLengths; + const uint8_t ***pppBinaries; const ur_program_properties_t **ppProperties; ur_program_handle_t **pphProgram; } ur_program_create_with_binary_params_t; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index b4d6f2bade..80a0003fca 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -284,9 +284,10 @@ typedef ur_result_t(UR_APICALL *ur_pfnProgramCreateWithIL_t)( /// @brief Function-pointer for urProgramCreateWithBinary typedef ur_result_t(UR_APICALL *ur_pfnProgramCreateWithBinary_t)( ur_context_handle_t, - ur_device_handle_t, - size_t, - const uint8_t *, + uint32_t, + ur_device_handle_t *, + size_t *, + const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *); diff --git a/include/ur_print.hpp b/include/ur_print.hpp index a908fd9275..79a34cf361 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -11164,21 +11164,44 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".numDevices = "; - ur::details::printPtr(os, - *(params->phDevice)); + os << *(params->pnumDevices); os << ", "; - os << ".size = "; + os << ".phDevices = {"; + for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - os << *(params->psize); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; os << ", "; - os << ".pBinary = "; + os << ".pLengths = {"; + for (size_t i = 0; *(params->ppLengths) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - *(params->ppBinary)); + os << (*(params->ppLengths))[i]; + } + os << "}"; + + os << ", "; + os << ".ppBinaries = {"; + for (size_t i = 0; *(params->pppBinaries) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pppBinaries))[i]); + } + os << "}"; os << ", "; os << ".pProperties = "; diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 540eab7292..1207f7776b 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -187,9 +187,9 @@ ur_result_t urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram); ur_result_t urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram); + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram); ur_result_t urProgramBuild(ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions); ur_result_t urProgramCompile(ur_context_handle_t hContext, diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 0ec36ed687..8604841bf1 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -3157,10 +3157,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( /// @brief Intercept function for urProgramCreateWithBinary __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -3169,7 +3175,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_result_t result = UR_RESULT_SUCCESS; ur_program_create_with_binary_params_t params = { - &hContext, &hDevice, &size, &pBinary, &pProperties, &phProgram}; + &hContext, &numDevices, &phDevices, &pLengths, + &ppBinaries, &pProperties, &phProgram}; auto beforeCallback = reinterpret_cast( mock::getCallbacks().get_before_callback("urProgramCreateWithBinary")); diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 57d7fe3702..c0ef0fc037 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -2650,10 +2650,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( /// @brief Intercept function for urProgramCreateWithBinary __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -2667,7 +2673,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( } ur_program_create_with_binary_params_t params = { - &hContext, &hDevice, &size, &pBinary, &pProperties, &phProgram}; + &hContext, &numDevices, &phDevices, &pLengths, + &ppBinaries, &pProperties, &phProgram}; uint64_t instance = getContext()->notify_begin(UR_FUNCTION_PROGRAM_CREATE_WITH_BINARY, "urProgramCreateWithBinary", ¶ms); @@ -2675,8 +2682,9 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( auto &logger = getContext()->logger; logger.info(" ---> urProgramCreateWithBinary\n"); - ur_result_t result = pfnCreateWithBinary(hContext, hDevice, size, pBinary, - pProperties, phProgram); + ur_result_t result = + pfnCreateWithBinary(hContext, numDevices, phDevices, pLengths, + ppBinaries, pProperties, phProgram); getContext()->notify_end(UR_FUNCTION_PROGRAM_CREATE_WITH_BINARY, "urProgramCreateWithBinary", ¶ms, &result, diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 0911745f03..65d0c5f4ab 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -2721,10 +2721,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( /// @brief Intercept function for urProgramCreateWithBinary __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -2742,11 +2748,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } - if (NULL == hDevice) { - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + if (NULL == phDevices) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (NULL == pBinary) { + if (NULL == pLengths) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == ppBinaries) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } @@ -2770,13 +2780,9 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( getContext()->refCountContext->logInvalidReference(hContext); } - if (getContext()->enableLifetimeValidation && - !getContext()->refCountContext->isReferenceValid(hDevice)) { - getContext()->refCountContext->logInvalidReference(hDevice); - } - - ur_result_t result = pfnCreateWithBinary(hContext, hDevice, size, pBinary, - pProperties, phProgram); + ur_result_t result = + pfnCreateWithBinary(hContext, numDevices, phDevices, pLengths, + ppBinaries, pProperties, phProgram); if (getContext()->enableLeakChecking && result == UR_RESULT_SUCCESS) { getContext()->refCountContext->createRefCount(*phProgram); diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 4e50271b58..5bcfe0d954 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -2563,10 +2563,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( /// @brief Intercept function for urProgramCreateWithBinary __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -2586,12 +2592,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; + // convert loader handles to platform handles + auto phDevicesLocal = std::vector(numDevices); + for (size_t i = 0; i < numDevices; ++i) { + phDevicesLocal[i] = + reinterpret_cast(phDevices[i])->handle; + } // forward to device-platform - result = pfnCreateWithBinary(hContext, hDevice, size, pBinary, pProperties, - phProgram); + result = pfnCreateWithBinary(hContext, numDevices, phDevicesLocal.data(), + pLengths, ppBinaries, pProperties, phProgram); if (UR_RESULT_SUCCESS != result) { return result; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index ca6d4da65d..6b0fbcf7d7 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -2996,17 +2996,19 @@ ur_result_t UR_APICALL urProgramCreateWithIL( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Create a program object from device native binary. +/// @brief Create a program object from native binaries for the specified +/// devices. /// /// @details /// - The application may call this function from simultaneous threads. /// - Following a successful call to this entry point, `phProgram` will -/// contain a binary of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or -/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`. -/// - The device specified by `hDevice` must be device associated with +/// contain binaries of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or +/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for the specified devices in +/// `phDevices`. +/// - The devices specified by `phDevices` must be associated with the /// context. /// - The adapter may (but is not required to) perform validation of the -/// provided module during this call. +/// provided modules during this call. /// /// @remarks /// _Analogues_ @@ -3019,21 +3021,28 @@ ur_result_t UR_APICALL urProgramCreateWithIL( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` -/// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pBinary` +/// + `NULL == phDevices` +/// + `NULL == pLengths` +/// + `NULL == ppBinaries` /// + `NULL == phProgram` /// + `NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas` /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0` /// - ::UR_RESULT_ERROR_INVALID_NATIVE_BINARY -/// + If `pBinary` isn't a valid binary for `hDevice.` +/// + If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.` ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -3045,8 +3054,8 @@ ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnCreateWithBinary(hContext, hDevice, size, pBinary, pProperties, - phProgram); + return pfnCreateWithBinary(hContext, numDevices, phDevices, pLengths, + ppBinaries, pProperties, phProgram); } catch (...) { return exceptionToResult(std::current_exception()); } diff --git a/source/ur_api.cpp b/source/ur_api.cpp index cdfdb648af..ebe0a6ede9 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -2563,17 +2563,19 @@ ur_result_t UR_APICALL urProgramCreateWithIL( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Create a program object from device native binary. +/// @brief Create a program object from native binaries for the specified +/// devices. /// /// @details /// - The application may call this function from simultaneous threads. /// - Following a successful call to this entry point, `phProgram` will -/// contain a binary of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or -/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`. -/// - The device specified by `hDevice` must be device associated with +/// contain binaries of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or +/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for the specified devices in +/// `phDevices`. +/// - The devices specified by `phDevices` must be associated with the /// context. /// - The adapter may (but is not required to) perform validation of the -/// provided module during this call. +/// provided modules during this call. /// /// @remarks /// _Analogues_ @@ -2586,21 +2588,28 @@ ur_result_t UR_APICALL urProgramCreateWithIL( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` -/// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pBinary` +/// + `NULL == phDevices` +/// + `NULL == pLengths` +/// + `NULL == ppBinaries` /// + `NULL == phProgram` /// + `NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas` /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0` /// - ::UR_RESULT_ERROR_INVALID_NATIVE_BINARY -/// + If `pBinary` isn't a valid binary for `hDevice.` +/// + If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.` ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t From 1aebc43d80239c696eb4b6d564dde58310019a2f Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Mon, 30 Sep 2024 23:12:55 -0700 Subject: [PATCH 03/25] Fix conformance test --- test/conformance/source/environment.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/conformance/source/environment.cpp b/test/conformance/source/environment.cpp index 495d51e1c1..0fd9a604f8 100644 --- a/test/conformance/source/environment.cpp +++ b/test/conformance/source/environment.cpp @@ -570,9 +570,11 @@ ur_result_t KernelsEnvironment::CreateProgram( backend == UR_PLATFORM_BACKEND_CUDA) { // The CUDA and HIP adapters do not support urProgramCreateWithIL so we // need to use urProgramCreateWithBinary instead. + auto size = binary.size(); + auto data = binary.data(); if (auto error = urProgramCreateWithBinary( - hContext, hDevice, binary.size(), - reinterpret_cast(binary.data()), properties, + hContext, 1, &hDevice, &size, + reinterpret_cast(&data), properties, phProgram)) { return error; } From f13f4ea8a2038584119cbd28d28afb4d6a284be0 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Wed, 18 Sep 2024 22:17:59 +0000 Subject: [PATCH 04/25] Add stub implementation to L0 adapter --- source/adapters/level_zero/program.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 5f5ec387a0..8433a06595 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -118,6 +118,26 @@ ur_result_t urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } +ur_result_t urProgramCreateWithBinaryExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t + *phDevices, ///< [in][range(0, numDevices)] a pointer to a list of + ///< device handles. The binaries are loaded for devices + ///< specified in this list. + size_t *pLengths, ///< [in][range(0, numDevices)] array of sizes of program + ///< binaries specified by `pBinaries` (in bytes). + const uint8_t * + *ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries + ///< to be loaded for devices specified by `phDevices`. + const ur_program_properties_t * + pProperties, ///< [in][optional] pointer to program creation properties. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of Program object created. +) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + ur_result_t urProgramBuild( ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t Program, ///< [in] Handle of the program to build. From 2a8e6e5001c7efb4cb62cf71d103ce8d2c089211 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 19 Sep 2024 11:49:45 -0700 Subject: [PATCH 05/25] Store per device data of the UR Program in a separate structure Currenlty we have multiple maps of device handle to some per-device data like the module handle, logs, binary etc. Instead of having separate map for each, just put that data into a separate structure and have one map. --- source/adapters/level_zero/program.hpp | 122 ++++++++++++++----------- 1 file changed, 71 insertions(+), 51 deletions(-) diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 42330adcbf..2aeeb674bd 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -10,6 +10,8 @@ #pragma once #include "common.hpp" +#include "context.hpp" +#include "device.hpp" struct ur_program_handle_t_ : _ur_object { // ur_program_handle_t_() {} @@ -68,50 +70,60 @@ struct ur_program_handle_t_ : _ur_object { // Construct a program in IL. ur_program_handle_t_(state St, ur_context_handle_t Context, const void *Input, size_t Length) - : Context{Context}, NativeDevice{nullptr}, NativeProperties{nullptr}, - OwnZeModule{true}, State{St}, Code{new uint8_t[Length]}, - CodeLength{Length}, ZeModule{nullptr}, ZeBuildLog{nullptr} { - std::memcpy(Code.get(), Input, Length); + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + SpirvCode{new uint8_t[Length]}, SpirvCodeLength{Length}, + InteropZeModule{nullptr} { + std::memcpy(SpirvCode.get(), Input, Length); + // All devices have the program in IL state. + for (auto &Device : Context->getDevices()) { + DeviceData &PerDevData = DeviceDataMap[Device->ZeDevice]; + PerDevData.State = St; + } } - // Construct a program in NATIVE. + // Construct a program in NATIVE for multiple devices. ur_program_handle_t_(state St, ur_context_handle_t Context, - ur_device_handle_t Device, + const uint32_t NumDevices, + const ur_device_handle_t *Devices, const ur_program_properties_t *Properties, - const void *Input, size_t Length) - : Context{Context}, NativeDevice(Device), NativeProperties(Properties), - OwnZeModule{true}, State{St}, Code{new uint8_t[Length]}, - CodeLength{Length}, ZeModule{nullptr}, ZeBuildLog{nullptr} { - std::memcpy(Code.get(), Input, Length); + const uint8_t **Inputs, const size_t *Lengths) + : Context{Context}, NativeProperties(Properties), OwnZeModule{true}, + InteropZeModule{nullptr} { + for (uint32_t I = 0; I < NumDevices; ++I) { + DeviceData &PerDevData = DeviceDataMap[Devices[I]->ZeDevice]; + PerDevData.State = St; + PerDevData.Binary = std::make_pair( + std::unique_ptr(new uint8_t[Lengths[I]]), Lengths[I]); + std::memcpy(PerDevData.Binary.first.get(), Inputs[I], Lengths[I]); + } } // Construct a program in Exe or Invalid state. ur_program_handle_t_(state St, ur_context_handle_t Context, ze_module_handle_t ZeModule, ze_module_build_log_handle_t ZeBuildLog) - : Context{Context}, NativeDevice{nullptr}, NativeProperties{nullptr}, - OwnZeModule{true}, State{St}, ZeModule{ZeModule}, ZeBuildLog{ - ZeBuildLog} {} + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + InteropZeModule{ZeModule} { + for (auto &Device : Context->getDevices()) { + DeviceData &PerDevData = DeviceDataMap[Device->ZeDevice]; + PerDevData.State = St; + } + } // Construct a program in Exe state (interop). ur_program_handle_t_(state St, ur_context_handle_t Context, ze_module_handle_t ZeModule, bool OwnZeModule) - : Context{Context}, NativeDevice{nullptr}, NativeProperties{nullptr}, - OwnZeModule{OwnZeModule}, State{St}, ZeModule{ZeModule}, ZeBuildLog{ - nullptr} {} - - // Construct a program from native handle - ur_program_handle_t_(state St, ur_context_handle_t Context, - ze_module_handle_t ZeModule) - : Context{Context}, NativeDevice{nullptr}, NativeProperties{nullptr}, - OwnZeModule{true}, State{St}, ZeModule{ZeModule}, ZeBuildLog{nullptr} {} + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, + InteropZeModule{ZeModule} { + // TODO: Currently it is not possible to understand the device associated + // with provided ZeModule. So we can't set the state on that device to Exe. + } // Construct a program in Invalid state with a custom error message. ur_program_handle_t_(state St, ur_context_handle_t Context, const std::string &ErrorMessage) - : Context{Context}, NativeDevice{nullptr}, NativeProperties{nullptr}, - OwnZeModule{true}, ErrorMessage{ErrorMessage}, State{St}, - ZeModule{nullptr}, ZeBuildLog{nullptr} {} + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + ErrorMessage{ErrorMessage}, InteropZeModule{nullptr} {} ~ur_program_handle_t_(); void ur_release_program_resources(bool deletion); @@ -122,9 +134,6 @@ struct ur_program_handle_t_ : _ur_object { const ur_context_handle_t Context; // Context of the program. - // Device Handle used for the Native Build - ur_device_handle_t NativeDevice; - // Properties used for the Native Build const ur_program_properties_t *NativeProperties; @@ -136,12 +145,10 @@ struct ur_program_handle_t_ : _ur_object { // message from a call to urProgramLink. const std::string ErrorMessage; - state State; - // In IL and Object states, this contains the SPIR-V representation of the - // module. In Native state, it contains the native code. - std::unique_ptr Code; // Array containing raw IL / native code. - size_t CodeLength{0}; // Size (bytes) of the array. + // module. + std::unique_ptr SpirvCode; // Array containing raw IL code. + size_t SpirvCodeLength; // Size (bytes) of the array. // Used only in IL and Object states. Contains the SPIR-V specialization // constants as a map from the SPIR-V "SpecID" to a buffer that contains the @@ -149,22 +156,35 @@ struct ur_program_handle_t_ : _ur_object { // maintaining the storage of this buffer. std::unordered_map SpecConstants; - // Used only in Object state. Contains the build flags from the last call to - // urProgramCompile(). - std::string BuildFlags; - - // The Level Zero module handle. Used primarily in Exe state. - ze_module_handle_t ZeModule{}; - - // Map of L0 Modules created for all the devices for which a UR Program - // has been built. - std::unordered_map ZeModuleMap; - - // The Level Zero build log from the last call to zeModuleCreate(). - ze_module_build_log_handle_t ZeBuildLog{}; + // The Level Zero module handle for interoperability. + // This module handle is either initialized with the handle provided to + // interoperability UR API, or with one of the handles after building the + // program. This handle is returned by UR API which allows to get the native + // handle from the program. + // TODO: Currently interoparability UR API does not support multiple devices. + ze_module_handle_t InteropZeModule{}; + + struct DeviceData { + // Log from the result of building the program for the device using + // zeModuleCreate(). + ze_module_build_log_handle_t ZeBuildLog{}; + + // The Level Zero module handle for the device. Used primarily in Exe state. + ze_module_handle_t ZeModule{}; + + // In Native state, contains the pair of the binary code for the device and + // its length in bytes. + std::pair, size_t> Binary{nullptr, 0}; + + // Build flags used for building the program for the device. + // May be different for different devices, for example, if + // urProgramCompileExp was called multiple times with different build flags + // for different devices. + std::string BuildFlags{}; + + // State of the program for the device. + state State{}; + }; - // Map of L0 Module Build logs created for all the devices for which a UR - // Program has been built. - std::unordered_map - ZeBuildLogMap; + std::unordered_map DeviceDataMap; }; From 2ad216703b1c24396fdb9e1839fae04bde73942a Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 19 Sep 2024 12:59:16 -0700 Subject: [PATCH 06/25] Adjust implementation of functions to new ur_program structure --- source/adapters/level_zero/kernel.cpp | 42 +-- source/adapters/level_zero/program.cpp | 470 ++++++++++++------------- source/adapters/level_zero/program.hpp | 155 ++++++-- 3 files changed, 362 insertions(+), 305 deletions(-) diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index a34782cbae..7781a45fda 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -497,18 +497,11 @@ ur_result_t urEnqueueDeviceGlobalVariableWrite( ///< this particular kernel execution instance. ) { std::scoped_lock lock(Queue->Mutex); - - ze_module_handle_t ZeModule{}; - auto It = Program->ZeModuleMap.find(Queue->Device->ZeDevice); - if (It != Program->ZeModuleMap.end()) { - ZeModule = It->second; - } else { - ZeModule = Program->ZeModule; - } - // Find global variable pointer size_t GlobalVarSize = 0; void *GlobalVarPtr = nullptr; + ze_module_handle_t ZeModule = + Program->getZeModuleHandle(Queue->Device->ZeDevice); ZE2UR_CALL(zeModuleGetGlobalPointer, (ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); if (GlobalVarSize < Offset + Count) { @@ -559,15 +552,8 @@ ur_result_t urEnqueueDeviceGlobalVariableRead( ///< this particular kernel execution instance. ) { std::scoped_lock lock(Queue->Mutex); - - ze_module_handle_t ZeModule{}; - auto It = Program->ZeModuleMap.find(Queue->Device->ZeDevice); - if (It != Program->ZeModuleMap.end()) { - ZeModule = It->second; - } else { - ZeModule = Program->ZeModule; - } - + ze_module_handle_t ZeModule = + Program->getZeModuleHandle(Queue->Device->ZeDevice); // Find global variable pointer size_t GlobalVarSize = 0; void *GlobalVarPtr = nullptr; @@ -605,10 +591,6 @@ ur_result_t urKernelCreate( *RetKernel ///< [out] pointer to handle of kernel object created. ) { std::shared_lock Guard(Program->Mutex); - if (Program->State != ur_program_handle_t_::state::Exe) { - return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; - } - try { ur_kernel_handle_t_ *UrKernel = new ur_kernel_handle_t_(true, Program); *RetKernel = reinterpret_cast(UrKernel); @@ -618,8 +600,13 @@ ur_result_t urKernelCreate( return UR_RESULT_ERROR_UNKNOWN; } - for (auto It : Program->ZeModuleMap) { - auto ZeModule = It.second; + for (auto &Dev : Program->AssociatedDevices) { + auto ZeDevice = Dev->ZeDevice; + // Program may be associated with all devices from the context but built only for subset of devices. + if (Program->getState(ZeDevice) != ur_program_handle_t_::state::Exe) + continue; + + auto ZeModule = Program->getZeModuleHandle(ZeDevice); ZeStruct ZeKernelDesc; ZeKernelDesc.flags = 0; ZeKernelDesc.pKernelName = KernelName; @@ -634,8 +621,6 @@ ur_result_t urKernelCreate( return ze2urResult(ZeResult); } - auto ZeDevice = It.first; - // Store the kernel in the ZeKernelMap so the correct // kernel can be retrieved later for a specific device // where a queue is being submitted. @@ -652,7 +637,10 @@ ur_result_t urKernelCreate( for (auto ZeSubDevice : ZeSubDevices) { (*RetKernel)->ZeKernelMap[ZeSubDevice] = ZeKernel; } - } + } + // There is no any successfully built executable for program. + if ((*RetKernel)->ZeKernelMap.empty()) + return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; (*RetKernel)->ZeKernel = (*RetKernel)->ZeKernelMap.begin()->second; diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 8433a06595..6dbb8e2b74 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -106,8 +106,8 @@ ur_result_t urProgramCreateWithBinary( try { ur_program_handle_t_ *UrProgram = - new ur_program_handle_t_(ur_program_handle_t_::Native, Context, Device, - Properties, Binary, Size); + new ur_program_handle_t_(ur_program_handle_t_::Native, Context, 1, + &Device, Properties, &Binary, &Size); *Program = reinterpret_cast(UrProgram); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -163,27 +163,9 @@ ur_result_t urProgramBuildExp( // UR_ASSERT(Program->Context->isValidDevice(Devices[0]), // UR_RESULT_ERROR_INVALID_VALUE); - // We should have either IL or native device code. - UR_ASSERT(hProgram->Code, UR_RESULT_ERROR_INVALID_PROGRAM); - - // It is legal to build a program created from either IL or from native - // device code. - if (hProgram->State != ur_program_handle_t_::IL && - hProgram->State != ur_program_handle_t_::Native) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - std::scoped_lock Guard(hProgram->Mutex); - // Ask Level Zero to build and load the native code onto the device. - ZeStruct ZeModuleDesc; ur_program_handle_t_::SpecConstantShim Shim(hProgram); - ZeModuleDesc.format = (hProgram->State == ur_program_handle_t_::IL) - ? ZE_MODULE_FORMAT_IL_SPIRV - : ZE_MODULE_FORMAT_NATIVE; - - ZeModuleDesc.inputSize = hProgram->CodeLength; - ZeModuleDesc.pInputModule = hProgram->Code.get(); // if large allocations are selected, then pass // ze-opt-greater-than-4GB-buffer-required to disable @@ -198,24 +180,45 @@ ur_result_t urProgramBuildExp( ZeBuildOptions += " -ze-opt-greater-than-4GB-buffer-required"; } - ZeModuleDesc.pBuildFlags = ZeBuildOptions.c_str(); - ZeModuleDesc.pConstants = Shim.ze(); ur_result_t Result = UR_RESULT_SUCCESS; - for (uint32_t i = 0; i < numDevices; i++) { + ZeStruct ZeModuleDesc; + ZeModuleDesc.pBuildFlags = ZeBuildOptions.c_str(); + ZeModuleDesc.pConstants = Shim.ze(); ze_device_handle_t ZeDevice = phDevices[i]->ZeDevice; + auto State = hProgram->getState(ZeDevice); + + // We don't want to rebuild the module if it was already built. + if (State == ur_program_handle_t_::Exe) + continue; + + // It is legal to build a program created from either IL or from native + // device code. + if (State != ur_program_handle_t_::IL && + State != ur_program_handle_t_::Native) + return UR_RESULT_ERROR_INVALID_OPERATION; + + // We should have either IL or native device code. + auto Code = hProgram->getCode(ZeDevice); + UR_ASSERT(Code, UR_RESULT_ERROR_INVALID_PROGRAM); + + ZeModuleDesc.format = (State == ur_program_handle_t_::IL) + ? ZE_MODULE_FORMAT_IL_SPIRV + : ZE_MODULE_FORMAT_NATIVE; + ZeModuleDesc.inputSize = hProgram->getCodeSize(ZeDevice); + ZeModuleDesc.pInputModule = Code; ze_context_handle_t ZeContext = hProgram->Context->getZeHandle(); ze_module_handle_t ZeModuleHandle = nullptr; ze_module_build_log_handle_t ZeBuildLog{}; - hProgram->State = ur_program_handle_t_::Exe; ze_result_t ZeResult = ZE_CALL_NOCHECK(zeModuleCreate, (ZeContext, ZeDevice, &ZeModuleDesc, &ZeModuleHandle, &ZeBuildLog)); + hProgram->setState(ZeDevice, ur_program_handle_t_::Exe); if (ZeResult != ZE_RESULT_SUCCESS) { // We adjust ur_program below to avoid attempting to release zeModule when // RT calls urProgramRelease(). - hProgram->State = ur_program_handle_t_::Invalid; + hProgram->setState(ZeDevice, ur_program_handle_t_::Invalid); Result = ze2urResult(ZeResult); if (ZeModuleHandle) { ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModuleHandle)); @@ -229,7 +232,7 @@ ur_result_t urProgramBuildExp( // Therefore, do an extra check now for unresolved symbols. ZeResult = checkUnresolvedSymbols(ZeModuleHandle, &ZeBuildLog); if (ZeResult != ZE_RESULT_SUCCESS) { - hProgram->State = ur_program_handle_t_::Invalid; + hProgram->setState(ZeDevice, ur_program_handle_t_::Invalid); Result = (ZeResult == ZE_RESULT_ERROR_MODULE_LINK_FAILURE) ? UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE : ze2urResult(ZeResult); @@ -238,15 +241,11 @@ ur_result_t urProgramBuildExp( ZeModuleHandle = nullptr; } } - hProgram->ZeModuleMap.insert(std::make_pair(ZeDevice, ZeModuleHandle)); + hProgram->setZeModule(ZeDevice, ZeModuleHandle); } - hProgram->ZeBuildLogMap.insert(std::make_pair(ZeDevice, ZeBuildLog)); + hProgram->setBuildLog(ZeDevice, ZeBuildLog); } - if (!hProgram->ZeModuleMap.empty()) - hProgram->ZeModule = hProgram->ZeModuleMap.begin()->second; - if (!hProgram->ZeBuildLogMap.empty()) - hProgram->ZeBuildLog = hProgram->ZeBuildLogMap.begin()->second; return Result; } @@ -259,10 +258,38 @@ ur_result_t urProgramCompileExp( const char *pOptions ///< [in][optional] pointer to build options ///< null-terminated string. ) { - std::ignore = numDevices; - std::ignore = phDevices; - return ur::level_zero::urProgramCompile(hProgram->Context, hProgram, - pOptions); + std::scoped_lock Guard(hProgram->Mutex); + // Check that state is IL for all devices in the context and set the state to + // Object. + for (uint32_t I = 0; I < numDevices; I++) { + auto ZeDevice = phDevices[I]->ZeDevice; + // It's only valid to compile a program created from IL (we don't support + // programs created from source code). + // + // The OpenCL spec says that the header parameters are ignored when + // compiling IL programs, so we don't validate them. + if (hProgram->getState(ZeDevice) != ur_program_handle_t_::IL) + return UR_RESULT_ERROR_INVALID_OPERATION; + hProgram->setState(ZeDevice, ur_program_handle_t_::Object); + // We don't compile anything now. Instead, we delay compilation until + // urProgramLink, where we do both compilation and linking as a single step. + // This produces better code because the driver can do cross-module + // optimizations. Therefore, we just remember the compilation flags, so we + // can use them later. + if (pOptions) { + hProgram->setBuildOptions(ZeDevice, pOptions); + // if large allocations are selected, then pass + // ze-opt-greater-than-4GB-buffer-required to disable + // stateful optimizations and be able to use larger than + // 4GB allocations on these kernels. + if (phDevices[I]->useRelaxedAllocationLimits()) { + hProgram->appendBuildOptions( + ZeDevice, " -ze-opt-greater-than-4GB-buffer-required"); + } + } + hProgram->setState(ZeDevice, ur_program_handle_t_::Object); + } + return UR_RESULT_SUCCESS; } ur_result_t urProgramCompile( @@ -272,36 +299,9 @@ ur_result_t urProgramCompile( const char *Options ///< [in][optional] pointer to build options ///< null-terminated string. ) { - std::ignore = Context; - std::scoped_lock Guard(Program->Mutex); - - // It's only valid to compile a program created from IL (we don't support - // programs created from source code). - // - // The OpenCL spec says that the header parameters are ignored when compiling - // IL programs, so we don't validate them. - if (Program->State != ur_program_handle_t_::IL) - return UR_RESULT_ERROR_INVALID_OPERATION; - - // We don't compile anything now. Instead, we delay compilation until - // urProgramLink, where we do both compilation and linking as a single step. - // This produces better code because the driver can do cross-module - // optimizations. Therefore, we just remember the compilation flags, so we - // can use them later. - if (Options) { - Program->BuildFlags = Options; - - // if large allocations are selected, then pass - // ze-opt-greater-than-4GB-buffer-required to disable - // stateful optimizations and be able to use larger than - // 4GB allocations on these kernels. - if (Context->getDevices()[0]->useRelaxedAllocationLimits()) { - Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required"; - } - } - Program->State = ur_program_handle_t_::Object; - - return UR_RESULT_SUCCESS; + auto devices = Context->getDevices(); + return ur::level_zero::urProgramCompileExp(Program, devices.size(), + devices.data(), Options); } ur_result_t urProgramLink( @@ -357,7 +357,7 @@ ur_result_t urProgramLinkExp( ur_result_t UrResult = UR_RESULT_SUCCESS; try { // Acquire a "shared" lock on each of the input programs, and also validate - // that they are all in Object state. + // that they are all in Object state for each device in the input list. // // There is no danger of deadlock here even if two threads call // urProgramLink simultaneously with the same input programs in a different @@ -371,8 +371,12 @@ ur_result_t urProgramLinkExp( for (uint32_t I = 0; I < count; I++) { std::shared_lock Guard(phPrograms[I]->Mutex); Guards[I].swap(Guard); - if (phPrograms[I]->State != ur_program_handle_t_::Object) { - return UR_RESULT_ERROR_INVALID_OPERATION; + for (uint32_t DeviceIndex = 0; DeviceIndex < numDevices; DeviceIndex++) { + auto Device = phDevices[DeviceIndex]; + if (phPrograms[I]->getState(Device->ZeDevice) != + ur_program_handle_t_::Object) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } } } @@ -393,9 +397,8 @@ ur_result_t urProgramLinkExp( for (uint32_t I = 0; I < count; I++) { ur_program_handle_t Program = phPrograms[I]; - CodeSizes[I] = Program->CodeLength; - CodeBufs[I] = Program->Code.get(); - BuildFlagPtrs[I] = Program->BuildFlags.c_str(); + CodeSizes[I] = Program->getCodeSize(); + CodeBufs[I] = Program->getCode(); SpecConstShims.emplace_back(Program); SpecConstPtrs[I] = SpecConstShims[I].ze(); } @@ -403,7 +406,6 @@ ur_result_t urProgramLinkExp( ZeExtModuleDesc.count = count; ZeExtModuleDesc.inputSizes = CodeSizes.data(); ZeExtModuleDesc.pInputModules = CodeBufs.data(); - ZeExtModuleDesc.pBuildFlags = BuildFlagPtrs.data(); ZeExtModuleDesc.pConstants = SpecConstPtrs.data(); ZeStruct ZeModuleDesc; @@ -439,7 +441,6 @@ ur_result_t urProgramLinkExp( ZeModuleDesc.pNext = nullptr; ZeModuleDesc.inputSize = ZeExtModuleDesc.inputSizes[0]; ZeModuleDesc.pInputModule = ZeExtModuleDesc.pInputModules[0]; - ZeModuleDesc.pBuildFlags = ZeExtModuleDesc.pBuildFlags[0]; ZeModuleDesc.pConstants = ZeExtModuleDesc.pConstants[0]; } else { logger::error( @@ -448,10 +449,11 @@ ur_result_t urProgramLinkExp( return UR_RESULT_ERROR_INVALID_VALUE; } } - std::unordered_map ZeModuleMap; - std::unordered_map - ZeBuildLogMap; + // TODO: Use the module of the first device as the interop module because of + // lack of multi-device support for interop case. + ur_program_handle_t_ *UrProgram = new ur_program_handle_t_(hContext); + *phProgram = reinterpret_cast(UrProgram); for (uint32_t i = 0; i < numDevices; i++) { // Call the Level Zero API to compile, link, and create the module. @@ -459,6 +461,17 @@ ur_result_t urProgramLinkExp( ze_context_handle_t ZeContext = hContext->getZeHandle(); ze_module_handle_t ZeModule = nullptr; ze_module_build_log_handle_t ZeBuildLog = nullptr; + + // Build flags may be different for different devices, so handle them + // here. Clear values of the previous device first. + BuildFlagPtrs.clear(); + for (uint32_t I = 0; I < count; I++) { + BuildFlagPtrs.push_back(phPrograms[I]->getBuildOptions(ZeDevice).c_str()); + } + ZeExtModuleDesc.pBuildFlags = BuildFlagPtrs.data(); + if (count == 1) + ZeModuleDesc.pBuildFlags = ZeExtModuleDesc.pBuildFlags[0]; + ze_result_t ZeResult = ZE_CALL_NOCHECK(zeModuleCreate, (ZeContext, ZeDevice, &ZeModuleDesc, &ZeModule, &ZeBuildLog)); @@ -485,19 +498,12 @@ ur_result_t urProgramLinkExp( return ze2urResult(ZeResult); } } - ZeModuleMap.insert(std::make_pair(ZeDevice, ZeModule)); - ZeBuildLogMap.insert(std::make_pair(ZeDevice, ZeBuildLog)); + UrProgram->setZeModule(ZeDevice, ZeModule); + UrProgram->setBuildLog(ZeDevice, ZeBuildLog); + UrProgram->setState(ZeDevice, (UrResult == UR_RESULT_SUCCESS) + ? ur_program_handle_t_::Exe + : ur_program_handle_t_::Invalid); } - - ur_program_handle_t_::state State = (UrResult == UR_RESULT_SUCCESS) - ? ur_program_handle_t_::Exe - : ur_program_handle_t_::Invalid; - ur_program_handle_t_ *UrProgram = - new ur_program_handle_t_(State, hContext, ZeModuleMap.begin()->second, - ZeBuildLogMap.begin()->second); - *phProgram = reinterpret_cast(UrProgram); - (*phProgram)->ZeModuleMap = std::move(ZeModuleMap); - (*phProgram)->ZeBuildLogMap = std::move(ZeBuildLogMap); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { @@ -564,17 +570,11 @@ ur_result_t urProgramGetFunctionPointer( ///< it is found in the program. ) { std::shared_lock Guard(Program->Mutex); - if (Program->State != ur_program_handle_t_::Exe) { + if (Program->getState(Device->ZeDevice) != ur_program_handle_t_::Exe) { return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; } - ze_module_handle_t ZeModule{}; - auto It = Program->ZeModuleMap.find(Device->ZeDevice); - if (It != Program->ZeModuleMap.end()) { - ZeModule = It->second; - } else { - ZeModule = Program->ZeModule; - } + ze_module_handle_t ZeModule = Program->getZeModuleHandle(Device->ZeDevice); ze_result_t ZeResult = ZE_CALL_NOCHECK( zeModuleGetFunctionPointer, (ZeModule, FunctionName, FunctionPointerRet)); @@ -632,13 +632,7 @@ ur_result_t urProgramGetGlobalVariablePointer( std::scoped_lock lock(Program->Mutex); ze_module_handle_t ZeModuleEntry{}; - ZeModuleEntry = Program->ZeModule; - if (!Program->ZeModuleMap.empty()) { - auto It = Program->ZeModuleMap.find(Device->ZeDevice); - if (It != Program->ZeModuleMap.end()) { - ZeModuleEntry = It->second; - } - } + ZeModuleEntry = Program->getZeModuleHandle(Device->ZeDevice); ze_result_t ZeResult = zeModuleGetGlobalPointer(ZeModuleEntry, GlobalVariableName, @@ -672,55 +666,36 @@ ur_result_t urProgramGetInfo( case UR_PROGRAM_INFO_CONTEXT: return ReturnValue(Program->Context); case UR_PROGRAM_INFO_NUM_DEVICES: - if (!Program->ZeModuleMap.empty()) - return ReturnValue( - uint32_t{ur_cast(Program->ZeModuleMap.size())}); - else - return ReturnValue(uint32_t{1}); + return ReturnValue( + uint32_t{ur_cast(Program->AssociatedDevices.size())}); case UR_PROGRAM_INFO_DEVICES: - if (!Program->ZeModuleMap.empty()) { - std::vector devices; - for (auto &ZeModulePair : Program->ZeModuleMap) { - auto It = Program->ZeModuleMap.find(ZeModulePair.first); - if (It != Program->ZeModuleMap.end()) { - for (auto &Device : Program->Context->getDevices()) { - if (Device->ZeDevice == ZeModulePair.first) { - devices.push_back(Device); - } - } - } - } - return ReturnValue(devices.data(), devices.size()); - } else { - return ReturnValue(Program->Context->getDevices()[0]); - } + return ReturnValue(Program->AssociatedDevices.data(), + Program->AssociatedDevices.size()); case UR_PROGRAM_INFO_BINARY_SIZES: { std::shared_lock Guard(Program->Mutex); - size_t SzBinary; - if (Program->State == ur_program_handle_t_::IL || - Program->State == ur_program_handle_t_::Native || - Program->State == ur_program_handle_t_::Object) { - SzBinary = Program->CodeLength; - } else if (Program->State == ur_program_handle_t_::Exe) { - if (!Program->ZeModuleMap.empty()) { - std::vector binarySizes; - for (auto &ZeModulePair : Program->ZeModuleMap) { - size_t binarySize = 0; - ZE2UR_CALL(zeModuleGetNativeBinary, - (ZeModulePair.second, &binarySize, nullptr)); - binarySizes.push_back(binarySize); - } - return ReturnValue(binarySizes.data(), binarySizes.size()); + std::vector binarySizes; + for (auto Device : Program->AssociatedDevices) { + auto State = Program->getState(Device->ZeDevice); + auto ZeModule = Program->getZeModuleHandle(Device->ZeDevice); + if (!ZeModule) + return UR_RESULT_ERROR_INVALID_PROGRAM; + + if (State == ur_program_handle_t_::IL || + State == ur_program_handle_t_::Native || + State == ur_program_handle_t_::Object) { + // We don't have a binary for this device, so return size of the spirv + // code. This is an array of 1 element, initialized as if it were + // scalar. + return ReturnValue(size_t{Program->getCodeSize()}); + } else if (State == ur_program_handle_t_::Exe) { + size_t binarySize = 0; + ZE2UR_CALL(zeModuleGetNativeBinary, (ZeModule, &binarySize, nullptr)); + binarySizes.push_back(binarySize); } else { - ZE2UR_CALL(zeModuleGetNativeBinary, - (Program->ZeModule, &SzBinary, nullptr)); - return ReturnValue(SzBinary); + return UR_RESULT_ERROR_INVALID_PROGRAM; } - } else { - return UR_RESULT_ERROR_INVALID_PROGRAM; } - // This is an array of 1 element, initialized as if it were scalar. - return ReturnValue(size_t{SzBinary}); + return ReturnValue(binarySizes.data(), binarySizes.size()); } case UR_PROGRAM_INFO_BINARIES: { // The caller sets "ParamValue" to an array of pointers, one for each @@ -733,100 +708,100 @@ ur_result_t urProgramGetInfo( } } std::shared_lock Guard(Program->Mutex); - // If the caller is using a Program which is IL, Native or an object, then - // the program has not been built for multiple devices so a single IL is - // returned. - if (Program->State == ur_program_handle_t_::IL || - Program->State == ur_program_handle_t_::Native || - Program->State == ur_program_handle_t_::Object) { - if (PropSizeRet) - *PropSizeRet = Program->CodeLength; - if (PBinary) { - std::memcpy(PBinary[0], Program->Code.get(), Program->CodeLength); - } - } else if (Program->State == ur_program_handle_t_::Exe) { - // If the caller is using a Program which is a built binary, then - // the program returned will either be a single module if this is a native - // binary or the native binary for each device will be returned. - size_t SzBinary = 0; - uint8_t *NativeBinaryPtr = nullptr; - if (PBinary) { - NativeBinaryPtr = PBinary[0]; + uint8_t *NativeBinaryPtr = nullptr; + if (PBinary) { + NativeBinaryPtr = PBinary[0]; + } + + size_t SzBinary = 0; + for (uint32_t deviceIndex = 0; + deviceIndex < Program->AssociatedDevices.size(); deviceIndex++) { + auto ZeDevice = Program->AssociatedDevices[deviceIndex]->ZeDevice; + auto State = Program->getState(ZeDevice); + auto ZeModule = Program->getZeModuleHandle(ZeDevice); + if (!ZeModule) { + return UR_RESULT_ERROR_INVALID_PROGRAM; } - if (!Program->ZeModuleMap.empty()) { - uint32_t deviceIndex = 0; - for (auto &ZeDeviceModule : Program->ZeModuleMap) { - size_t binarySize = 0; - if (PBinary) { - NativeBinaryPtr = PBinary[deviceIndex++]; - } - ZE2UR_CALL(zeModuleGetNativeBinary, - (ZeDeviceModule.second, &binarySize, NativeBinaryPtr)); - SzBinary += binarySize; + // If the caller is using a Program which is IL, Native or an object, then + // the program has not been built for multiple devices so a single IL is + // returned. + // TODO: currently if program is not compiled for any of the associated + // devices, we just return spirv code, assuming that we either have the + // program built for all associated devices or for none. It is possible + // that program is compiled for subset of associated devices, so that case + // probably should be explicitely specified and handled better. + if (State == ur_program_handle_t_::IL || + State == ur_program_handle_t_::Native || + State == ur_program_handle_t_::Object) { + if (PropSizeRet) + *PropSizeRet = Program->getCodeSize(); + if (PBinary) { + std::memcpy(PBinary[0], Program->getCode(), Program->getCodeSize()); } - } else { + break; + } else if (State == ur_program_handle_t_::Exe) { + size_t binarySize = 0; + if (PBinary) { + NativeBinaryPtr = PBinary[deviceIndex++]; + } + // If the caller is using a Program which is a built binary, then + // the program returned will either be a single module if this is a + // native binary or the native binary for each device will be returned. ZE2UR_CALL(zeModuleGetNativeBinary, - (Program->ZeModule, &SzBinary, NativeBinaryPtr)); + (ZeModule, &binarySize, NativeBinaryPtr)); + SzBinary += binarySize; + } else { + return UR_RESULT_ERROR_INVALID_PROGRAM; } - if (PropSizeRet) - *PropSizeRet = SzBinary; - } else { - return UR_RESULT_ERROR_INVALID_PROGRAM; } + if (PropSizeRet) + *PropSizeRet = SzBinary; break; } case UR_PROGRAM_INFO_NUM_KERNELS: { std::shared_lock Guard(Program->Mutex); uint32_t NumKernels = 0; - if (Program->State == ur_program_handle_t_::IL || - Program->State == ur_program_handle_t_::Native || - Program->State == ur_program_handle_t_::Object) { - return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; - } else if (Program->State == ur_program_handle_t_::Exe) { - if (!Program->ZeModuleMap.empty()) { - ZE2UR_CALL( - zeModuleGetKernelNames, - (Program->ZeModuleMap.begin()->second, &NumKernels, nullptr)); - } else { - ZE2UR_CALL(zeModuleGetKernelNames, - (Program->ZeModule, &NumKernels, nullptr)); + ze_module_handle_t ZeModule = nullptr; + // Find the first module in exe state. + for (const auto &Device : Program->AssociatedDevices) { + if (Program->getState(Device->ZeDevice) == ur_program_handle_t_::Exe) { + ZeModule = Program->getZeModuleHandle(Device->ZeDevice); + break; } - } else { - return UR_RESULT_ERROR_INVALID_PROGRAM; } + + // If none of the modules are in exe state, return error. + if (!ZeModule) + return UR_RESULT_ERROR_INVALID_PROGRAM; + + ZE2UR_CALL(zeModuleGetKernelNames, (ZeModule, &NumKernels, nullptr)); return ReturnValue(size_t{NumKernels}); } case UR_PROGRAM_INFO_KERNEL_NAMES: try { std::shared_lock Guard(Program->Mutex); - std::string PINames{""}; - if (Program->State == ur_program_handle_t_::IL || - Program->State == ur_program_handle_t_::Native || - Program->State == ur_program_handle_t_::Object) { - return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; - } else if (Program->State == ur_program_handle_t_::Exe) { - uint32_t Count = 0; - std::unique_ptr PNames; - if (!Program->ZeModuleMap.empty()) { - ZE2UR_CALL(zeModuleGetKernelNames, - (Program->ZeModuleMap.begin()->second, &Count, nullptr)); - PNames = std::make_unique(Count); - ZE2UR_CALL( - zeModuleGetKernelNames, - (Program->ZeModuleMap.begin()->second, &Count, PNames.get())); - } else { - ZE2UR_CALL(zeModuleGetKernelNames, - (Program->ZeModule, &Count, nullptr)); - PNames = std::make_unique(Count); - ZE2UR_CALL(zeModuleGetKernelNames, - (Program->ZeModule, &Count, PNames.get())); - } - for (uint32_t I = 0; I < Count; ++I) { - PINames += (I > 0 ? ";" : ""); - PINames += PNames[I]; + ze_module_handle_t ZeModule = nullptr; + // Find the first module in exe state. + for (const auto &Device : Program->AssociatedDevices) { + if (Program->getState(Device->ZeDevice) == ur_program_handle_t_::Exe) { + ZeModule = Program->getZeModuleHandle(Device->ZeDevice); + break; } - } else { + } + + // If none of the modules are in exe state, return error. + if (!ZeModule) return UR_RESULT_ERROR_INVALID_PROGRAM; + + std::string PINames{""}; + uint32_t Count = 0; + std::unique_ptr PNames; + ZE2UR_CALL(zeModuleGetKernelNames, (ZeModule, &Count, nullptr)); + PNames = std::make_unique(Count); + ZE2UR_CALL(zeModuleGetKernelNames, (ZeModule, &Count, PNames.get())); + for (uint32_t I = 0; I < Count; ++I) { + PINames += (I > 0 ? ";" : ""); + PINames += PNames[I]; } return ReturnValue(PINames.c_str()); } catch (const std::bad_alloc &) { @@ -835,7 +810,7 @@ ur_result_t urProgramGetInfo( return UR_RESULT_ERROR_UNKNOWN; } case UR_PROGRAM_INFO_IL: - return ReturnValue(Program->Code.get(), Program->CodeLength); + return ReturnValue(Program->getCode(), Program->getCodeSize()); default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -863,11 +838,13 @@ ur_result_t urProgramGetBuildInfo( UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); if (PropName == UR_PROGRAM_BUILD_INFO_BINARY_TYPE) { ur_program_binary_type_t Type = UR_PROGRAM_BINARY_TYPE_NONE; - if (Program->State == ur_program_handle_t_::Object) { + auto State = Program->getState(Device->ZeDevice); + if (State == ur_program_handle_t_::Object) { Type = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; - } else if (Program->State == ur_program_handle_t_::Exe) { + } else if (State == ur_program_handle_t_::Exe) { Type = UR_PROGRAM_BINARY_TYPE_EXECUTABLE; } + return ReturnValue(ur_program_binary_type_t{Type}); } if (PropName == UR_PROGRAM_BUILD_INFO_OPTIONS) { @@ -886,10 +863,8 @@ ur_result_t urProgramGetBuildInfo( } // Next check if there is a Level Zero build log. - if (Program->ZeBuildLogMap.find(Device->ZeDevice) != - Program->ZeBuildLogMap.end()) { - ze_module_build_log_handle_t ZeBuildLog = - Program->ZeBuildLogMap.begin()->second; + auto ZeBuildLog = Program->getBuildLog(Device->ZeDevice); + if (ZeBuildLog) { size_t LogSize = PropSize; ZE2UR_CALL(zeModuleBuildLogGetString, (ZeBuildLog, &LogSize, ur_cast(PropValue))); @@ -903,10 +878,10 @@ ur_result_t urProgramGetBuildInfo( // the failed build log here because RT does not create sycl::program // when urProgramBuild() fails, thus it won't call urProgramRelease() // to clean up the build log. - if (Program->State == ur_program_handle_t_::Invalid) { + if (Program->getState(Device->ZeDevice) == + ur_program_handle_t_::Invalid) { ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (ZeBuildLog)); - Program->ZeBuildLogMap.erase(Device->ZeDevice); - ZeBuildLog = nullptr; + Program->setBuildLog(Device->ZeDevice, nullptr); } } return UR_RESULT_SUCCESS; @@ -946,16 +921,13 @@ ur_result_t urProgramGetNativeHandle( auto ZeModule = ur_cast(NativeProgram); std::shared_lock Guard(Program->Mutex); - switch (Program->State) { - case ur_program_handle_t_::Exe: { - *ZeModule = Program->ZeModule; - break; - } - - default: + assert(Program->AssociatedDevices.size() > 0); + auto Module = + Program->getZeModuleHandle(Program->AssociatedDevices[0]->ZeDevice); + if (!Module) return UR_RESULT_ERROR_INVALID_OPERATION; - } + *ZeModule = Module; return UR_RESULT_SUCCESS; } @@ -1031,19 +1003,21 @@ void ur_program_handle_t_::ur_release_program_resources(bool deletion) { } } if (!resourcesReleased) { - for (auto &ZeBuildLogPair : this->ZeBuildLogMap) { - ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (ZeBuildLogPair.second)); + for (auto &[ZeDevice, DeviceData] : this->DeviceDataMap) { + if (DeviceData.ZeBuildLog) + ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (DeviceData.ZeBuildLog)); } - if (ZeModule && OwnZeModule) { - if (ZeModuleMap.empty()) { + if (InteropZeModule && OwnZeModule) { + if (DeviceDataMap.empty()) { // interop api - ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModule)); + ZE_CALL_NOCHECK(zeModuleDestroy, (InteropZeModule)); } else { - for (auto &ZeModulePair : this->ZeModuleMap) { - ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModulePair.second)); + for (auto &[ZeDevice, DeviceData] : this->DeviceDataMap) { + if (DeviceData.ZeModule) + ZE_CALL_NOCHECK(zeModuleDestroy, (DeviceData.ZeModule)); } - this->ZeModuleMap.clear(); + this->DeviceDataMap.clear(); } } resourcesReleased = true; diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 2aeeb674bd..4a15ac7fa9 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -71,8 +71,8 @@ struct ur_program_handle_t_ : _ur_object { ur_program_handle_t_(state St, ur_context_handle_t Context, const void *Input, size_t Length) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - SpirvCode{new uint8_t[Length]}, SpirvCodeLength{Length}, - InteropZeModule{nullptr} { + AssociatedDevices(Context->getDevices()), + SpirvCode{new uint8_t[Length]}, SpirvCodeLength{Length} { std::memcpy(SpirvCode.get(), Input, Length); // All devices have the program in IL state. for (auto &Device : Context->getDevices()) { @@ -88,7 +88,7 @@ struct ur_program_handle_t_ : _ur_object { const ur_program_properties_t *Properties, const uint8_t **Inputs, const size_t *Lengths) : Context{Context}, NativeProperties(Properties), OwnZeModule{true}, - InteropZeModule{nullptr} { + AssociatedDevices(Devices, Devices + NumDevices) { for (uint32_t I = 0; I < NumDevices; ++I) { DeviceData &PerDevData = DeviceDataMap[Devices[I]->ZeDevice]; PerDevData.State = St; @@ -98,23 +98,27 @@ struct ur_program_handle_t_ : _ur_object { } } + ur_program_handle_t_(ur_context_handle_t Context) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + AssociatedDevices(Context->getDevices()) {} + // Construct a program in Exe or Invalid state. - ur_program_handle_t_(state St, ur_context_handle_t Context, - ze_module_handle_t ZeModule, - ze_module_build_log_handle_t ZeBuildLog) + ur_program_handle_t_([[maybe_unused]] state St, ur_context_handle_t Context, + ze_module_handle_t InteropZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - InteropZeModule{ZeModule} { - for (auto &Device : Context->getDevices()) { - DeviceData &PerDevData = DeviceDataMap[Device->ZeDevice]; - PerDevData.State = St; - } - } + AssociatedDevices({Context->getDevices()[0]}), + InteropZeModule{InteropZeModule} {} // Construct a program in Exe state (interop). - ur_program_handle_t_(state St, ur_context_handle_t Context, - ze_module_handle_t ZeModule, bool OwnZeModule) + // TODO: Currently it is not possible to get the device associated with the + // interop module, API must be changed to either get that info from the user + // or new API need to be added to L0 to fetch that info. Consider it + // associated with the first device in the context. + ur_program_handle_t_([[maybe_unused]] state St, ur_context_handle_t Context, + ze_module_handle_t InteropZeModule, bool OwnZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, - InteropZeModule{ZeModule} { + AssociatedDevices({Context->getDevices()[0]}), + InteropZeModule{InteropZeModule} { // TODO: Currently it is not possible to understand the device associated // with provided ZeModule. So we can't set the state on that device to Exe. } @@ -123,11 +127,91 @@ struct ur_program_handle_t_ : _ur_object { ur_program_handle_t_(state St, ur_context_handle_t Context, const std::string &ErrorMessage) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - ErrorMessage{ErrorMessage}, InteropZeModule{nullptr} {} + ErrorMessage{ErrorMessage}, AssociatedDevices(Context->getDevices()) { + for (auto &Device : Context->getDevices()) { + DeviceData &PerDevData = DeviceDataMap[Device->ZeDevice]; + PerDevData.State = St; + } + } ~ur_program_handle_t_(); void ur_release_program_resources(bool deletion); + state getState(ze_device_handle_t ZeDevice) { + if ((DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) && + InteropZeModule) + return state::Exe; + + return DeviceDataMap[ZeDevice].State; + } + + ze_module_handle_t getZeModuleHandle(ze_device_handle_t ZeDevice) { + if (DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) + return InteropZeModule; + + return DeviceDataMap[ZeDevice].ZeModule; + } + + uint8_t *getCode(ze_device_handle_t ZeDevice = nullptr) { + if (!ZeDevice) + return SpirvCode.get(); + + if (DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) + return nullptr; + + if (DeviceDataMap[ZeDevice].State == state::IL) + return SpirvCode.get(); + else + return DeviceDataMap[ZeDevice].Binary.first.get(); + } + + size_t getCodeSize(ze_device_handle_t ZeDevice = nullptr) { + if (ZeDevice == nullptr) + return SpirvCodeLength; + + if (DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) + return 0; + + if (DeviceDataMap[ZeDevice].State == state::IL) + return SpirvCodeLength; + else + return DeviceDataMap[ZeDevice].Binary.second; + } + + ze_module_build_log_handle_t getBuildLog(ze_device_handle_t ZeDevice) { + if (DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) + return nullptr; + + return DeviceDataMap[ZeDevice].ZeBuildLog; + } + + void setState(ze_device_handle_t ZeDevice, state NewState) { + DeviceDataMap[ZeDevice].State = NewState; + } + + void setZeModule(ze_device_handle_t ZeDevice, ze_module_handle_t ZeModule) { + DeviceDataMap[ZeDevice].ZeModule = ZeModule; + } + + void setBuildLog(ze_device_handle_t ZeDevice, + ze_module_build_log_handle_t ZeBuildLog) { + DeviceDataMap[ZeDevice].ZeBuildLog = ZeBuildLog; + } + + void setBuildOptions(ze_device_handle_t ZeDevice, + const std::string &Options) { + DeviceDataMap[ZeDevice].BuildFlags = Options; + } + + void appendBuildOptions(ze_device_handle_t ZeDevice, + const std::string &Options) { + DeviceDataMap[ZeDevice].BuildFlags += Options; + } + + std::string getBuildOptions(ze_device_handle_t ZeDevice) { + return DeviceDataMap[ZeDevice].BuildFlags; + } + // Tracks the release state of the program handle to determine if the // internal handle needs to be released. bool resourcesReleased = false; @@ -145,32 +229,30 @@ struct ur_program_handle_t_ : _ur_object { // message from a call to urProgramLink. const std::string ErrorMessage; - // In IL and Object states, this contains the SPIR-V representation of the - // module. - std::unique_ptr SpirvCode; // Array containing raw IL code. - size_t SpirvCodeLength; // Size (bytes) of the array. - // Used only in IL and Object states. Contains the SPIR-V specialization // constants as a map from the SPIR-V "SpecID" to a buffer that contains the // associated value. The caller of the PI layer is responsible for // maintaining the storage of this buffer. std::unordered_map SpecConstants; - // The Level Zero module handle for interoperability. - // This module handle is either initialized with the handle provided to - // interoperability UR API, or with one of the handles after building the - // program. This handle is returned by UR API which allows to get the native - // handle from the program. - // TODO: Currently interoparability UR API does not support multiple devices. - ze_module_handle_t InteropZeModule{}; + // Keep the vector of devices associated with the program. + // It is populated at program creation and used to provide information for the + // descriptors like UR_PROGRAM_INFO_DEVICES, UR_PROGRAM_INFO_BINARY_SIZES, + // UR_PROGRAM_INFO_BINARIES as they are supposed to return information in the + // same order. I.e. the first binary in the array returned by + // UR_PROGRAM_INFO_BINARIES is supposed to be associated with the first device + // in the returned array of devices for UR_PROGRAM_INFO_DEVICES. Same for + // UR_PROGRAM_INFO_BINARY_SIZES. + const std::vector AssociatedDevices; +private: struct DeviceData { // Log from the result of building the program for the device using // zeModuleCreate(). - ze_module_build_log_handle_t ZeBuildLog{}; + ze_module_build_log_handle_t ZeBuildLog = nullptr; // The Level Zero module handle for the device. Used primarily in Exe state. - ze_module_handle_t ZeModule{}; + ze_module_handle_t ZeModule = nullptr; // In Native state, contains the pair of the binary code for the device and // its length in bytes. @@ -187,4 +269,17 @@ struct ur_program_handle_t_ : _ur_object { }; std::unordered_map DeviceDataMap; + + // In IL and Object states, this contains the SPIR-V representation of the + // module. + std::unique_ptr SpirvCode; // Array containing raw IL code. + size_t SpirvCodeLength; // Size (bytes) of the array. + + // The Level Zero module handle for interoperability. + // This module handle is either initialized with the handle provided to + // interoperability UR API, or with one of the handles after building the + // program. This handle is returned by UR API which allows to get the native + // handle from the program. + // TODO: Currently interoparability UR API does not support multiple devices. + ze_module_handle_t InteropZeModule{}; }; From a3444f062523a4b8e5372d8bfe79ac8d7ad70a2b Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 19 Sep 2024 15:49:17 -0700 Subject: [PATCH 07/25] Add implementation of urProgramCreateWithBinaryExp --- source/adapters/level_zero/program.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 6dbb8e2b74..b41b985c53 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -135,7 +135,17 @@ ur_result_t urProgramCreateWithBinaryExp( ur_program_handle_t *phProgram ///< [out] pointer to handle of Program object created. ) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + try { + ur_program_handle_t_ *UrProgram = new ur_program_handle_t_( + ur_program_handle_t_::Native, hContext, numDevices, phDevices, + pProperties, ppBinaries, pLengths); + *phProgram = reinterpret_cast(UrProgram); + return UR_RESULT_SUCCESS; + } catch (const std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } ur_result_t urProgramBuild( From 4410e7f2989bdc2708442cf85b6176abc01f9e4d Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Wed, 25 Sep 2024 23:15:01 -0700 Subject: [PATCH 08/25] Add stub implementation for other adapters --- source/adapters/cuda/program.cpp | 8 ++++++++ source/adapters/cuda/ur_interface_loader.cpp | 1 + source/adapters/hip/program.cpp | 8 ++++++++ source/adapters/hip/ur_interface_loader.cpp | 1 + source/adapters/native_cpu/program.cpp | 8 ++++++++ source/adapters/native_cpu/ur_interface_loader.cpp | 1 + source/adapters/opencl/program.cpp | 8 ++++++++ source/adapters/opencl/ur_interface_loader.cpp | 1 + 8 files changed, 36 insertions(+) diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index a475d43ce2..f44a6f972e 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -504,6 +504,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + // This entry point is only used for native specialization constants (SPIR-V), // and the CUDA plugin is AOT only so this entry point is not supported. UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index a9559eb188..9a358024d7 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -443,6 +443,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; + pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index b1d7d28c47..1043e1b2fc 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -522,6 +522,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + // This entry point is only used for native specialization constants (SPIR-V), // and the HIP plugin is AOT only so this entry point is not supported. UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 1454ddfdf1..7d200edc1b 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -409,6 +409,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; + pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/program.cpp b/source/adapters/native_cpu/program.cpp index 02ddda0b50..d287794e10 100644 --- a/source/adapters/native_cpu/program.cpp +++ b/source/adapters/native_cpu/program.cpp @@ -106,6 +106,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 94c6c4a03e..2b1f7b5942 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -427,6 +427,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; + pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index f154a54051..11ba1ff88d 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -133,6 +133,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 6cd69d84d3..7d026bc981 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -434,6 +434,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; + pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp; return UR_RESULT_SUCCESS; } From 82a5e0ae50059e997015e207424cc284019f5751 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Fri, 27 Sep 2024 17:15:19 -0700 Subject: [PATCH 09/25] Formatting --- source/adapters/level_zero/kernel.cpp | 5 +++-- source/adapters/level_zero/program.cpp | 15 +++++++-------- source/adapters/level_zero/program.hpp | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 7781a45fda..1c972ee6b2 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -602,7 +602,8 @@ ur_result_t urKernelCreate( for (auto &Dev : Program->AssociatedDevices) { auto ZeDevice = Dev->ZeDevice; - // Program may be associated with all devices from the context but built only for subset of devices. + // Program may be associated with all devices from the context but built + // only for subset of devices. if (Program->getState(ZeDevice) != ur_program_handle_t_::state::Exe) continue; @@ -637,7 +638,7 @@ ur_result_t urKernelCreate( for (auto ZeSubDevice : ZeSubDevices) { (*RetKernel)->ZeKernelMap[ZeSubDevice] = ZeKernel; } - } + } // There is no any successfully built executable for program. if ((*RetKernel)->ZeKernelMap.empty()) return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index b41b985c53..d3e59368b0 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -122,9 +122,9 @@ ur_result_t urProgramCreateWithBinaryExp( ur_context_handle_t hContext, ///< [in] handle of the context instance uint32_t numDevices, ///< [in] number of devices ur_device_handle_t - *phDevices, ///< [in][range(0, numDevices)] a pointer to a list of - ///< device handles. The binaries are loaded for devices - ///< specified in this list. + *phDevices, ///< [in][range(0, numDevices)] a pointer to a list of + ///< device handles. The binaries are loaded for devices + ///< specified in this list. size_t *pLengths, ///< [in][range(0, numDevices)] array of sizes of program ///< binaries specified by `pBinaries` (in bytes). const uint8_t * @@ -460,8 +460,6 @@ ur_result_t urProgramLinkExp( } } - // TODO: Use the module of the first device as the interop module because of - // lack of multi-device support for interop case. ur_program_handle_t_ *UrProgram = new ur_program_handle_t_(hContext); *phProgram = reinterpret_cast(UrProgram); for (uint32_t i = 0; i < numDevices; i++) { @@ -476,12 +474,13 @@ ur_result_t urProgramLinkExp( // here. Clear values of the previous device first. BuildFlagPtrs.clear(); for (uint32_t I = 0; I < count; I++) { - BuildFlagPtrs.push_back(phPrograms[I]->getBuildOptions(ZeDevice).c_str()); + BuildFlagPtrs.push_back( + phPrograms[I]->getBuildOptions(ZeDevice).c_str()); } ZeExtModuleDesc.pBuildFlags = BuildFlagPtrs.data(); if (count == 1) - ZeModuleDesc.pBuildFlags = ZeExtModuleDesc.pBuildFlags[0]; - + ZeModuleDesc.pBuildFlags = ZeExtModuleDesc.pBuildFlags[0]; + ze_result_t ZeResult = ZE_CALL_NOCHECK(zeModuleCreate, (ZeContext, ZeDevice, &ZeModuleDesc, &ZeModule, &ZeBuildLog)); diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 4a15ac7fa9..76c3c4adf2 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -106,8 +106,8 @@ struct ur_program_handle_t_ : _ur_object { ur_program_handle_t_([[maybe_unused]] state St, ur_context_handle_t Context, ze_module_handle_t InteropZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - AssociatedDevices({Context->getDevices()[0]}), - InteropZeModule{InteropZeModule} {} + AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ + InteropZeModule} {} // Construct a program in Exe state (interop). // TODO: Currently it is not possible to get the device associated with the @@ -117,8 +117,8 @@ struct ur_program_handle_t_ : _ur_object { ur_program_handle_t_([[maybe_unused]] state St, ur_context_handle_t Context, ze_module_handle_t InteropZeModule, bool OwnZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, - AssociatedDevices({Context->getDevices()[0]}), - InteropZeModule{InteropZeModule} { + AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ + InteropZeModule} { // TODO: Currently it is not possible to understand the device associated // with provided ZeModule. So we can't set the state on that device to Exe. } From 824ea10db30012f916249ac5955ab277807aa680 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Sun, 29 Sep 2024 22:32:32 -0700 Subject: [PATCH 10/25] Fix werror problems --- source/adapters/cuda/program.cpp | 6 ++---- source/adapters/hip/program.cpp | 6 ++---- source/adapters/native_cpu/program.cpp | 6 ++---- source/adapters/opencl/program.cpp | 6 ++---- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index f44a6f972e..9a725aedd4 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -505,10 +505,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( - ur_context_handle_t hContext, uint32_t numDevices, - ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, - const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram) { + ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *, + const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 1043e1b2fc..4616f40b96 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -523,10 +523,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( - ur_context_handle_t hContext, uint32_t numDevices, - ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, - const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram) { + ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *, + const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/native_cpu/program.cpp b/source/adapters/native_cpu/program.cpp index d287794e10..e66aa26907 100644 --- a/source/adapters/native_cpu/program.cpp +++ b/source/adapters/native_cpu/program.cpp @@ -107,10 +107,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( - ur_context_handle_t hContext, uint32_t numDevices, - ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, - const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram) { + ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *, + const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 11ba1ff88d..bd9619d38a 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -134,10 +134,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( - ur_context_handle_t hContext, uint32_t numDevices, - ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, - const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram) { + ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *, + const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From 787a199665477b8eedf7953372a5f10010a0b3c1 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Mon, 30 Sep 2024 11:41:25 -0700 Subject: [PATCH 11/25] Fix program resources release --- source/adapters/level_zero/program.cpp | 22 ++++++++++------------ source/adapters/level_zero/program.hpp | 2 +- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index d3e59368b0..9f91cddeac 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -1017,18 +1017,16 @@ void ur_program_handle_t_::ur_release_program_resources(bool deletion) { ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (DeviceData.ZeBuildLog)); } - if (InteropZeModule && OwnZeModule) { - if (DeviceDataMap.empty()) { - // interop api - ZE_CALL_NOCHECK(zeModuleDestroy, (InteropZeModule)); - } else { - for (auto &[ZeDevice, DeviceData] : this->DeviceDataMap) { - if (DeviceData.ZeModule) - ZE_CALL_NOCHECK(zeModuleDestroy, (DeviceData.ZeModule)); - } - this->DeviceDataMap.clear(); - } - } + // interop api + if (InteropZeModule && OwnZeModule) + ZE_CALL_NOCHECK(zeModuleDestroy, (InteropZeModule)); + + for (auto &[ZeDevice, DeviceData] : this->DeviceDataMap) + if (DeviceData.ZeModule) + ZE_CALL_NOCHECK(zeModuleDestroy, (DeviceData.ZeModule)); + + this->DeviceDataMap.clear(); + resourcesReleased = true; } } diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 76c3c4adf2..61f097c61f 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -281,5 +281,5 @@ struct ur_program_handle_t_ : _ur_object { // program. This handle is returned by UR API which allows to get the native // handle from the program. // TODO: Currently interoparability UR API does not support multiple devices. - ze_module_handle_t InteropZeModule{}; + ze_module_handle_t InteropZeModule = nullptr; }; From 5921e5be0a1710e1ce5533c90c5d0d25a348b71f Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Mon, 30 Sep 2024 23:38:52 -0700 Subject: [PATCH 12/25] Change urProgramCreateWithBinary signature in adapters --- source/adapters/cuda/program.cpp | 20 ++++---- source/adapters/cuda/ur_interface_loader.cpp | 1 - source/adapters/hip/program.cpp | 20 ++++---- source/adapters/hip/ur_interface_loader.cpp | 1 - source/adapters/level_zero/program.cpp | 46 ++++--------------- source/adapters/native_cpu/program.cpp | 15 ++++-- .../native_cpu/ur_interface_loader.cpp | 1 - source/adapters/opencl/program.cpp | 21 ++++----- .../adapters/opencl/ur_interface_loader.cpp | 1 - test/adapters/cuda/kernel_tests.cpp | 42 +++++++++-------- 10 files changed, 76 insertions(+), 92 deletions(-) diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index 9a725aedd4..ea3a7830ca 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -493,23 +493,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *pProperties, + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { + if (numDevices > 1) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - UR_CHECK_ERROR( - createProgram(hContext, hDevice, size, pBinary, pProperties, phProgram)); + if (numDevices == 0) + return UR_RESULT_ERROR_INVALID_DEVICE; + + UR_CHECK_ERROR(createProgram(hContext, phDevices[0], pLengths[0], + ppBinaries[0], pProperties, phProgram)); (*phProgram)->BinaryType = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( - ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *, - const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - // This entry point is only used for native specialization constants (SPIR-V), // and the CUDA plugin is AOT only so this entry point is not supported. UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index 9a358024d7..a9559eb188 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -443,7 +443,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; - pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 4616f40b96..8bbb1b67db 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -480,9 +480,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( /// /// Note: Only supports one device UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *pProperties, + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { + if (numDevices > 1) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + if (numDevices == 0) + return UR_RESULT_ERROR_INVALID_DEVICE; + + auto hDevice = phDevices[0]; + auto pBinary = ppBinaries[0]; + auto size = pLengths[0]; UR_ASSERT(std::find(hContext->getDevices().begin(), hContext->getDevices().end(), hDevice) != hContext->getDevices().end(), @@ -522,12 +532,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( - ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *, - const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - // This entry point is only used for native specialization constants (SPIR-V), // and the HIP plugin is AOT only so this entry point is not supported. UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 7d200edc1b..1454ddfdf1 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -409,7 +409,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; - pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 9f91cddeac..3ee640c4bd 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -83,42 +83,6 @@ ur_result_t urProgramCreateWithIL( } ur_result_t urProgramCreateWithBinary( - ur_context_handle_t Context, ///< [in] handle of the context instance - ur_device_handle_t - Device, ///< [in] handle to device associated with binary. - size_t Size, ///< [in] size in bytes. - const uint8_t *Binary, ///< [in] pointer to binary. - const ur_program_properties_t - *Properties, ///< [in][optional] pointer to program creation properties. - ur_program_handle_t - *Program ///< [out] pointer to handle of Program object created. -) { - // In OpenCL, clCreateProgramWithBinary() can be used to load any of the - // following: "program executable", "compiled program", or "library of - // compiled programs". In addition, the loaded program can be either - // IL (SPIR-v) or native device code. For now, we assume that - // urProgramCreateWithBinary() is only used to load a "program executable" - // as native device code. - // If we wanted to support all the same cases as OpenCL, we would need to - // somehow examine the binary image to distinguish the cases. Alternatively, - // we could change the PI interface and have the caller pass additional - // information to distinguish the cases. - - try { - ur_program_handle_t_ *UrProgram = - new ur_program_handle_t_(ur_program_handle_t_::Native, Context, 1, - &Device, Properties, &Binary, &Size); - *Program = reinterpret_cast(UrProgram); - } catch (const std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; -} - -ur_result_t urProgramCreateWithBinaryExp( ur_context_handle_t hContext, ///< [in] handle of the context instance uint32_t numDevices, ///< [in] number of devices ur_device_handle_t @@ -135,6 +99,16 @@ ur_result_t urProgramCreateWithBinaryExp( ur_program_handle_t *phProgram ///< [out] pointer to handle of Program object created. ) { + // In OpenCL, clCreateProgramWithBinary() can be used to load any of the + // following: "program executable", "compiled program", or "library of + // compiled programs". In addition, the loaded program can be either + // IL (SPIR-v) or native device code. For now, we assume that + // urProgramCreateWithBinary() is only used to load a "program executable" + // as native device code. + // If we wanted to support all the same cases as OpenCL, we would need to + // somehow examine the binary image to distinguish the cases. Alternatively, + // we could change the PI interface and have the caller pass additional + // information to distinguish the cases. try { ur_program_handle_t_ *UrProgram = new ur_program_handle_t_( ur_program_handle_t_::Native, hContext, numDevices, phDevices, diff --git a/source/adapters/native_cpu/program.cpp b/source/adapters/native_cpu/program.cpp index e66aa26907..3c31e06783 100644 --- a/source/adapters/native_cpu/program.cpp +++ b/source/adapters/native_cpu/program.cpp @@ -54,10 +54,19 @@ deserializeWGMetadata(const ur_program_metadata_t &MetadataElement, } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *pProperties, + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { - std::ignore = size; + if (numDevices > 1) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + if (numDevices == 0) + return UR_RESULT_ERROR_INVALID_DEVICE; + + auto hDevice = phDevices[0]; + auto pBinary = ppBinaries[0]; + std::ignore = pLengths; std::ignore = pProperties; UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 2b1f7b5942..94c6c4a03e 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -427,7 +427,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; - pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index bd9619d38a..f3ea657280 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -116,29 +116,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *, + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { - - const cl_device_id Devices[1] = {cl_adapter::cast(hDevice)}; - const size_t Lengths[1] = {size}; - cl_int BinaryStatus[1]; + cl_device_id Devices[numDevices]; + for (uint32_t i = 0; i < numDevices; ++i) + Devices[i] = cl_adapter::cast(phDevices[i]); + cl_int BinaryStatus[numDevices]; cl_int CLResult; *phProgram = cl_adapter::cast(clCreateProgramWithBinary( cl_adapter::cast(hContext), cl_adapter::cast(1u), - Devices, Lengths, &pBinary, BinaryStatus, &CLResult)); + Devices, pLengths, ppBinaries, BinaryStatus, &CLResult)); CL_RETURN_ON_FAILURE(BinaryStatus[0]); CL_RETURN_ON_FAILURE(CLResult); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( - ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *, - const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 7d026bc981..6cd69d84d3 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -434,7 +434,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; - pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp; return UR_RESULT_SUCCESS; } diff --git a/test/adapters/cuda/kernel_tests.cpp b/test/adapters/cuda/kernel_tests.cpp index 80ec9146fd..085c03030b 100644 --- a/test/adapters/cuda/kernel_tests.cpp +++ b/test/adapters/cuda/kernel_tests.cpp @@ -74,9 +74,10 @@ const char *threeParamsTwoLocal = "\n\ TEST_P(cudaKernelTest, CreateProgramAndKernel) { uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource, - nullptr, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&ptxSource), + nullptr, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); @@ -116,9 +117,10 @@ TEST_P(cudaKernelTest, CreateProgramAndKernelWithMetadata) { ur_program_properties_t programProps{UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr, 1, &reqdWorkGroupSizeMDProp}; uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource, - &programProps, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&ptxSource), + &programProps, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); @@ -138,9 +140,10 @@ TEST_P(cudaKernelTest, CreateProgramAndKernelWithMetadata) { TEST_P(cudaKernelTest, URKernelArgumentSimple) { uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource, - nullptr, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&ptxSource), + nullptr, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); @@ -160,9 +163,10 @@ TEST_P(cudaKernelTest, URKernelArgumentSimple) { TEST_P(cudaKernelTest, URKernelArgumentSetTwice) { uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource, - nullptr, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&ptxSource), + nullptr, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); @@ -189,9 +193,10 @@ TEST_P(cudaKernelTest, URKernelArgumentSetTwice) { TEST_P(cudaKernelTest, URKernelDispatch) { uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource, - nullptr, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&ptxSource), + nullptr, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); @@ -218,9 +223,10 @@ TEST_P(cudaKernelTest, URKernelDispatch) { TEST_P(cudaKernelTest, URKernelDispatchTwo) { uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)twoParams, - nullptr, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&twoParams), + nullptr, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); From dcc56f72882fca42d991452f30459239ea36ccae Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 1 Oct 2024 00:50:56 -0700 Subject: [PATCH 13/25] Fix mistakes --- source/adapters/level_zero/program.hpp | 12 ++++++------ source/adapters/opencl/program.cpp | 16 +++++++++------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 61f097c61f..025c6a981b 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -103,22 +103,22 @@ struct ur_program_handle_t_ : _ur_object { AssociatedDevices(Context->getDevices()) {} // Construct a program in Exe or Invalid state. - ur_program_handle_t_([[maybe_unused]] state St, ur_context_handle_t Context, + ur_program_handle_t_(state, ur_context_handle_t Context, ze_module_handle_t InteropZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ - InteropZeModule} {} + AssociatedDevices({Context->getDevices()[0]}), + InteropZeModule{InteropZeModule} {} // Construct a program in Exe state (interop). // TODO: Currently it is not possible to get the device associated with the // interop module, API must be changed to either get that info from the user // or new API need to be added to L0 to fetch that info. Consider it // associated with the first device in the context. - ur_program_handle_t_([[maybe_unused]] state St, ur_context_handle_t Context, + ur_program_handle_t_(state, ur_context_handle_t Context, ze_module_handle_t InteropZeModule, bool OwnZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, - AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ - InteropZeModule} { + AssociatedDevices({Context->getDevices()[0]}), + InteropZeModule{InteropZeModule} { // TODO: Currently it is not possible to understand the device associated // with provided ZeModule. So we can't set the state on that device to Exe. } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index f3ea657280..9c8f214410 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -118,17 +118,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, uint32_t numDevices, ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, - const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram) { - cl_device_id Devices[numDevices]; + const ur_program_properties_t *, ur_program_handle_t *phProgram) { + std::vector Devices(numDevices); for (uint32_t i = 0; i < numDevices; ++i) Devices[i] = cl_adapter::cast(phDevices[i]); - cl_int BinaryStatus[numDevices]; + std::vector BinaryStatus(numDevices); cl_int CLResult; *phProgram = cl_adapter::cast(clCreateProgramWithBinary( - cl_adapter::cast(hContext), cl_adapter::cast(1u), - Devices, pLengths, ppBinaries, BinaryStatus, &CLResult)); - CL_RETURN_ON_FAILURE(BinaryStatus[0]); + cl_adapter::cast(hContext), + cl_adapter::cast(numDevices), Devices.data(), pLengths, + ppBinaries, BinaryStatus.data(), &CLResult)); + for (uint32_t i = 0; i < numDevices; ++i) { + CL_RETURN_ON_FAILURE(BinaryStatus[i]); + } CL_RETURN_ON_FAILURE(CLResult); return UR_RESULT_SUCCESS; From 4adb52514b3e826d08c17545122a179ecbf7c961 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 1 Oct 2024 17:34:25 -0700 Subject: [PATCH 14/25] Add numDevices == 0 condition --- include/ur_api.h | 1 + scripts/core/program.yml | 1 + source/adapters/cuda/program.cpp | 3 --- source/adapters/hip/program.cpp | 3 --- source/adapters/level_zero/program.hpp | 8 ++++---- source/adapters/native_cpu/program.cpp | 3 --- source/loader/layers/validation/ur_valddi.cpp | 4 ++++ source/loader/ur_libapi.cpp | 1 + source/ur_api.cpp | 1 + 9 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 02e52c0b4f..e9a9be3d08 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -4235,6 +4235,7 @@ urProgramCreateWithIL( /// + `NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas` /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0` +/// + `numDevices == 0` /// - ::UR_RESULT_ERROR_INVALID_NATIVE_BINARY /// + If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.` UR_APIEXPORT ur_result_t UR_APICALL diff --git a/scripts/core/program.yml b/scripts/core/program.yml index e35c895360..769a312f1d 100644 --- a/scripts/core/program.yml +++ b/scripts/core/program.yml @@ -158,6 +158,7 @@ returns: - "`NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas`" - $X_RESULT_ERROR_INVALID_SIZE: - "`NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0`" + - "`numDevices == 0`" - $X_RESULT_ERROR_INVALID_NATIVE_BINARY: - "If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.`" --- #-------------------------------------------------------------------------- diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index ea3a7830ca..4b963a737a 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -500,9 +500,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( if (numDevices > 1) return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - if (numDevices == 0) - return UR_RESULT_ERROR_INVALID_DEVICE; - UR_CHECK_ERROR(createProgram(hContext, phDevices[0], pLengths[0], ppBinaries[0], pProperties, phProgram)); (*phProgram)->BinaryType = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 8bbb1b67db..4c4f2b2766 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -487,9 +487,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( if (numDevices > 1) return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - if (numDevices == 0) - return UR_RESULT_ERROR_INVALID_DEVICE; - auto hDevice = phDevices[0]; auto pBinary = ppBinaries[0]; auto size = pLengths[0]; diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 025c6a981b..8977762c74 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -106,8 +106,8 @@ struct ur_program_handle_t_ : _ur_object { ur_program_handle_t_(state, ur_context_handle_t Context, ze_module_handle_t InteropZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - AssociatedDevices({Context->getDevices()[0]}), - InteropZeModule{InteropZeModule} {} + AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ + InteropZeModule} {} // Construct a program in Exe state (interop). // TODO: Currently it is not possible to get the device associated with the @@ -117,8 +117,8 @@ struct ur_program_handle_t_ : _ur_object { ur_program_handle_t_(state, ur_context_handle_t Context, ze_module_handle_t InteropZeModule, bool OwnZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, - AssociatedDevices({Context->getDevices()[0]}), - InteropZeModule{InteropZeModule} { + AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ + InteropZeModule} { // TODO: Currently it is not possible to understand the device associated // with provided ZeModule. So we can't set the state on that device to Exe. } diff --git a/source/adapters/native_cpu/program.cpp b/source/adapters/native_cpu/program.cpp index 3c31e06783..bc7baeb387 100644 --- a/source/adapters/native_cpu/program.cpp +++ b/source/adapters/native_cpu/program.cpp @@ -61,9 +61,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( if (numDevices > 1) return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - if (numDevices == 0) - return UR_RESULT_ERROR_INVALID_DEVICE; - auto hDevice = phDevices[0]; auto pBinary = ppBinaries[0]; std::ignore = pLengths; diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 65d0c5f4ab..e9bb86e299 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -2773,6 +2773,10 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( pProperties->count == 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (numDevices == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } } if (getContext()->enableLifetimeValidation && diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 6b0fbcf7d7..21464aaec2 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -3029,6 +3029,7 @@ ur_result_t UR_APICALL urProgramCreateWithIL( /// + `NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas` /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0` +/// + `numDevices == 0` /// - ::UR_RESULT_ERROR_INVALID_NATIVE_BINARY /// + If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.` ur_result_t UR_APICALL urProgramCreateWithBinary( diff --git a/source/ur_api.cpp b/source/ur_api.cpp index ebe0a6ede9..1e11804818 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -2596,6 +2596,7 @@ ur_result_t UR_APICALL urProgramCreateWithIL( /// + `NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas` /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0` +/// + `numDevices == 0` /// - ::UR_RESULT_ERROR_INVALID_NATIVE_BINARY /// + If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.` ur_result_t UR_APICALL urProgramCreateWithBinary( From 7a27ae449cdb26fa62b4250e7edba8fafd1e6290 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Mon, 7 Oct 2024 22:37:39 +0200 Subject: [PATCH 15/25] Fix remaining conformance tests --- .../program/urProgramCreateWithBinary.cpp | 49 ++++++++++++------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/test/conformance/program/urProgramCreateWithBinary.cpp b/test/conformance/program/urProgramCreateWithBinary.cpp index 0f525dd293..c34c5a3223 100644 --- a/test/conformance/program/urProgramCreateWithBinary.cpp +++ b/test/conformance/program/urProgramCreateWithBinary.cpp @@ -38,45 +38,54 @@ struct urProgramCreateWithBinaryTest : uur::urProgramTest { UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urProgramCreateWithBinaryTest); TEST_P(urProgramCreateWithBinaryTest, Success) { - ASSERT_SUCCESS(urProgramCreateWithBinary(context, device, binary.size(), - binary.data(), nullptr, - &binary_program)); + auto size = binary.size(); + const uint8_t *data = binary.data(); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &size, &data, + nullptr, &binary_program)); } TEST_P(urProgramCreateWithBinaryTest, InvalidNullHandleContext) { + auto size = binary.size(); + const uint8_t *data = binary.data(); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urProgramCreateWithBinary(nullptr, device, binary.size(), - binary.data(), nullptr, + urProgramCreateWithBinary(nullptr, 1, &device, &size, + &data, nullptr, &binary_program)); } TEST_P(urProgramCreateWithBinaryTest, InvalidNullHandleDevice) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urProgramCreateWithBinary(context, nullptr, binary.size(), - binary.data(), nullptr, + auto size = binary.size(); + const uint8_t *data = binary.data(); + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, + urProgramCreateWithBinary(context, 0, nullptr, &size, + &data, nullptr, &binary_program)); } TEST_P(urProgramCreateWithBinaryTest, InvalidNullPointerBinary) { + auto size = binary.size(); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urProgramCreateWithBinary(context, device, binary.size(), + urProgramCreateWithBinary(context, 1, &device, &size, nullptr, nullptr, &binary_program)); } TEST_P(urProgramCreateWithBinaryTest, InvalidNullPointerProgram) { + auto size = binary.size(); + const uint8_t *data = binary.data(); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urProgramCreateWithBinary(context, device, binary.size(), - binary.data(), nullptr, - nullptr)); + urProgramCreateWithBinary(context, 1, &device, &size, + &data, nullptr, nullptr)); } TEST_P(urProgramCreateWithBinaryTest, InvalidNullPointerMetadata) { ur_program_properties_t properties = {}; properties.count = 1; + auto size = binary.size(); + const uint8_t *data = binary.data(); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urProgramCreateWithBinary(context, device, binary.size(), - binary.data(), &properties, + urProgramCreateWithBinary(context, 1, &device, &size, + &data, &properties, &binary_program)); } @@ -89,17 +98,21 @@ TEST_P(urProgramCreateWithBinaryTest, InvalidSizePropertyCount) { md_string.size(), md_value}; ur_program_properties_t properties = {}; properties.pMetadatas = &md; + auto size = binary.size(); + const uint8_t *data = binary.data(); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_SIZE, - urProgramCreateWithBinary(context, device, binary.size(), - binary.data(), &properties, + urProgramCreateWithBinary(context, 1, &device, &size, + &data, &properties, &binary_program)); } TEST_P(urProgramCreateWithBinaryTest, BuildInvalidProgramBinary) { ur_program_handle_t program = nullptr; uint8_t binary[] = {0, 1, 2, 3, 4}; - auto result = urProgramCreateWithBinary(context, device, 5, binary, nullptr, - &program); + const uint8_t *data = binary; + size_t size = 5; + auto result = urProgramCreateWithBinary(context, 1, &device, &size, &data, + nullptr, &program); // The driver is not required to reject the binary ASSERT_TRUE(result == UR_RESULT_ERROR_INVALID_BINARY || result == UR_RESULT_SUCCESS); From 1f2f010d44971cf1adf584c2e243c6f9aa7a2f9c Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 8 Oct 2024 07:26:59 +0200 Subject: [PATCH 16/25] Fix v2 L0 adapter --- source/adapters/level_zero/program.hpp | 6 +++++- source/adapters/level_zero/ur_level_zero.hpp | 8 ++++++++ source/adapters/level_zero/v2/kernel.cpp | 9 ++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 8977762c74..4ba1df943b 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -10,8 +10,12 @@ #pragma once #include "common.hpp" -#include "context.hpp" #include "device.hpp" +#ifdef UR_ADAPTER_LEVEL_ZERO_V2 +#include "v2/context.hpp" +#else +#include "context.hpp" +#endif struct ur_program_handle_t_ : _ur_object { // ur_program_handle_t_() {} diff --git a/source/adapters/level_zero/ur_level_zero.hpp b/source/adapters/level_zero/ur_level_zero.hpp index 36965c5d58..eed3ddef63 100644 --- a/source/adapters/level_zero/ur_level_zero.hpp +++ b/source/adapters/level_zero/ur_level_zero.hpp @@ -25,9 +25,17 @@ #include #include "common.hpp" +#ifdef UR_ADAPTER_LEVEL_ZERO_V2 +#include "v2/context.hpp" +#else #include "context.hpp" +#endif #include "device.hpp" +#ifdef UR_ADAPTER_LEVEL_ZERO_V2 +#include "v2/event.hpp" +#else #include "event.hpp" +#endif #include "image.hpp" #include "kernel.hpp" #include "memory.hpp" diff --git a/source/adapters/level_zero/v2/kernel.cpp b/source/adapters/level_zero/v2/kernel.cpp index e98221b9e5..26f3123c29 100644 --- a/source/adapters/level_zero/v2/kernel.cpp +++ b/source/adapters/level_zero/v2/kernel.cpp @@ -40,7 +40,14 @@ ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_handle_t hProgram, deviceKernels(hProgram->Context->getPlatform()->getNumDevices()) { ur::level_zero::urProgramRetain(hProgram); - for (auto [zeDevice, zeModule] : hProgram->ZeModuleMap) { + for (auto &Dev : hProgram->AssociatedDevices) { + auto zeDevice = Dev->ZeDevice; + // Program may be associated with all devices from the context but built + // only for subset of devices. + if (hProgram->getState(zeDevice) != ur_program_handle_t_::state::Exe) + continue; + + auto zeModule = hProgram->getZeModuleHandle(zeDevice); ZeStruct zeKernelDesc; zeKernelDesc.pKernelName = kernelName; From c7d5bfb5b217092f4f94db6b8adc67c6dd9c35f3 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Wed, 9 Oct 2024 01:36:18 +0200 Subject: [PATCH 17/25] Move constructors implementation to cpp to avoid including two version of headers --- source/adapters/level_zero/program.cpp | 61 ++++++++++++++++++++ source/adapters/level_zero/program.hpp | 55 ++---------------- source/adapters/level_zero/ur_level_zero.hpp | 8 --- 3 files changed, 67 insertions(+), 57 deletions(-) diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 3ee640c4bd..1ca26a12fb 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -970,6 +970,67 @@ ur_result_t urProgramSetSpecializationConstants( } // namespace ur::level_zero +ur_program_handle_t_::ur_program_handle_t_(state St, + ur_context_handle_t Context, + const void *Input, size_t Length) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + AssociatedDevices(Context->getDevices()), SpirvCode{new uint8_t[Length]}, + SpirvCodeLength{Length} { + std::memcpy(SpirvCode.get(), Input, Length); + // All devices have the program in IL state. + for (auto &Device : Context->getDevices()) { + DeviceData &PerDevData = DeviceDataMap[Device->ZeDevice]; + PerDevData.State = St; + } +} + +ur_program_handle_t_::ur_program_handle_t_( + state St, ur_context_handle_t Context, const uint32_t NumDevices, + const ur_device_handle_t *Devices, + const ur_program_properties_t *Properties, const uint8_t **Inputs, + const size_t *Lengths) + : Context{Context}, NativeProperties(Properties), OwnZeModule{true}, + AssociatedDevices(Devices, Devices + NumDevices) { + for (uint32_t I = 0; I < NumDevices; ++I) { + DeviceData &PerDevData = DeviceDataMap[Devices[I]->ZeDevice]; + PerDevData.State = St; + PerDevData.Binary = std::make_pair( + std::unique_ptr(new uint8_t[Lengths[I]]), Lengths[I]); + std::memcpy(PerDevData.Binary.first.get(), Inputs[I], Lengths[I]); + } +} + +ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Context) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + AssociatedDevices(Context->getDevices()) {} + +ur_program_handle_t_::ur_program_handle_t_(state, ur_context_handle_t Context, + ze_module_handle_t InteropZeModule) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + AssociatedDevices({Context->getDevices()[0]}), + InteropZeModule{InteropZeModule} {} + +ur_program_handle_t_::ur_program_handle_t_(state, ur_context_handle_t Context, + ze_module_handle_t InteropZeModule, + bool OwnZeModule) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, + AssociatedDevices({Context->getDevices()[0]}), + InteropZeModule{InteropZeModule} { + // TODO: Currently it is not possible to understand the device associated + // with provided ZeModule. So we can't set the state on that device to Exe. +} + +ur_program_handle_t_::ur_program_handle_t_(state St, + ur_context_handle_t Context, + const std::string &ErrorMessage) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + ErrorMessage{ErrorMessage}, AssociatedDevices(Context->getDevices()) { + for (auto &Device : Context->getDevices()) { + DeviceData &PerDevData = DeviceDataMap[Device->ZeDevice]; + PerDevData.State = St; + } +} + ur_program_handle_t_::~ur_program_handle_t_() { if (!resourcesReleased) { ur_release_program_resources(true); diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 4ba1df943b..cde0697f13 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -11,11 +11,6 @@ #include "common.hpp" #include "device.hpp" -#ifdef UR_ADAPTER_LEVEL_ZERO_V2 -#include "v2/context.hpp" -#else -#include "context.hpp" -#endif struct ur_program_handle_t_ : _ur_object { // ur_program_handle_t_() {} @@ -73,45 +68,20 @@ struct ur_program_handle_t_ : _ur_object { // Construct a program in IL. ur_program_handle_t_(state St, ur_context_handle_t Context, const void *Input, - size_t Length) - : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - AssociatedDevices(Context->getDevices()), - SpirvCode{new uint8_t[Length]}, SpirvCodeLength{Length} { - std::memcpy(SpirvCode.get(), Input, Length); - // All devices have the program in IL state. - for (auto &Device : Context->getDevices()) { - DeviceData &PerDevData = DeviceDataMap[Device->ZeDevice]; - PerDevData.State = St; - } - } + size_t Length); // Construct a program in NATIVE for multiple devices. ur_program_handle_t_(state St, ur_context_handle_t Context, const uint32_t NumDevices, const ur_device_handle_t *Devices, const ur_program_properties_t *Properties, - const uint8_t **Inputs, const size_t *Lengths) - : Context{Context}, NativeProperties(Properties), OwnZeModule{true}, - AssociatedDevices(Devices, Devices + NumDevices) { - for (uint32_t I = 0; I < NumDevices; ++I) { - DeviceData &PerDevData = DeviceDataMap[Devices[I]->ZeDevice]; - PerDevData.State = St; - PerDevData.Binary = std::make_pair( - std::unique_ptr(new uint8_t[Lengths[I]]), Lengths[I]); - std::memcpy(PerDevData.Binary.first.get(), Inputs[I], Lengths[I]); - } - } + const uint8_t **Inputs, const size_t *Lengths); - ur_program_handle_t_(ur_context_handle_t Context) - : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - AssociatedDevices(Context->getDevices()) {} + ur_program_handle_t_(ur_context_handle_t Context); // Construct a program in Exe or Invalid state. ur_program_handle_t_(state, ur_context_handle_t Context, - ze_module_handle_t InteropZeModule) - : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ - InteropZeModule} {} + ze_module_handle_t InteropZeModule); // Construct a program in Exe state (interop). // TODO: Currently it is not possible to get the device associated with the @@ -119,24 +89,11 @@ struct ur_program_handle_t_ : _ur_object { // or new API need to be added to L0 to fetch that info. Consider it // associated with the first device in the context. ur_program_handle_t_(state, ur_context_handle_t Context, - ze_module_handle_t InteropZeModule, bool OwnZeModule) - : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, - AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ - InteropZeModule} { - // TODO: Currently it is not possible to understand the device associated - // with provided ZeModule. So we can't set the state on that device to Exe. - } + ze_module_handle_t InteropZeModule, bool OwnZeModule); // Construct a program in Invalid state with a custom error message. ur_program_handle_t_(state St, ur_context_handle_t Context, - const std::string &ErrorMessage) - : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - ErrorMessage{ErrorMessage}, AssociatedDevices(Context->getDevices()) { - for (auto &Device : Context->getDevices()) { - DeviceData &PerDevData = DeviceDataMap[Device->ZeDevice]; - PerDevData.State = St; - } - } + const std::string &ErrorMessage); ~ur_program_handle_t_(); void ur_release_program_resources(bool deletion); diff --git a/source/adapters/level_zero/ur_level_zero.hpp b/source/adapters/level_zero/ur_level_zero.hpp index eed3ddef63..36965c5d58 100644 --- a/source/adapters/level_zero/ur_level_zero.hpp +++ b/source/adapters/level_zero/ur_level_zero.hpp @@ -25,17 +25,9 @@ #include #include "common.hpp" -#ifdef UR_ADAPTER_LEVEL_ZERO_V2 -#include "v2/context.hpp" -#else #include "context.hpp" -#endif #include "device.hpp" -#ifdef UR_ADAPTER_LEVEL_ZERO_V2 -#include "v2/event.hpp" -#else #include "event.hpp" -#endif #include "image.hpp" #include "kernel.hpp" #include "memory.hpp" From 6d5c6868c5a7c83366ea9e87eaf61e6bfd9153b2 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Wed, 9 Oct 2024 16:36:47 -0700 Subject: [PATCH 18/25] Format --- source/adapters/level_zero/program.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 1ca26a12fb..1fba7a1dcf 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -1007,15 +1007,15 @@ ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Context) ur_program_handle_t_::ur_program_handle_t_(state, ur_context_handle_t Context, ze_module_handle_t InteropZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - AssociatedDevices({Context->getDevices()[0]}), - InteropZeModule{InteropZeModule} {} + AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ + InteropZeModule} {} ur_program_handle_t_::ur_program_handle_t_(state, ur_context_handle_t Context, ze_module_handle_t InteropZeModule, bool OwnZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, - AssociatedDevices({Context->getDevices()[0]}), - InteropZeModule{InteropZeModule} { + AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ + InteropZeModule} { // TODO: Currently it is not possible to understand the device associated // with provided ZeModule. So we can't set the state on that device to Exe. } From 2a690145b6b7f6345bb8553061380ead09e4c249 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Wed, 9 Oct 2024 16:38:10 -0700 Subject: [PATCH 19/25] Change back --- source/adapters/level_zero/program.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 1fba7a1dcf..1ca26a12fb 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -1007,15 +1007,15 @@ ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Context) ur_program_handle_t_::ur_program_handle_t_(state, ur_context_handle_t Context, ze_module_handle_t InteropZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ - InteropZeModule} {} + AssociatedDevices({Context->getDevices()[0]}), + InteropZeModule{InteropZeModule} {} ur_program_handle_t_::ur_program_handle_t_(state, ur_context_handle_t Context, ze_module_handle_t InteropZeModule, bool OwnZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, - AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ - InteropZeModule} { + AssociatedDevices({Context->getDevices()[0]}), + InteropZeModule{InteropZeModule} { // TODO: Currently it is not possible to understand the device associated // with provided ZeModule. So we can't set the state on that device to Exe. } From 0e397cacd59bd2cff500f3f3717cf719833e7745 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Wed, 9 Oct 2024 22:00:55 -0700 Subject: [PATCH 20/25] Fix formatting using cppformat target --- source/adapters/level_zero/program.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 1ca26a12fb..1fba7a1dcf 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -1007,15 +1007,15 @@ ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Context) ur_program_handle_t_::ur_program_handle_t_(state, ur_context_handle_t Context, ze_module_handle_t InteropZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, - AssociatedDevices({Context->getDevices()[0]}), - InteropZeModule{InteropZeModule} {} + AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ + InteropZeModule} {} ur_program_handle_t_::ur_program_handle_t_(state, ur_context_handle_t Context, ze_module_handle_t InteropZeModule, bool OwnZeModule) : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, - AssociatedDevices({Context->getDevices()[0]}), - InteropZeModule{InteropZeModule} { + AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ + InteropZeModule} { // TODO: Currently it is not possible to understand the device associated // with provided ZeModule. So we can't set the state on that device to Exe. } From f3fa464c8e7ff3f873251e30a2e08c9f84edafe0 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 15 Oct 2024 15:11:39 -0700 Subject: [PATCH 21/25] Enable program conformace test for L0 --- .github/workflows/multi_device.yml | 2 +- source/adapters/level_zero/program.cpp | 18 ++++++++++++------ .../urProgramCreateWithNativeHandle.cpp | 8 ++++++++ .../program/urProgramGetBuildInfo.cpp | 15 +++++++++++++-- .../program/urProgramGetFunctionPointer.cpp | 16 ++++++++++++---- .../program/urProgramGetNativeHandle.cpp | 7 +++++++ 6 files changed, 53 insertions(+), 13 deletions(-) diff --git a/.github/workflows/multi_device.yml b/.github/workflows/multi_device.yml index ebdb982148..3be4d55e88 100644 --- a/.github/workflows/multi_device.yml +++ b/.github/workflows/multi_device.yml @@ -63,4 +63,4 @@ jobs: - name: Test adapters working-directory: ${{github.workspace}}/build - run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "enqueue|kernel|program|integration|exp_command_buffer|exp_enqueue_native|exp_launch_properties|exp_usm_p2p" --timeout 180 + run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "enqueue|kernel|integration|exp_command_buffer|exp_enqueue_native|exp_launch_properties|exp_usm_p2p" --timeout 180 diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 1fba7a1dcf..e687430e28 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -905,8 +905,15 @@ ur_result_t urProgramGetNativeHandle( std::shared_lock Guard(Program->Mutex); assert(Program->AssociatedDevices.size() > 0); - auto Module = - Program->getZeModuleHandle(Program->AssociatedDevices[0]->ZeDevice); + // Current API doesn't allow to specify device for which we want to get the + // native handle. So, find the first device with a valid module handle. + ze_module_handle_t Module = nullptr; + for (const auto &Device : Program->AssociatedDevices) { + Module = Program->getZeModuleHandle(Device->ZeDevice); + if (Module) { + break; + } + } if (!Module) return UR_RESULT_ERROR_INVALID_OPERATION; @@ -924,7 +931,6 @@ ur_result_t urProgramCreateWithNativeHandle( ur_program_handle_t *Program ///< [out] pointer to the handle of the ///< program object created. ) { - std::ignore = Properties; UR_ASSERT(Context && NativeProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(Program, UR_RESULT_ERROR_INVALID_NULL_POINTER); auto ZeModule = ur_cast(NativeProgram); @@ -934,9 +940,9 @@ ur_result_t urProgramCreateWithNativeHandle( // executable (state Object). try { - ur_program_handle_t_ *UrProgram = - new ur_program_handle_t_(ur_program_handle_t_::Exe, Context, ZeModule, - Properties->isNativeHandleOwned); + ur_program_handle_t_ *UrProgram = new ur_program_handle_t_( + ur_program_handle_t_::Exe, Context, ZeModule, + Properties ? Properties->isNativeHandleOwned : false); *Program = reinterpret_cast(UrProgram); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; diff --git a/test/conformance/program/urProgramCreateWithNativeHandle.cpp b/test/conformance/program/urProgramCreateWithNativeHandle.cpp index ddf3767e43..00493d90a8 100644 --- a/test/conformance/program/urProgramCreateWithNativeHandle.cpp +++ b/test/conformance/program/urProgramCreateWithNativeHandle.cpp @@ -9,6 +9,14 @@ struct urProgramCreateWithNativeHandleTest : uur::urProgramTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp()); { + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, + nullptr)); + // For Level Zero we have to build the program to have the native handle. + if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); + } UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( urProgramGetNativeHandle(program, &native_program_handle)); } diff --git a/test/conformance/program/urProgramGetBuildInfo.cpp b/test/conformance/program/urProgramGetBuildInfo.cpp index 02faebcfb0..cf4e9b9217 100644 --- a/test/conformance/program/urProgramGetBuildInfo.cpp +++ b/test/conformance/program/urProgramGetBuildInfo.cpp @@ -33,8 +33,19 @@ TEST_P(urProgramGetBuildInfoTest, Success) { auto property_name = getParam(); size_t property_size = 0; std::vector property_value; - ASSERT_SUCCESS(urProgramGetBuildInfo(program, device, property_name, 0, - nullptr, &property_size)); + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + auto result = urProgramGetBuildInfo(program, device, property_name, 0, + nullptr, &property_size); + + if (property_name == UR_PROGRAM_BUILD_INFO_STATUS && + backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ASSERT_EQ(UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION, result); + return; + } + + ASSERT_SUCCESS(result); property_value.resize(property_size); ASSERT_SUCCESS(urProgramGetBuildInfo(program, device, property_name, property_size, property_value.data(), diff --git a/test/conformance/program/urProgramGetFunctionPointer.cpp b/test/conformance/program/urProgramGetFunctionPointer.cpp index 00f5ad74e0..41d397d527 100644 --- a/test/conformance/program/urProgramGetFunctionPointer.cpp +++ b/test/conformance/program/urProgramGetFunctionPointer.cpp @@ -29,10 +29,18 @@ TEST_P(urProgramGetFunctionPointerTest, Success) { TEST_P(urProgramGetFunctionPointerTest, InvalidKernelName) { void *function_pointer = nullptr; std::string missing_function = "aFakeFunctionName"; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_KERNEL_NAME, - urProgramGetFunctionPointer(device, program, - missing_function.data(), - &function_pointer)); + auto result = urProgramGetFunctionPointer( + device, program, missing_function.data(), &function_pointer); + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + // TODO: level zero backend incorrectly returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE + if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ASSERT_EQ(UR_RESULT_ERROR_UNSUPPORTED_FEATURE, result); + } else { + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_KERNEL_NAME, result); + } + ASSERT_EQ(function_pointer, nullptr); } diff --git a/test/conformance/program/urProgramGetNativeHandle.cpp b/test/conformance/program/urProgramGetNativeHandle.cpp index 05bba697ec..864aa8bd62 100644 --- a/test/conformance/program/urProgramGetNativeHandle.cpp +++ b/test/conformance/program/urProgramGetNativeHandle.cpp @@ -9,6 +9,13 @@ using urProgramGetNativeHandleTest = uur::urProgramTest; UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urProgramGetNativeHandleTest); TEST_P(urProgramGetNativeHandleTest, Success) { + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + // For Level Zero we have to build the program to have the native handle. + if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); + } ur_native_handle_t native_program_handle = 0; if (auto error = urProgramGetNativeHandle(program, &native_program_handle)) { From 88fb72aed96f657fba6dff1982ee2070ca720590 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 17 Oct 2024 10:01:20 -0700 Subject: [PATCH 22/25] Add multi-device conformance tests for urProgramCreateWithBinary and fix bugs --- source/adapters/level_zero/program.cpp | 24 +- source/adapters/level_zero/program.hpp | 2 +- test/conformance/program/CMakeLists.txt | 1 + .../urMultiDeviceProgramCreateWithBinary.cpp | 247 ++++++++++++++++++ .../testing/include/uur/fixtures.h | 39 +++ 5 files changed, 308 insertions(+), 5 deletions(-) create mode 100644 test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index e687430e28..d7adc5eb37 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -110,6 +110,11 @@ ur_result_t urProgramCreateWithBinary( // we could change the PI interface and have the caller pass additional // information to distinguish the cases. try { + for (uint32_t i = 0; i < numDevices; i++) { + UR_ASSERT(ppBinaries[i] || !pLengths[0], UR_RESULT_ERROR_INVALID_VALUE); + UR_ASSERT(hContext->isValidDevice(phDevices[i]), + UR_RESULT_ERROR_INVALID_DEVICE); + } ur_program_handle_t_ *UrProgram = new ur_program_handle_t_( ur_program_handle_t_::Native, hContext, numDevices, phDevices, pProperties, ppBinaries, pLengths); @@ -659,12 +664,15 @@ ur_result_t urProgramGetInfo( std::vector binarySizes; for (auto Device : Program->AssociatedDevices) { auto State = Program->getState(Device->ZeDevice); + if (State == ur_program_handle_t_::Native) { + binarySizes.push_back(Program->getCodeSize(Device->ZeDevice)); + continue; + } auto ZeModule = Program->getZeModuleHandle(Device->ZeDevice); if (!ZeModule) return UR_RESULT_ERROR_INVALID_PROGRAM; if (State == ur_program_handle_t_::IL || - State == ur_program_handle_t_::Native || State == ur_program_handle_t_::Object) { // We don't have a binary for this device, so return size of the spirv // code. This is an array of 1 element, initialized as if it were @@ -701,11 +709,20 @@ ur_result_t urProgramGetInfo( deviceIndex < Program->AssociatedDevices.size(); deviceIndex++) { auto ZeDevice = Program->AssociatedDevices[deviceIndex]->ZeDevice; auto State = Program->getState(ZeDevice); + if (State == ur_program_handle_t_::Native) { + // If Program was created from Native code then return that code. + if (PBinary) { + std::memcpy(PBinary[deviceIndex], Program->getCode(ZeDevice), + Program->getCodeSize(ZeDevice)); + } + SzBinary += Program->getCodeSize(ZeDevice); + continue; + } auto ZeModule = Program->getZeModuleHandle(ZeDevice); if (!ZeModule) { return UR_RESULT_ERROR_INVALID_PROGRAM; } - // If the caller is using a Program which is IL, Native or an object, then + // If the caller is using a Program which is IL or an object, then // the program has not been built for multiple devices so a single IL is // returned. // TODO: currently if program is not compiled for any of the associated @@ -714,7 +731,6 @@ ur_result_t urProgramGetInfo( // that program is compiled for subset of associated devices, so that case // probably should be explicitely specified and handled better. if (State == ur_program_handle_t_::IL || - State == ur_program_handle_t_::Native || State == ur_program_handle_t_::Object) { if (PropSizeRet) *PropSizeRet = Program->getCodeSize(); @@ -725,7 +741,7 @@ ur_result_t urProgramGetInfo( } else if (State == ur_program_handle_t_::Exe) { size_t binarySize = 0; if (PBinary) { - NativeBinaryPtr = PBinary[deviceIndex++]; + NativeBinaryPtr = PBinary[deviceIndex]; } // If the caller is using a Program which is a built binary, then // the program returned will either be a single module if this is a diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index cde0697f13..4fe8c24acd 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -234,7 +234,7 @@ struct ur_program_handle_t_ : _ur_object { // In IL and Object states, this contains the SPIR-V representation of the // module. std::unique_ptr SpirvCode; // Array containing raw IL code. - size_t SpirvCodeLength; // Size (bytes) of the array. + size_t SpirvCodeLength = 0; // Size (bytes) of the array. // The Level Zero module handle for interoperability. // This module handle is either initialized with the handle provided to diff --git a/test/conformance/program/CMakeLists.txt b/test/conformance/program/CMakeLists.txt index 317e3df946..31235eaf71 100644 --- a/test/conformance/program/CMakeLists.txt +++ b/test/conformance/program/CMakeLists.txt @@ -7,6 +7,7 @@ add_conformance_test_with_kernels_environment(program urProgramBuild.cpp urProgramCompile.cpp urProgramCreateWithBinary.cpp + urMultiDeviceProgramCreateWithBinary.cpp urProgramCreateWithIL.cpp urProgramCreateWithNativeHandle.cpp urProgramGetBuildInfo.cpp diff --git a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp new file mode 100644 index 0000000000..95a135af1c --- /dev/null +++ b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp @@ -0,0 +1,247 @@ + +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +struct urMultiDeviceProgramCreateWithBinaryTest + : uur::urMultiDeviceProgramTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceProgramTest::SetUp()); + + // First obtain binaries for all devices from the compiler SPIRV program. + devices = uur::DevicesEnvironment::instance->devices; + if (devices.size() < 2) { + GTEST_SKIP(); + } + ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); + size_t binary_sizes_len = 0; + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, + 0, nullptr, &binary_sizes_len)); + // We're expecting number of binaries equal to number of devices. + ASSERT_EQ(binary_sizes_len / sizeof(size_t), devices.size()); + binary_sizes.resize(devices.size()); + binaries.resize(devices.size()); + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, + binary_sizes.size() * sizeof(size_t), + binary_sizes.data(), nullptr)); + for (size_t i = 0; i < devices.size(); i++) { + size_t binary_size = binary_sizes[i]; + binaries[i].resize(binary_size); + pointers.push_back(binaries[i].data()); + } + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARIES, + sizeof(uint8_t *) * pointers.size(), + pointers.data(), nullptr)); + + // Now create a program with multiple device binaries. + ASSERT_SUCCESS(urProgramCreateWithBinary( + context, devices.size(), devices.data(), binary_sizes.data(), + pointers.data(), nullptr, &binary_program)); + } + + void TearDown() override { + if (binary_program) { + EXPECT_SUCCESS(urProgramRelease(binary_program)); + } + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceProgramTest::TearDown()); + } + + std::vector> binaries; + std::vector devices; + std::vector pointers; + std::vector binary_sizes; + ur_program_handle_t binary_program = nullptr; +}; + +// Create the kernel using the program created with multiple binaries and run it on all devices. +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, + CreateAndRunKernelOnAllDevices) { + constexpr size_t global_offset = 0; + constexpr size_t n_dimensions = 1; + constexpr size_t global_size = 100; + constexpr size_t local_size = 100; + + auto kernelName = + uur::KernelsEnvironment::instance->GetEntryPointNames("foo")[0]; + + for (size_t i = 1; i < devices.size(); i++) { + uur::raii::Kernel kernel; + ASSERT_SUCCESS(urProgramBuild(context, binary_program, nullptr)); + ASSERT_SUCCESS( + urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); + + ASSERT_SUCCESS(urEnqueueKernelLaunch( + queues[i], kernel.get(), n_dimensions, &global_offset, &local_size, + &global_size, 0, nullptr, nullptr)); + + ASSERT_SUCCESS(urQueueFinish(queues[i])); + } +} + +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, CheckCompileAndLink) { + // TODO: Current behaviour is that we allow to compile only IL programs for Level Zero and link only programs in Object state. + // OpenCL allows to compile and link programs created from native binaries, so probably we should align those two. + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ASSERT_EQ(urProgramCompile(context, binary_program, nullptr), + UR_RESULT_ERROR_INVALID_OPERATION); + uur::raii::Program linked_program; + ASSERT_EQ(urProgramLink(context, 1, &binary_program, nullptr, + linked_program.ptr()), + UR_RESULT_ERROR_INVALID_OPERATION); + } else if (backend == UR_PLATFORM_BACKEND_OPENCL) { + ASSERT_SUCCESS(urProgramCompile(context, binary_program, nullptr)); + uur::raii::Program linked_program; + ASSERT_SUCCESS(urProgramLink(context, 1, &binary_program, nullptr, + linked_program.ptr())); + } else { + GTEST_SKIP(); + } +} + +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, + InvalidProgramBinaryForOneOfTheDevices) { + std::vector pointers_with_invalid_binary; + for (size_t i = 1; i < devices.size(); i++) { + pointers_with_invalid_binary.push_back(nullptr); + } + uur::raii::Program invalid_bin_program; + ASSERT_EQ(urProgramCreateWithBinary(context, devices.size(), devices.data(), + binary_sizes.data(), + pointers_with_invalid_binary.data(), + nullptr, invalid_bin_program.ptr()), + UR_RESULT_ERROR_INVALID_VALUE); +} + +// Test the case when program is built multiple times for different devices from context. +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, MultipleBuildCalls) { + // Run test only for level zero backend which supports urProgramBuildExp. + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + if (backend != UR_PLATFORM_BACKEND_LEVEL_ZERO) { + GTEST_SKIP(); + } + auto first_subset = std::vector( + devices.begin(), devices.begin() + devices.size() / 2); + auto second_subset = std::vector( + devices.begin() + devices.size() / 2, devices.end()); + ASSERT_SUCCESS(urProgramBuildExp(binary_program, first_subset.size(), + first_subset.data(), nullptr)); + auto kernelName = + uur::KernelsEnvironment::instance->GetEntryPointNames("foo")[0]; + uur::raii::Kernel kernel; + ASSERT_SUCCESS( + urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); + ASSERT_SUCCESS(urProgramBuildExp(binary_program, second_subset.size(), + second_subset.data(), nullptr)); + ASSERT_SUCCESS( + urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); + + // Building for the same subset of devices should not fail. + ASSERT_SUCCESS(urProgramBuildExp(binary_program, first_subset.size(), + first_subset.data(), nullptr)); +} + +// Test the case we get native binaries from program created with multiple binaries which wasn't built (i.e. in Native state). +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, + GetBinariesAndSizesFromProgramInNativeState) { + size_t exp_binary_sizes_len = 0; + std::vector exp_binary_sizes; + std::vector> exp_binaries; + std::vector exp_pointer; + ASSERT_SUCCESS(urProgramGetInfo(binary_program, + UR_PROGRAM_INFO_BINARY_SIZES, 0, nullptr, + &exp_binary_sizes_len)); + auto num = exp_binary_sizes_len / sizeof(size_t); + exp_binary_sizes.resize(num); + exp_binaries.resize(num); + exp_pointer.resize(num); + ASSERT_SUCCESS(urProgramGetInfo(binary_program, + UR_PROGRAM_INFO_BINARY_SIZES, + exp_binary_sizes.size() * sizeof(size_t), + exp_binary_sizes.data(), nullptr)); + for (size_t i = 0; i < devices.size(); i++) { + size_t binary_size = exp_binary_sizes[i]; + exp_binaries[i].resize(binary_size); + exp_pointer[i] = exp_binaries[i].data(); + } + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARIES, + sizeof(uint8_t *) * exp_pointer.size(), + exp_pointer.data(), nullptr)); + + // Verify that we get exactly what was provided at the creation step. + ASSERT_EQ(exp_binaries, binaries); + ASSERT_EQ(exp_binary_sizes, binary_sizes); +} + +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, GetIL) { + size_t il_length = 0; + ASSERT_SUCCESS(urProgramGetInfo(binary_program, UR_PROGRAM_INFO_IL, 0, + nullptr, &il_length)); + ASSERT_EQ(il_length, 0); + std::vector il(il_length); + ASSERT_EQ(urProgramGetInfo(binary_program, UR_PROGRAM_INFO_IL, il.size(), + il.data(), nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); +} + +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, CheckProgramGetInfo) { + std::vector property_value; + size_t property_size = 0; + + // Program is not in exe state, so error is expected. + for (auto prop : + {UR_PROGRAM_INFO_NUM_KERNELS, UR_PROGRAM_INFO_KERNEL_NAMES}) { + auto result = + urProgramGetInfo(binary_program, prop, 0, nullptr, &property_size); + // TODO: OpenCL and Level Zero return diffent error code, it needs to be fixed. + ASSERT_TRUE(result == UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE || + result == UR_RESULT_ERROR_INVALID_PROGRAM); + } + + // Now build the program and check that we can get the info. + ASSERT_SUCCESS(urProgramBuild(context, binary_program, nullptr)); + + size_t logSize; + std::string log; + + for (auto dev : devices) { + ASSERT_SUCCESS(urProgramGetBuildInfo( + program, dev, UR_PROGRAM_BUILD_INFO_LOG, 0, nullptr, &logSize)); + // The size should always include the null terminator. + ASSERT_GT(logSize, 0); + log.resize(logSize); + ASSERT_SUCCESS(urProgramGetBuildInfo(program, dev, + UR_PROGRAM_BUILD_INFO_LOG, logSize, + log.data(), nullptr)); + ASSERT_EQ(log[logSize - 1], '\0'); + } + + ASSERT_SUCCESS(urProgramGetInfo(binary_program, UR_PROGRAM_INFO_NUM_KERNELS, + 0, nullptr, &property_size)); + property_value.resize(property_size); + ASSERT_SUCCESS(urProgramGetInfo(binary_program, UR_PROGRAM_INFO_NUM_KERNELS, + property_size, property_value.data(), + nullptr)); + + auto returned_num_of_kernels = + reinterpret_cast(property_value.data()); + ASSERT_GT(*returned_num_of_kernels, 0U); + ASSERT_SUCCESS(urProgramGetInfo(binary_program, + UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, + &property_size)); + property_value.resize(property_size); + ASSERT_SUCCESS(urProgramGetInfo(binary_program, + UR_PROGRAM_INFO_KERNEL_NAMES, property_size, + property_value.data(), nullptr)); + auto returned_kernel_names = + reinterpret_cast(property_value.data()); + ASSERT_STRNE(returned_kernel_names, ""); +} diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index b853164fb6..a7e586dca8 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -1560,6 +1560,45 @@ struct urMultiDeviceQueueTest : urMultiDeviceContextTest { std::vector queues; }; +struct urMultiDeviceProgramTest : urMultiDeviceQueueTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceQueueTest::SetUp()); + + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + // Multi-device programs are not supported for AMD and CUDA + if (backend == UR_PLATFORM_BACKEND_HIP && + backend == UR_PLATFORM_BACKEND_CUDA) { + GTEST_SKIP(); + } + UUR_RETURN_ON_FATAL_FAILURE( + uur::KernelsEnvironment::instance->LoadSource(program_name, + il_binary)); + + const ur_program_properties_t properties = { + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr, + static_cast(metadatas.size()), + metadatas.empty() ? nullptr : metadatas.data()}; + + ASSERT_SUCCESS(urProgramCreateWithIL(context, (*il_binary).data(), + (*il_binary).size(), &properties, + &program)); + } + + void TearDown() override { + if (program) { + EXPECT_SUCCESS(urProgramRelease(program)); + } + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceQueueTest::TearDown()); + } + + std::shared_ptr> il_binary; + std::string program_name = "foo"; + ur_program_handle_t program = nullptr; + std::vector metadatas{}; +}; + } // namespace uur #endif // UR_CONFORMANCE_INCLUDE_FIXTURES_H_INCLUDED From 3a092a320a069c2064ba32f995069827781841af Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 17 Oct 2024 16:10:53 -0700 Subject: [PATCH 23/25] Update match file for L0 and L0 v2 --- .../program/program_adapter_level_zero.match | 12 ------------ .../program/program_adapter_level_zero_v2.match | 12 ------------ 2 files changed, 24 deletions(-) diff --git a/test/conformance/program/program_adapter_level_zero.match b/test/conformance/program/program_adapter_level_zero.match index 445f7e6fbd..e69de29bb2 100644 --- a/test/conformance/program/program_adapter_level_zero.match +++ b/test/conformance/program/program_adapter_level_zero.match @@ -1,12 +0,0 @@ -{{NONDETERMINISTIC}} -urProgramCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramGetBuildInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_UR_PROGRAM_BUILD_INFO_STATUS -urProgramGetFunctionPointerTest.InvalidKernelName/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramGetNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urProgramLinkErrorTest.LinkFailure/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urProgramLinkErrorTest.SetOutputOnLinkError/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValueSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValueId/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValuePtr/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} diff --git a/test/conformance/program/program_adapter_level_zero_v2.match b/test/conformance/program/program_adapter_level_zero_v2.match index 2c5b6500c3..e69de29bb2 100644 --- a/test/conformance/program/program_adapter_level_zero_v2.match +++ b/test/conformance/program/program_adapter_level_zero_v2.match @@ -1,12 +0,0 @@ -{{NONDETERMINISTIC}} -urProgramCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramGetBuildInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROGRAM_BUILD_INFO_STATUS -urProgramGetFunctionPointerTest.InvalidKernelName/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramGetNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urProgramLinkErrorTest.LinkFailure/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urProgramLinkErrorTest.SetOutputOnLinkError/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValueSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValueId/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValuePtr/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ From 82beeee2a1d31dc0517164cd70e8db4457803254 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 17 Oct 2024 22:34:35 -0700 Subject: [PATCH 24/25] Fix after rebase --- .../v2/queue_immediate_in_order.cpp | 24 +++++-------------- source/loader/layers/sanitizer/ur_sanddi.cpp | 19 ++++++++++----- .../program/program_adapter_level_zero.match | 4 ++++ .../program_adapter_level_zero_v2.match | 4 ++++ 4 files changed, 27 insertions(+), 24 deletions(-) diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index b68af85033..dfb820d39a 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -847,15 +847,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueDeviceGlobalVariableWrite( ur_program_handle_t hProgram, const char *name, bool blockingWrite, size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // TODO: implement program->getZeModuleMap() to be sure that - // it's thread-safe - ze_module_handle_t zeModule{}; - auto It = hProgram->ZeModuleMap.find(this->hDevice->ZeDevice); - if (It != hProgram->ZeModuleMap.end()) { - zeModule = It->second; - } else { - zeModule = hProgram->ZeModule; - } + // TODO: make getZeModuleHandle thread-safe + ze_module_handle_t zeModule = + hProgram->getZeModuleHandle(this->hDevice->ZeDevice); // Find global variable pointer auto globalVarPtr = getGlobalPointerFromModule(zeModule, offset, count, name); @@ -869,15 +863,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueDeviceGlobalVariableRead( ur_program_handle_t hProgram, const char *name, bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // TODO: implement program->getZeModule() to be sure that - // it's thread-safe - ze_module_handle_t zeModule{}; - auto It = hProgram->ZeModuleMap.find(this->hDevice->ZeDevice); - if (It != hProgram->ZeModuleMap.end()) { - zeModule = It->second; - } else { - zeModule = hProgram->ZeModule; - } + // TODO: make getZeModuleHandle thread-safe + ze_module_handle_t zeModule = + hProgram->getZeModuleHandle(this->hDevice->ZeDevice); // Find global variable pointer auto globalVarPtr = getGlobalPointerFromModule(zeModule, offset, count, name); diff --git a/source/loader/layers/sanitizer/ur_sanddi.cpp b/source/loader/layers/sanitizer/ur_sanddi.cpp index 2f02c9270e..95b1649691 100644 --- a/source/loader/layers/sanitizer/ur_sanddi.cpp +++ b/source/loader/layers/sanitizer/ur_sanddi.cpp @@ -206,10 +206,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( /// @brief Intercept function for urProgramCreateWithBinary __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -224,8 +230,9 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( getContext()->logger.debug("==== urProgramCreateWithBinary"); - UR_CALL(pfnProgramCreateWithBinary(hContext, hDevice, size, pBinary, - pProperties, phProgram)); + UR_CALL(pfnProgramCreateWithBinary(hContext, numDevices, phDevices, + pLengths, ppBinaries, pProperties, + phProgram)); UR_CALL(getContext()->interceptor->insertProgram(*phProgram)); return UR_RESULT_SUCCESS; diff --git a/test/conformance/program/program_adapter_level_zero.match b/test/conformance/program/program_adapter_level_zero.match index e69de29bb2..bd7e269d9f 100644 --- a/test/conformance/program/program_adapter_level_zero.match +++ b/test/conformance/program/program_adapter_level_zero.match @@ -0,0 +1,4 @@ +{{NONDETERMINISTIC}} +urProgramSetSpecializationConstantsTest.InvalidValueSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +urProgramSetSpecializationConstantsTest.InvalidValueId/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +urProgramSetSpecializationConstantsTest.InvalidValuePtr/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} diff --git a/test/conformance/program/program_adapter_level_zero_v2.match b/test/conformance/program/program_adapter_level_zero_v2.match index e69de29bb2..892b7cfb51 100644 --- a/test/conformance/program/program_adapter_level_zero_v2.match +++ b/test/conformance/program/program_adapter_level_zero_v2.match @@ -0,0 +1,4 @@ +{{NONDETERMINISTIC}} +urProgramSetSpecializationConstantsTest.InvalidValueSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +urProgramSetSpecializationConstantsTest.InvalidValueId/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urProgramSetSpecializationConstantsTest.InvalidValuePtr/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ From 92df0e4e3377622b3a3c906112f79fcb18f83614 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Fri, 18 Oct 2024 08:15:30 -0700 Subject: [PATCH 25/25] Fix werror problem --- test/conformance/testing/include/uur/fixtures.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index a7e586dca8..00bee6ba14 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -1568,7 +1568,7 @@ struct urMultiDeviceProgramTest : urMultiDeviceQueueTest { ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, sizeof(backend), &backend, nullptr)); // Multi-device programs are not supported for AMD and CUDA - if (backend == UR_PLATFORM_BACKEND_HIP && + if (backend == UR_PLATFORM_BACKEND_HIP || backend == UR_PLATFORM_BACKEND_CUDA) { GTEST_SKIP(); }